📝 Renaming and Sorting Data

Clear column names and well-organized data make analysis much easier! Renaming columns helps you work with descriptive names instead of cryptic abbreviations, while sorting helps you find patterns and organize your data logically.

Think of it like organizing your desk - everything has a clear label and is arranged in a way that makes sense.

import pandas as pd

# Simulate a CSV export whose headers are terse abbreviations.
data = pd.DataFrame({
    'nm': ['Alice', 'Charlie', 'Bob'],
    'age_yrs': [25, 35, 30],
    'sal_amt': [50000, 70000, 60000],
    'dept_cd': ['SLS', 'IT', 'HR'],
})

# Show the frame as exported, followed by a blank separator line.
print("Original data with messy names:", data, sep="\n", end="\n\n")

# Swap each abbreviation for a descriptive name, then order the rows so the
# highest salary comes first. Both steps return new frames, so they chain.
data = (
    data
    .rename(columns={
        'nm': 'name',
        'age_yrs': 'age',
        'sal_amt': 'salary',
        'dept_cd': 'department',
    })
    .sort_values(by='salary', ascending=False)
)

print("After renaming and sorting:", data, sep="\n")

📝 Renaming Columns

Good column names make your code much more readable and maintainable:

Basic Column Renaming

import pandas as pd

# Product table whose headers are too abbreviated to read comfortably.
products = pd.DataFrame({
    'prod_id': [1, 2, 3],
    'nm': ['Laptop', 'Mouse', 'Keyboard'],
    'pr': [999, 25, 75],
    'qty': [10, 50, 25],
})

print("Before renaming:", products, sep="\n", end="\n\n")

# Map old header -> new header. Passing the mapping with axis='columns'
# is equivalent to rename(columns=...); headers not in the mapping are kept.
products = products.rename(
    {
        'prod_id': 'product_id',
        'nm': 'product_name',
        'pr': 'price',
        'qty': 'quantity',
    },
    axis='columns',
)

print("After renaming:", products, sep="\n")

Bulk Column Renaming

import pandas as pd

# Survey responses keyed by question code, plus one already-readable column.
survey = pd.DataFrame({
    'Q1': [4, 5, 3],
    'Q2': [3, 4, 5],
    'Q3': [5, 5, 4],
    'age': [25, 30, 35],
})

print("Survey with question codes:", survey, sep="\n", end="\n\n")

# Lookup table from question code to a descriptive column name.
question_map = dict(
    Q1='satisfaction',
    Q2='recommendation',
    Q3='ease_of_use',
)

# One rename call handles every code in the table; 'age' is not in the
# mapping and therefore keeps its name.
survey = survey.rename(question_map, axis='columns')

print("With descriptive names:", survey, sep="\n")

Column Name Cleaning

import pandas as pd

# Data imported from Excel with messy names
sales = pd.DataFrame({
    'Customer Name ': ['Alice', 'Bob', 'Charlie'],
    'Total Sales ($)': [1500, 2300, 1800],
    'Region/Territory': ['North', 'South', 'East']
})

print("Messy column names:")
print(list(sales.columns))
print()

# Clean up column names.
# regex=False is passed explicitly on every replace: the default for
# str.replace changed in pandas 2.0, and '($)' read as a regular expression
# matches only the empty end-of-string position, so it would remove nothing.
# Symbols are removed BEFORE spaces become underscores; otherwise the space
# in front of "($)" would be left behind as a trailing underscore.
sales.columns = (
    sales.columns
    .str.strip()                          # Trim leading/trailing whitespace
    .str.lower()                          # Make lowercase
    .str.replace('($)', '', regex=False)  # Remove the literal "($)" marker
    .str.replace('/', '_', regex=False)   # Replace slash with underscore
    .str.strip()                          # Trim the space left before "($)"
    .str.replace(' ', '_', regex=False)   # Replace inner spaces with underscore
)

print("Cleaned column names:")
print(list(sales.columns))
print()
print(sales)

📊 Sorting Data

Sorting helps you organize data to find patterns and insights:

Basic Sorting

import pandas as pd

# Grade book: one row per student with the subject they were graded in.
students = pd.DataFrame({
    'name': ['Alice', 'Charlie', 'Bob', 'Diana'],
    'grade': [85, 92, 78, 88],
    'subject': ['Math', 'Science', 'Math', 'Science'],
})

print("Original order:", students, sep="\n", end="\n\n")

# Descending sort on the numeric column puts the top grade first.
# sort_values returns a new frame; `students` itself is left untouched.
students_by_grade = students.sort_values(by='grade', ascending=False)
print("Sorted by grade (highest first):", students_by_grade, sep="\n", end="\n\n")

# With no `ascending` argument the sort is ascending, i.e. A-to-Z for text.
students_by_name = students.sort_values(by='name')
print("Sorted by name (alphabetically):", students_by_name, sep="\n")

Multi-Column Sorting

import pandas as pd

# Staff list spanning three departments.
employees = pd.DataFrame({
    'department': ['Sales', 'IT', 'Sales', 'IT', 'HR'],
    'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
    'salary': [50000, 75000, 52000, 80000, 48000],
})

print("Employee data:", employees, sep="\n", end="\n\n")

# Two sort keys, each with its own direction: departments A-to-Z, and
# within a department the highest salary first. The `ascending` list lines
# up position-by-position with the `by` list.
employees_sorted = employees.sort_values(
    by=['department', 'salary'],
    ascending=[True, False],
)
print("Sorted by department, then salary (high to low):", employees_sorted, sep="\n")

Index Sorting

import pandas as pd

# Price list whose row labels (3, 1, 2) are out of order.
data = pd.DataFrame(
    {
        'product': ['Keyboard', 'Mouse', 'Laptop'],
        'price': [75, 25, 999],
    },
    index=[3, 1, 2],
)

print("Mixed-up index:", data, sep="\n", end="\n\n")

# Reorder rows by their index labels rather than by any column's values.
# Ascending is the default; it is spelled out here for clarity.
data_sorted = data.sort_index(ascending=True)
print("Sorted by index:", data_sorted, sep="\n")

🔄 Combining Renaming and Sorting

Real-world workflows often involve both operations:

import pandas as pd

# Order export with abbreviated headers and dates stored as plain text.
raw_data = pd.DataFrame({
    'cust_nm': ['Alice Johnson', 'Bob Smith', 'Charlie Brown'],
    'ord_dt': ['2023-01-15', '2023-01-10', '2023-01-20'],
    'ord_amt': [299.99, 149.50, 89.99],
    'prod_cat': ['Electronics', 'Books', 'Electronics'],
})

print("Raw exported data:", raw_data, sep="\n", end="\n\n")

raw_data = (
    raw_data
    # Step 1: give every column a descriptive name.
    .rename(columns={
        'cust_nm': 'customer_name',
        'ord_dt': 'order_date',
        'ord_amt': 'order_amount',
        'prod_cat': 'product_category',
    })
    # Step 2: parse the date strings into real datetimes so the column sorts
    # chronologically. Assigning to an existing column keeps its position.
    .assign(order_date=lambda frame: pd.to_datetime(frame['order_date']))
)

# Step 3: newest order first.
clean_data = raw_data.sort_values(by='order_date', ascending=False)

print("After renaming and sorting:", clean_data, sep="\n")

🎯 Advanced Sorting Techniques

Custom Sorting

import pandas as pd

# Customer feedback where the text rating has a natural, non-alphabetical order.
feedback = pd.DataFrame({
    'customer': ['Alice', 'Bob', 'Charlie', 'Diana'],
    'rating': ['Good', 'Excellent', 'Poor', 'Good'],
    'score': [4, 5, 2, 4],
})

print("Feedback data:", feedback, sep="\n", end="\n\n")

# Ratings listed from worst to best; this is the order the sort should follow.
rating_order = ['Poor', 'Good', 'Excellent']

# Cast the column to an ordered categorical type built from that list, so
# comparisons and sorting use the list position instead of the alphabet.
feedback['rating'] = feedback['rating'].astype(
    pd.CategoricalDtype(categories=rating_order, ordered=True)
)

# Sorting a categorical column follows the category order defined above.
feedback_sorted = feedback.sort_values(by='rating')
print("Sorted by custom rating order:", feedback_sorted, sep="\n")

📋 Best Practices for Organization

📝 Practice: Customer Data Organization

Let's practice with a complete example:

import pandas as pd

# Customer export: abbreviated headers, inconsistent name casing, text dates.
customers = pd.DataFrame({
    'cust_id': [102, 101, 103, 105, 104],
    'f_name': ['john', 'ALICE', 'bob', 'DIANA', 'charlie'],
    'l_name': ['smith', 'JOHNSON', 'brown', 'WILSON', 'davis'],
    'purch_amt': [250.00, 1200.50, 75.25, 890.00, 450.75],
    'signup_dt': ['2023-03-15', '2023-01-10', '2023-02-28', '2023-01-05', '2023-02-14'],
})

print("Messy customer data:", customers, sep="\n", end="\n\n")

customers = (
    customers
    # Step 1: descriptive column names.
    .rename(columns={
        'cust_id': 'customer_id',
        'f_name': 'first_name',
        'l_name': 'last_name',
        'purch_amt': 'purchase_amount',
        'signup_dt': 'signup_date',
    })
    # Step 2: normalise name casing to Title Case.
    # Step 3a: parse the signup dates so they sort chronologically.
    .assign(
        first_name=lambda frame: frame['first_name'].str.title(),
        last_name=lambda frame: frame['last_name'].str.title(),
        signup_date=lambda frame: pd.to_datetime(frame['signup_date']),
    )
    # Step 3b: earliest signup first; ties broken by the larger purchase.
    .sort_values(by=['signup_date', 'purchase_amount'], ascending=[True, False])
    # Step 4: renumber rows 0..n-1 and discard the old labels.
    .reset_index(drop=True)
)

print("Organized customer data:", customers, sep="\n")

🎯 Key Takeaways

🚀 What's Next?

Perfect! You now know how to organize your data with clear names and logical ordering. Next, let's explore grouping and aggregation - powerful techniques for summarizing data.

Continue to: GroupBy Operations

Keep organizing! 📝✨

Online Python

📝 Renaming and Sorting Data

Track Your Learning Progress