🔍 Using loc and iloc
`loc` and `iloc` are pandas' precision tools for selecting data. Think of them as the difference between finding a house by its address (`loc`) versus finding it by "the 3rd house on the left" (`iloc`). Both get you there, but they use different approaches!
🏠 Understanding iloc (Position-Based)
`iloc` uses integer positions, counted from zero: 0, 1, 2, 3... It's like finding seats in a theater by row and seat number.
import pandas as pd
# Student grades: one row per student, one column per subject
grade_columns = {
    'name': ['Alice', 'Bob', 'Charlie', 'Diana'],
    'math': [85, 92, 78, 95],
    'science': [90, 88, 85, 92],
    'english': [88, 85, 90, 89],
}
grades = pd.DataFrame(grade_columns)

print("Student Grades:")
print(grades)
print()

print("🔍 iloc Examples (Position-based):")
print()

# Scalar lookup: iloc[row_position, column_position]; 'math' is column 1
first_math_grade = grades.iloc[0, 1]
print("1️⃣ First student's math grade:")
print(first_math_grade)
print()

# A single integer returns that whole row as a Series
bob_row = grades.iloc[1]
print("2️⃣ Bob's all grades (row 1):")
print(bob_row)
print()

# Slices exclude their end position: rows 0-1, columns 1-2
first_two_math_science = grades.iloc[0:2, 1:3]
print("3️⃣ First 2 students, math and science:")
print(first_two_math_science)
print()

# A list picks arbitrary positions: rows 0 and 2 of column 0
alice_and_charlie = grades.iloc[[0, 2], 0]
print("4️⃣ Alice and Charlie's names:")
print(alice_and_charlie)
🏷️ Understanding loc (Label-Based)
`loc` uses actual names/labels. It's like finding a person by their name rather than their position in line.
import pandas as pd
# Product inventory; the default integer index (0-3) provides the row labels
inventory_columns = {
    'product': ['Laptop', 'Mouse', 'Keyboard', 'Monitor'],
    'stock': [15, 50, 25, 8],
    'price': [999, 25, 75, 300],
    'category': ['Computer', 'Accessory', 'Accessory', 'Computer'],
}
inventory = pd.DataFrame(inventory_columns)

print("Product Inventory:")
print(inventory)
print()

print("🏷️ loc Examples (Label-based):")
print()

# Scalar lookup: loc[row_label, column_label]
first_product = inventory.loc[0, 'product']
print("1️⃣ First product name:")
print(first_product)
print()

# A single row label returns that whole row as a Series
mouse_row = inventory.loc[1]
print("2️⃣ Mouse details (row 1):")
print(mouse_row)
print()

# Label slices include their end label: rows 0 and 1, two named columns
name_and_price = inventory.loc[0:1, ['product', 'price']]
print("3️⃣ First 2 products, name and price:")
print(name_and_price)
print()

# Column label slice: every column from 'stock' through 'price'
stock_to_price = inventory.loc[:, 'stock':'price']
print("4️⃣ All products, stock to price:")
print(stock_to_price)
🎯 When to Use loc vs iloc
Different situations call for different tools:
import pandas as pd
# Monthly sales figures
sales_columns = {
    'month': ['Jan', 'Feb', 'Mar', 'Apr'],
    'revenue': [10000, 12000, 15000, 11000],
    'expenses': [7000, 8000, 9000, 7500],
    'profit': [3000, 4000, 6000, 3500],
}
sales = pd.DataFrame(sales_columns)

print("Sales Data:")
print(sales)
print()

print("🎯 When to Use Each:")
print()

# Negative positions count from the end, so -2: is the final two rows
last_two_months = sales.iloc[-2:]
print("✅ Use iloc when you know positions:")
print("Last 2 months (positions -2 to end):")
print(last_two_months)
print()

# Columns picked by name, independent of where they sit in the frame
revenue_and_profit = sales.loc[:, ['revenue', 'profit']]
print("✅ Use loc when you know names:")
print("Revenue and profit columns:")
print(revenue_and_profit)
print()

# A step of 2 keeps positions 0 and 2
alternate_rows = sales.iloc[::2]
print("✅ Use iloc for consistent patterns:")
print("Every other row (0, 2):")
print(alternate_rows)
print()

# Label slices are inclusive: rows 0-2 and columns 'revenue' through 'profit'
q1_financials = sales.loc[0:2, 'revenue':'profit']
print("✅ Use loc for meaningful selection:")
print("Q1 data (first 3 months), financial columns:")
print(q1_financials)
📊 Selecting Ranges
Both loc and iloc can select ranges, but they work differently:
import pandas as pd
# Employee records
employee_columns = {
    'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
    'age': [25, 30, 35, 28, 32],
    'department': ['Sales', 'IT', 'HR', 'Sales', 'IT'],
    'salary': [50000, 75000, 60000, 52000, 78000],
}
employees = pd.DataFrame(employee_columns)

print("Employee Data:")
print(employees)
print()

print("📊 Range Selection:")
print()

# Positional slices stop BEFORE the end: rows 1, 2, 3 and columns 1, 2
by_position = employees.iloc[1:4, 1:3]
print("iloc ranges (position-based):")
print("Rows 1-3, columns 1-2:")
print(by_position)
print()

# Label slices stop AT the end: rows 1, 2, 3 and 'age' through 'department'
by_label = employees.loc[1:3, 'age':'department']
print("loc ranges (label-based):")
print("Rows 1-3, age to department:")
print(by_label)
print()

# Both selections above return the same rows and columns
print("💡 Key Difference:")
print("iloc[1:4] = positions 1, 2, 3 (excludes 4)")
print("loc[1:3] = indices 1, 2, 3 (includes 3)")
🎯 Practical Selection Examples
Real-world scenarios where loc and iloc shine:
import pandas as pd
# Survey responses, one row per respondent
survey_columns = {
    'respondent_id': [101, 102, 103, 104, 105],
    'age_group': ['18-25', '26-35', '36-45', '26-35', '18-25'],
    'satisfaction': [4, 5, 3, 4, 5],
    'recommend': [True, True, False, True, True],
    'comments': ['Good', 'Excellent', 'Average', 'Great', 'Perfect'],
}
survey = pd.DataFrame(survey_columns)

print("Survey Data:")
print(survey)
print()

print("🎯 Practical Examples:")
print()

# loc with a row label returns the row as a Series keyed by column name
print("1️⃣ Get specific respondent's feedback:")
respondent = survey.loc[2]
print(f"Respondent {respondent['respondent_id']}: {respondent['comments']}")
print()

# All rows of one named column
print("2️⃣ Get satisfaction scores only:")
satisfaction = survey.loc[:, 'satisfaction']
print(f"Satisfaction scores: {list(satisfaction)}")
print(f"Average satisfaction: {satisfaction.mean():.1f}")
print()

# Positive and negative positions can be mixed in a single list
print("3️⃣ Get first and last 2 responses:")
edge_positions = [0, 1, -2, -1]
first_last = survey.iloc[edge_positions]
summary_columns = ['respondent_id', 'satisfaction']
print("First and last 2 responses:")
print(first_last[summary_columns])
print()

# Column positions 2 and 3 are 'satisfaction' and 'recommend'
print("4️⃣ Get core metrics (columns 2-3):")
metrics = survey.iloc[:, 2:4]
print("Core satisfaction metrics:")
print(metrics)
🔧 Advanced Selection Techniques
Combine conditions and selections for powerful data access:
import pandas as pd
# Student performance records
student_columns = {
    'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
    'grade': ['A', 'B', 'A', 'C', 'B'],
    'score': [95, 82, 88, 76, 85],
    'attendance': [98, 92, 95, 85, 90],
}
students = pd.DataFrame(student_columns)

print("Student Performance:")
print(students)
print()

print("🔧 Advanced Techniques:")
print()

# Boolean masks, one True/False per row, reused by the loc selections below
scored_above_85 = students['score'] > 85
has_grade_a = students['grade'] == 'A'
attended_above_90 = students['attendance'] > 90

# Mask for the rows, a single label for the column
print("1️⃣ High performers (score > 85) - names only:")
high_performers = students.loc[scored_above_85, 'name']
print(list(high_performers))
print()

# A mask on its own keeps every column
print("2️⃣ Grade A students - all info:")
grade_a = students.loc[has_grade_a]
print(grade_a)
print()

# Mask for the rows, a list of labels for the columns
print("3️⃣ Good attendance (>90) - names and scores:")
good_attendance = students.loc[attended_above_90, ['name', 'score']]
print(good_attendance)
print()

# | combines masks element-wise: a row is kept when either one is True
print("4️⃣ Multiple conditions - A grade OR score > 85:")
top_students = students.loc[has_grade_a | scored_above_85]
print(top_students[['name', 'grade', 'score']])
📋 loc and iloc Reference Guide
Task | loc (Label-based) | iloc (Position-based) |
---|---|---|
Single cell | df.loc[0, 'name'] | df.iloc[0, 1] |
Single row | df.loc[0] | df.iloc[0] |
Single column | df.loc[:, 'name'] | df.iloc[:, 1] |
Row range | df.loc[0:2] | df.iloc[0:3] |
Column range | df.loc[:, 'name':'age'] | df.iloc[:, 1:3] |
Specific rows/cols | df.loc[[0,2], ['name','age']] | df.iloc[[0,2], [1,2]] |
Last row | df.loc[df.index[-1]] | df.iloc[-1] |
Conditional | df.loc[df['age'] > 30] | df.iloc[(df['age'] > 30).to_numpy()] (needs a boolean array, not a Series) |
⚠️ Common Mistakes
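Avoid common loc/iloc pitfalls (passing labels to iloc or positions to loc, and forgetting that loc slices include the end label while iloc slices exclude the end position) by following these correct usage patterns: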
import pandas as pd
# Small sample frame used to demonstrate correct loc / iloc usage
sample_columns = {
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [25, 30, 35],
    'city': ['NYC', 'LA', 'Chicago'],
}
data = pd.DataFrame(sample_columns)

print("✅ Correct loc and iloc Usage:")
print()

# loc takes labels: row label 0, column label 'name'
print("Using loc with labels:")
result_loc = data.loc[0, 'name']
print(f"data.loc[0, 'name'] = {result_loc}")
print()

# iloc takes integer positions: row 0, column 0 (the same cell as above)
print("Using iloc with positions:")
result_iloc = data.iloc[0, 0]
print(f"data.iloc[0, 0] = {result_iloc}")
print()

# loc also accepts a boolean mask for rows plus a list of column labels
print("Complex selection with loc:")
older_than_25 = data['age'] > 25
filtered = data.loc[older_than_25, ['name', 'city']]
print("People over 25 - names and cities:")
print(filtered)
print()

# Positional slices exclude the end: rows 0-1, columns 1-2 ('age', 'city')
print("Position-based selection with iloc:")
first_two_rows = data.iloc[0:2, 1:3]
print("First 2 rows, age and city columns:")
print(first_two_rows)
🎯 Key Takeaways
🎮 Practice Challenge
Test your loc/iloc skills:
import pandas as pd
# Customer records for the practice challenges
customer_columns = {
    'customer_id': [1001, 1002, 1003, 1004, 1005],
    'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
    'age': [25, 35, 30, 28, 45],
    'city': ['NYC', 'LA', 'Chicago', 'Boston', 'Miami'],
    'total_orders': [5, 12, 8, 15, 3],
}
customers = pd.DataFrame(customer_columns)

print("Customer Dataset:")
print(customers)
print()

print("🎮 Practice Challenges:")
print()

# The mask selects Bob's row; .iloc[0] pulls the scalar out of the
# one-element Series that loc returns
print("1️⃣ Get Bob's age using loc:")
is_bob = customers['name'] == 'Bob'
bob_ages = customers.loc[is_bob, 'age']
bob_age = bob_ages.iloc[0]
print(f"Bob's age: {bob_age}")
print()

# Negative slice start: the final two rows
print("2️⃣ Get last 2 customers using iloc:")
last_two = customers.iloc[-2:]
print("Last 2 customers:")
print(last_two[['name', 'city']])
print()

# Mask for the rows, 'name' for the column
print("3️⃣ High-value customers (>10 orders) - names only:")
ordered_more_than_10 = customers['total_orders'] > 10
high_value = customers.loc[ordered_more_than_10, 'name']
print("High-value customers:", list(high_value))
🚀 What's Next?
Excellent! You now have precise control over data selection. Next, let's learn about filtering data using conditions and boolean logic.
Continue to: Boolean and Conditional Selection
You're becoming a data selection expert! 🔍🎯
Was this helpful?
Track Your Learning Progress
Sign in to bookmark tutorials and keep track of your learning journey.
Your progress is saved automatically as you read.