🔢 Sorting Arrays
Sorting is essential for organizing data and finding patterns! NumPy provides powerful sorting functions that help you arrange data in meaningful order, find rankings, and identify top performers efficiently.
import numpy as np
# Overview: sort a set of test scores and look up the three best students.
students = np.array([
    'Alice', 'Bob', 'Carol', 'David', 'Eve',
    'Frank', 'Grace', 'Henry', 'Iris', 'Jack',
])
test_scores = np.array([85, 92, 78, 96, 89, 74, 88, 93, 82, 90])

# np.sort hands back a sorted copy, so test_scores keeps its original order.
sorted_scores = np.sort(test_scores)

# Flip the ascending argsort so the best score comes first, then keep the
# leading three positions; the same indices line up with `students`.
descending_order = test_scores.argsort()[::-1]
top_3_indices = descending_order[:3]

print(f"Original scores: {test_scores}")
print(f"Sorted scores: {sorted_scores}")
print(f"Top 3 students: {students[top_3_indices]}")
print(f"Their scores: {test_scores[top_3_indices]}")
🔄 Basic Sorting
Sort arrays in ascending and descending order.
Simple Sorting
import numpy as np
# Product ratings
ratings = np.array([4.2, 3.8, 4.7, 3.5, 4.9, 4.1, 3.9, 4.5])

# Sort in ascending order (np.sort returns a new array; ratings is untouched)
sorted_asc = np.sort(ratings)
print(f"Original: {ratings}")
print(f"Ascending: {sorted_asc}")

# Sort in descending order by reversing the ascending result
sorted_desc = sorted_asc[::-1]
print(f"Descending: {sorted_desc}")

# Quick insights
# np.median averages the two middle values when the element count is even.
# Indexing sorted_asc[len(sorted_asc) // 2] would instead report the
# upper-middle value (4.2) rather than the true median (4.15) for these
# eight ratings.
median_rating = np.median(ratings)
print(f"Best rating: {sorted_desc[0]}")
print(f"Worst rating: {sorted_asc[0]}")
print(f"Median rating: {median_rating:.2f}")
In-place Sorting
import numpy as np
# Response times in milliseconds; snapshot them first because the sort
# below rearranges the array itself.
response_times = np.array([245, 123, 456, 189, 334, 267, 198])
original = np.array(response_times)
print(f"Before sorting: {response_times}")

# ndarray.sort() reorders the array in place instead of returning a copy
response_times.sort()
print(f"After sorting: {response_times}")

# Once sorted ascending, the two ends of the array hold the extremes.
fastest_3, slowest_3 = response_times[:3], response_times[-3:]
print(f"Fastest 3: {fastest_3}ms")
print(f"Slowest 3: {slowest_3}ms")
📍 Finding Sort Positions
Use `argsort()` to find the indices that would sort an array.
Basic argsort
import numpy as np
# Sales performance
sales = np.array([45000, 52000, 38000, 61000, 48000])
salespeople = np.array(['Alice', 'Bob', 'Carol', 'David', 'Eve'])

# Get sorting indices: sort_indices[0] is the position of the smallest sale,
# sort_indices[-1] the position of the largest.
sort_indices = np.argsort(sales)  # Ascending order
print(f"Sales: {sales}")
print(f"Sort indices: {sort_indices}")

# Use the same indices to walk the related array in sorted order
print("Ranked by performance (worst to best):")
for rank, idx in enumerate(sort_indices, start=1):
    print(f" {rank}. {salespeople[idx]}: ${sales[idx]:,}")
Top Performers
import numpy as np
# Employee ratings
ratings = np.array([8.5, 7.2, 9.1, 6.8, 8.9, 7.5, 9.3, 8.1])
employees = np.array(['John', 'Sarah', 'Mike', 'Lisa', 'Tom',
                      'Anna', 'Chris', 'Emma'])

# Argsort once (ascending) and slice both ends from the same result
ascending_order = np.argsort(ratings)

# Find top 3 performers
top_3_indices = ascending_order[-3:][::-1]  # Last 3, reversed -> best first
print("🏆 Top 3 Performers:")
for rank, idx in enumerate(top_3_indices, start=1):
    print(f" {rank}. {employees[idx]}: {ratings[idx]}/10")

# Find bottom 2 performers (lowest rating first)
bottom_2_indices = ascending_order[:2]
print("\n📉 Need Improvement:")
for idx in bottom_2_indices:
    print(f" {employees[idx]}: {ratings[idx]}/10")
Multi-criteria Sorting
import numpy as np
# Student data: [score, attendance_percentage]
student_data = np.array([[85, 95], [92, 88], [78, 92], [96, 85], [89, 98]])
names = np.array(['Alice', 'Bob', 'Carol', 'David', 'Eve'])

# Sort by score first, then by attendance
scores = student_data[:, 0]
attendance = student_data[:, 1]

# np.lexsort treats the LAST key as the primary one, so scores are the main
# criterion and attendance only breaks ties. Negating both keys turns the
# ascending sort into a descending ranking (best first). A plain
# np.argsort(scores)[::-1] would ignore attendance altogether.
score_order = np.lexsort((-attendance, -scores))
print("Ranked by test scores:")
for rank, idx in enumerate(score_order, start=1):
    print(f" {rank}. {names[idx]}: {scores[idx]}% (attendance: {attendance[idx]}%)")

# Find students with both high scores and attendance (boolean mask)
good_students = (scores >= 85) & (attendance >= 90)
print(f"\nExcellent students (85%+ score, 90%+ attendance): {names[good_students]}")
🎯 Multi-dimensional Sorting
Sort along different axes of multi-dimensional arrays.
Row and Column Sorting
import numpy as np
# Labels for the two axes of the sales grid below
products = ['Product A', 'Product B', 'Product C', 'Product D']
quarters = ['Q1', 'Q2', 'Q3', 'Q4']

# Quarterly sales: one row per product, one column per quarter
sales_matrix = np.array([
    [120, 135, 145, 160],  # Product A
    [98, 112, 125, 140],   # Product B
    [156, 167, 175, 185],  # Product C
    [89, 95, 105, 125],    # Product D
])
print(f"Original sales matrix: \n{sales_matrix}")

# axis=1 orders the values inside every row (each product's quarters).
# Sorting a copy in place leaves sales_matrix itself untouched.
sorted_by_quarters = sales_matrix.copy()
sorted_by_quarters.sort(axis=1)
print(f"Each product's quarters sorted: \n{sorted_by_quarters}")

# axis=0 orders the values inside every column (each quarter's products);
# note that rows no longer correspond to a single product afterwards.
sorted_by_products = sales_matrix.copy()
sorted_by_products.sort(axis=0)
print(f"Each quarter's products sorted: \n{sorted_by_products}")
Ranking Products
import numpy as np
# Quarterly sales: one row per product, one column per quarter
sales_matrix = np.array([[120, 135, 145, 160],
                         [98, 112, 125, 140],
                         [156, 167, 175, 185],
                         [89, 95, 105, 125]])
products = ['Product A', 'Product B', 'Product C', 'Product D']

# Calculate total sales per product (sum across the columns of each row)
product_totals = np.sum(sales_matrix, axis=1)

# Rank products by total sales: ascending argsort, reversed -> best first
ranking_indices = np.argsort(product_totals)[::-1]
print("🏆 Product Rankings by Total Sales:")
for rank, idx in enumerate(ranking_indices, start=1):
    print(f" {rank}. {products[idx]}: ${product_totals[idx]:,}")

# Find most consistent product (lowest standard deviation across quarters)
product_consistency = np.std(sales_matrix, axis=1)
most_consistent_idx = np.argmin(product_consistency)
print(f"\n📊 Most Consistent: {products[most_consistent_idx]} (std: {product_consistency[most_consistent_idx]:.1f})")
⚡ Partial Sorting
Use `np.partition()` / `np.argpartition()` for efficient partial sorting when you only need the top or bottom N elements and do not need the rest of the array in order.
Partition for Top N
import numpy as np
# Large dataset - website page views
page_views = np.array([1200, 3500, 890, 4200, 1800, 2900, 650,
                       3800, 1500, 2200, 5100, 1100, 2800, 4500])
pages = [f"Page-{i+1}" for i in range(len(page_views))]

# Find top 5 pages efficiently using partition.
# Efficiency note: partition is faster than a full sort for large arrays,
# because it only guarantees that the element at partition_idx is in its
# final sorted position, with smaller values before it and larger or equal
# values after it -- neither side is itself ordered.
k = 5
partition_idx = len(page_views) - k
partitioned = np.argpartition(page_views, partition_idx)

# The last k entries are the top-5 indices in arbitrary order, so sort just
# those k by view count (descending) to get a proper ranking.
top_5_indices = partitioned[-k:]
top_5_sorted = top_5_indices[np.argsort(page_views[top_5_indices])[::-1]]
print("🔥 Top 5 Most Viewed Pages:")
for rank, idx in enumerate(top_5_sorted, start=1):
    print(f" {rank}. {pages[idx]}: {page_views[idx]:,} views")

total_top_5_views = np.sum(page_views[top_5_sorted])
print(f"\nTop 5 pages account for {total_top_5_views:,} total views")
Finding Percentiles
import numpy as np
# Salary data
salaries = np.array([45000, 52000, 38000, 65000, 48000, 72000,
                     42000, 58000, 61000, 55000, 68000, 49000])

# Sort salaries (used for the range and for listing the high earners)
sorted_salaries = np.sort(salaries)

# Calculate percentiles.
# np.percentile interpolates between neighbouring values. Indexing the
# sorted array at int(n * p / 100) would overshoot: for these 12 salaries it
# reports 55,000 as the 50th percentile instead of the median 53,500.
n = len(sorted_salaries)
percentiles = [25, 50, 75, 90]
percentile_values = np.percentile(salaries, percentiles)
print("💰 Salary Analysis:")
print(f"Total employees: {n}")
print(f"Salary range: ${sorted_salaries[0]:,} - ${sorted_salaries[-1]:,}")
for p, value in zip(percentiles, percentile_values):
    print(f"{p}th percentile: ${value:,.0f}")

# Find employees above 75th percentile
p75 = np.percentile(salaries, 75)
high_earners = sorted_salaries[sorted_salaries > p75]
print(f"High earners (top 25%): {len(high_earners)} people")
🧠 Real-World Applications
Customer Analytics
import numpy as np
# Customer data: [purchase_amount, days_since_last_purchase]
customer_data = np.array([[250, 5], [180, 12], [420, 3], [95, 25],
                          [310, 8], [75, 45], [380, 6], [150, 18]])
customer_ids = [f"C{1000+i}" for i in range(len(customer_data))]
purchase_amounts = customer_data[:, 0]
days_since_purchase = customer_data[:, 1]

# Find high-value customers: strictly above the 75th-percentile purchase
high_value_threshold = np.percentile(purchase_amounts, 75)
high_value_customers = purchase_amounts > high_value_threshold

# Find recently active customers (threshold is inclusive)
recent_threshold = 10  # days
recent_customers = days_since_purchase <= recent_threshold

# Combine criteria for VIP customers (element-wise AND of the two masks)
vip_customers = high_value_customers & recent_customers

print("🌟 Customer Segmentation:")
print(f"High-value customers (75th+ percentile): {np.sum(high_value_customers)}")
# The label states "<=" so that it matches the inclusive comparison above.
print(f"Recent customers (<={recent_threshold} days): {np.sum(recent_customers)}")
print(f"VIP customers (both criteria): {np.sum(vip_customers)}")
if np.any(vip_customers):
    # np.where returns a tuple of index arrays; [0] is the only axis here
    vip_indices = np.where(vip_customers)[0]
    print(f"VIP customer IDs: {[customer_ids[i] for i in vip_indices]}")
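🎯 Key Takeaways
- `np.sort()` returns a sorted copy; `array.sort()` sorts in place.
- `np.argsort()` returns the indices that would sort an array, so you can reorder related arrays the same way.
- Reverse a sorted result with `[::-1]` to get descending order.
- Use the `axis` argument to sort the rows or columns of multi-dimensional arrays.
- `np.argpartition()` finds the top or bottom N elements without fully sorting the array.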
🚀 What's Next?
You've mastered array sorting! Next, learn how to find unique values and analyze patterns in your data.
Continue to: Finding Unique Values
Was this helpful?
Track Your Learning Progress
Sign in to bookmark tutorials and keep track of your learning journey.
Your progress is saved automatically as you read.