🔢 Sorting Arrays

Sorting is essential for organizing data and finding patterns! NumPy provides powerful sorting functions that help you arrange data in meaningful order, find rankings, and identify top performers efficiently.

import numpy as np

# Overview: exam scores and the matching student names (paired by position)
students = np.array([
    'Alice', 'Bob', 'Carol', 'David', 'Eve',
    'Frank', 'Grace', 'Henry', 'Iris', 'Jack',
])
test_scores = np.array([85, 92, 78, 96, 89, 74, 88, 93, 82, 90])

# np.sort returns a sorted copy; test_scores keeps its original order
sorted_scores = np.sort(test_scores)
print(f"Original scores: {test_scores}")
print(f"Sorted scores: {sorted_scores}")

# argsort yields indices in ascending score order, so flipping it and
# taking the first three gives the best student first
ascending_order = test_scores.argsort()
top_3_indices = ascending_order[::-1][:3]
print(f"Top 3 students: {students[top_3_indices]}")
print(f"Their scores: {test_scores[top_3_indices]}")

🔄 Basic Sorting

Sort arrays in ascending and descending order.

Simple Sorting

import numpy as np

# Product ratings
ratings = np.array([4.2, 3.8, 4.7, 3.5, 4.9, 4.1, 3.9, 4.5])

# Sort in ascending order (np.sort returns a new array; ratings is untouched)
sorted_asc = np.sort(ratings)
print(f"Original: {ratings}")
print(f"Ascending: {sorted_asc}")

# Sort in descending order
sorted_desc = np.sort(ratings)[::-1]  # Reverse the sorted array
print(f"Descending: {sorted_desc}")

# Quick insights
# np.median averages the two middle values when the element count is even.
# Indexing sorted_asc[len(sorted_asc) // 2] would report only the upper of
# the two middle ratings (4.2 here) instead of the true median (4.15).
median_rating = np.median(ratings)
print(f"Best rating: {sorted_desc[0]}")
print(f"Worst rating: {sorted_asc[0]}")
print(f"Median rating: {median_rating:.2f}")

In-place Sorting

import numpy as np

# Response times; a copy is taken first because sort() below reorders
# response_times itself
response_times = np.array([245, 123, 456, 189, 334, 267, 198])
original = np.array(response_times)  # np.array copies its input by default

print(f"Before sorting: {response_times}")

# ndarray.sort() works in place: it returns None and mutates the array
response_times.sort()
print(f"After sorting: {response_times}")

# After an ascending sort the smallest values are at the front and the
# largest at the back
fastest_3, slowest_3 = response_times[:3], response_times[-3:]
print(f"Fastest 3: {fastest_3}ms")
print(f"Slowest 3: {slowest_3}ms")

📍 Finding Sort Positions

Use argsort() to find the indices that would sort an array.

Basic argsort

import numpy as np

# Sales figures and the salesperson each one belongs to (paired by position)
salespeople = np.array(['Alice', 'Bob', 'Carol', 'David', 'Eve'])
sales = np.array([45000, 52000, 38000, 61000, 48000])

# argsort returns the positions that would put sales in ascending order
sort_indices = sales.argsort()
print(f"Sales: {sales}")
print(f"Sort indices: {sort_indices}")

# Indexing both arrays with the same index array keeps names and amounts
# aligned while reordering them
print("Ranked by performance (worst to best):")
ranked_names = salespeople[sort_indices]
ranked_sales = sales[sort_indices]
for rank, (person, amount) in enumerate(zip(ranked_names, ranked_sales), start=1):
    print(f"  {rank}. {person}: ${amount:,}")

Top Performers

import numpy as np

# Employee names and their ratings (paired by position)
employees = np.array([
    'John', 'Sarah', 'Mike', 'Lisa', 'Tom',
    'Anna', 'Chris', 'Emma',
])
ratings = np.array([8.5, 7.2, 9.1, 6.8, 8.9, 7.5, 9.3, 8.1])

# Top 3: indices of the three highest ratings, best first
top_3_indices = np.argsort(ratings)[::-1][:3]

print("🏆 Top 3 Performers:")
for place, who in enumerate(top_3_indices, start=1):
    print(f"  {place}. {employees[who]}: {ratings[who]}/10")

# Bottom 2: the first two entries of the ascending order, lowest first
ascending_order = np.argsort(ratings)
bottom_2_indices = ascending_order[:2]
print("\n📉 Need Improvement:")
for who in bottom_2_indices:
    print(f"  {employees[who]}: {ratings[who]}/10")

Multi-criteria Sorting

import numpy as np

# Student data: [score, attendance_percentage]
student_data = np.array([[85, 95], [92, 88], [78, 92], [96, 85], [89, 98]])
names = np.array(['Alice', 'Bob', 'Carol', 'David', 'Eve'])

# Column 0 holds the scores, column 1 the attendance percentages
scores = student_data[:, 0]
attendance = student_data[:, 1]

# Sort by score first, then by attendance (both descending).
# np.lexsort sorts ascending and treats the LAST key in the tuple as the
# primary key, so the keys are negated and scores are passed last. A plain
# np.argsort(scores)[::-1] would ignore attendance and leave students with
# equal scores in an arbitrary relative order.
score_order = np.lexsort((-attendance, -scores))

print("Ranked by test scores:")
for i, idx in enumerate(score_order):
    print(f"  {i+1}. {names[idx]}: {scores[idx]}% (attendance: {attendance[idx]}%)")

# Find students with both high scores and attendance
# (element-wise & combines the two boolean masks)
good_students = (scores >= 85) & (attendance >= 90)
print(f"\nExcellent students (85%+ score, 90%+ attendance): {names[good_students]}")

🎯 Multi-dimensional Sorting

Sort along different axes of multi-dimensional arrays.

Row and Column Sorting

import numpy as np

# Labels for the two axes of the sales matrix below
quarters = ['Q1', 'Q2', 'Q3', 'Q4']
products = ['Product A', 'Product B', 'Product C', 'Product D']

# Quarterly sales: one row per product, one column per quarter
sales_matrix = np.array([
    [120, 135, 145, 160],  # Product A
    [98, 112, 125, 140],   # Product B
    [156, 167, 175, 185],  # Product C
    [89, 95, 105, 125],    # Product D
])

print(f"Original sales matrix: \n{sales_matrix}")

# axis=1 sorts within each row, i.e. each product's quarters independently.
# Sorting a copy in place leaves sales_matrix untouched.
sorted_by_quarters = sales_matrix.copy()
sorted_by_quarters.sort(axis=1)
print(f"Each product's quarters sorted: \n{sorted_by_quarters}")

# axis=0 sorts within each column, i.e. each quarter's products independently.
# Rows of the result no longer correspond to a single product.
sorted_by_products = sales_matrix.copy()
sorted_by_products.sort(axis=0)
print(f"Each quarter's products sorted: \n{sorted_by_products}")

Ranking Products

import numpy as np

# One row per product, one column per quarter
products = ['Product A', 'Product B', 'Product C', 'Product D']
sales_matrix = np.array([
    [120, 135, 145, 160],
    [98, 112, 125, 140],
    [156, 167, 175, 185],
    [89, 95, 105, 125],
])

# Summing along axis=1 collapses the quarters, leaving one total per product
product_totals = sales_matrix.sum(axis=1)

# Ascending argsort flipped end-to-end ranks the largest total first
ranking_indices = product_totals.argsort()[::-1]

print("🏆 Product Rankings by Total Sales:")
for position, product_idx in enumerate(ranking_indices, start=1):
    print(f"  {position}. {products[product_idx]}: ${product_totals[product_idx]:,}")

# The product whose quarterly sales vary least has the smallest standard deviation
product_consistency = sales_matrix.std(axis=1)
most_consistent_idx = product_consistency.argmin()
steadiest_name = products[most_consistent_idx]
steadiest_std = product_consistency[most_consistent_idx]
print(f"\n📊 Most Consistent: {steadiest_name} (std: {steadiest_std:.1f})")

⚡ Partial Sorting

Use `np.partition()` / `np.argpartition()` for efficient partial sorting when you only need the top or bottom N elements.

Partition for Top N

import numpy as np

# Website page views; pages[i] is the label for page_views[i]
page_views = np.array([
    1200, 3500, 890, 4200, 1800, 2900, 650,
    3800, 1500, 2200, 5100, 1100, 2800, 4500,
])
pages = [f"Page-{number}" for number in range(1, len(page_views) + 1)]

# argpartition places the index of the partition_idx-th smallest value at
# position partition_idx, with indices of all larger-or-equal values after
# it, without fully sorting either side
k = 5
partition_idx = page_views.size - k
partitioned = np.argpartition(page_views, partition_idx)

# The tail holds the k largest in no particular order, so only those k
# entries need an actual sort (descending by view count)
top_5_indices = partitioned[partition_idx:]
descending_within_top = np.argsort(page_views[top_5_indices])[::-1]
top_5_sorted = top_5_indices[descending_within_top]

print("🔥 Top 5 Most Viewed Pages:")
for rank, page_idx in enumerate(top_5_sorted, start=1):
    print(f"  {rank}. {pages[page_idx]}: {page_views[page_idx]:,} views")

# Efficiency note: partition is faster than full sort for large arrays
total_top_5_views = page_views[top_5_sorted].sum()
print(f"\nTop 5 pages account for {total_top_5_views:,} total views")

Finding Percentiles

import numpy as np

# Salary data
salaries = np.array([45000, 52000, 38000, 65000, 48000, 72000,
                    42000, 58000, 61000, 55000, 68000, 49000])

# Sort salaries (used for the min/max range and the high-earner slice)
sorted_salaries = np.sort(salaries)

# Calculate percentiles
n = len(sorted_salaries)
percentiles = [25, 50, 75, 90]

print("💰 Salary Analysis:")
print(f"Total employees: {n}")
print(f"Salary range: ${sorted_salaries[0]:,} - ${sorted_salaries[-1]:,}")

# np.percentile interpolates linearly between neighbouring sorted values.
# Indexing sorted_salaries[int(n * p / 100)] overshoots: for the 50th
# percentile of these 12 salaries it picks the 7th-smallest (55,000)
# rather than the median (53,500).
percentile_values = np.percentile(salaries, percentiles)
for p, value in zip(percentiles, percentile_values):
    print(f"{p}th percentile: ${value:,.0f}")

# Find employees above 75th percentile
p75 = percentile_values[percentiles.index(75)]
high_earners = sorted_salaries[sorted_salaries > p75]
print(f"High earners (top 25%): {len(high_earners)} people")

🧠 Real-World Applications

Customer Analytics

import numpy as np

# Customer data: [purchase_amount, days_since_last_purchase]
customer_data = np.array([[250, 5], [180, 12], [420, 3], [95, 25],
                         [310, 8], [75, 45], [380, 6], [150, 18]])
customer_ids = [f"C{1000+i}" for i in range(len(customer_data))]

# Column 0 holds the purchase amounts, column 1 the days since last purchase
purchase_amounts = customer_data[:, 0]
days_since_purchase = customer_data[:, 1]

# Find high-value customers (strictly above the 75th-percentile amount)
high_value_threshold = np.percentile(purchase_amounts, 75)
high_value_customers = purchase_amounts > high_value_threshold

# Find recently active customers (the threshold day itself counts as recent)
recent_threshold = 10  # days
recent_customers = days_since_purchase <= recent_threshold

# Combine criteria for VIP customers (element-wise AND of the two masks)
vip_customers = high_value_customers & recent_customers

print("🌟 Customer Segmentation:")
print(f"High-value customers (75th+ percentile): {np.sum(high_value_customers)}")
# The label shows "<=" so it matches the inclusive comparison used above
print(f"Recent customers (<={recent_threshold} days): {np.sum(recent_customers)}")
print(f"VIP customers (both criteria): {np.sum(vip_customers)}")

# np.where on a 1-D boolean mask returns a 1-tuple holding the True positions
vip_indices = np.where(vip_customers)[0]
vip_ids = [customer_ids[i] for i in vip_indices]
if vip_ids:
    print(f"VIP customer IDs: {vip_ids}")

🎯 Key Takeaways

🚀 What's Next?

You've mastered array sorting! Next, learn to find unique values and analyze data patterns.

Continue to: Finding Unique Values

Online Python

🔢 Sorting Arrays

Track Your Learning Progress