⚡ Performance Optimization
Writing fast, efficient NumPy code is crucial for large datasets! Learn vectorization, memory optimization, and other performance best practices to keep your data processing fast at real-world scale.
import numpy as np
import time
# Quick demo: timing a vectorized operation (the loop comparison follows below)
size = 100000
a = np.random.random(size)
b = np.random.random(size)
# Vectorized operation (fast)
start = time.time()
result = a * b + np.sin(a)
vectorized_time = time.time() - start
print(f"Dataset size: {size:,} elements")
print(f"Vectorized time: {vectorized_time:.4f} seconds")
print(f"Speed: {size/vectorized_time:,.0f} operations/second")
print(f"Memory usage: {result.nbytes / 1024:.1f} KB")
🚀 Vectorization Benefits
Replace loops with vectorized operations for massive speed gains.
Loop vs Vectorization
import numpy as np
import time
# Create test data
size = 50000
x = np.random.random(size)
y = np.random.random(size)
# Method 1: Python loop (slow)
start = time.time()
result_loop = []
for i in range(size):
    result_loop.append(x[i] ** 2 + y[i] ** 2)
result_loop = np.array(result_loop)
loop_time = time.time() - start
# Method 2: Vectorized (fast)
start = time.time()
result_vectorized = x**2 + y**2
vectorized_time = time.time() - start
print(f"Performance Comparison ({size:,} elements):")
print(f"Python loop: {loop_time:.4f} seconds")
print(f"Vectorized: {vectorized_time:.4f} seconds")
print(f"Speedup: {loop_time/vectorized_time:.1f}x faster")
print(f"Results identical: {np.allclose(result_loop, result_vectorized)}")
Complex Calculations
import numpy as np
import time
# Complex calculation: distance between points
np.random.seed(42)
n_points = 10000
x1, y1 = np.random.random(n_points), np.random.random(n_points)
x2, y2 = np.random.random(n_points), np.random.random(n_points)
# Vectorized distance calculation
start = time.time()
distances = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
vectorized_time = time.time() - start
print(f"Distance Calculation ({n_points:,} point pairs):")
print(f"Vectorized time: {vectorized_time:.4f} seconds")
print(f"Average distance: {np.mean(distances):.3f}")
print(f"Min distance: {np.min(distances):.3f}")
print(f"Max distance: {np.max(distances):.3f}")
# Find closest pairs
closest_indices = np.argsort(distances)[:5]
print(f"5 closest distances: {distances[closest_indices].round(3)}")
💾 Memory Optimization
Choose appropriate data types and manage memory efficiently.
Data Type Selection
import numpy as np
# Memory comparison for different data types
size = 1000000
# Integer types
int8_array = np.ones(size, dtype=np.int8)    # 1 byte per element (np.arange would overflow int8's max of 127)
int32_array = np.ones(size, dtype=np.int32)  # 4 bytes per element
int64_array = np.ones(size, dtype=np.int64)  # 8 bytes per element
# Float types
float32_array = np.random.random(size).astype(np.float32) # 4 bytes
float64_array = np.random.random(size).astype(np.float64) # 8 bytes
print(f"Memory Usage Comparison ({size:,} elements):")
print(f"int8: {int8_array.nbytes / 1024 / 1024:.1f} MB")
print(f"int32: {int32_array.nbytes / 1024 / 1024:.1f} MB")
print(f"int64: {int64_array.nbytes / 1024 / 1024:.1f} MB")
print(f"float32: {float32_array.nbytes / 1024 / 1024:.1f} MB")
print(f"float64: {float64_array.nbytes / 1024 / 1024:.1f} MB")
# Choose appropriate type for your data range
ratings = np.array([1, 2, 3, 4, 5] * 200000, dtype=np.int8) # Ratings 1-5
prices = np.random.uniform(10, 1000, 100000).astype(np.float32) # Prices
print(f"\nOptimized arrays:")
print(f"Ratings (1-5): {ratings.nbytes / 1024:.1f} KB (int8)")
print(f"Prices: {prices.nbytes / 1024:.1f} KB (float32)")
Memory-Efficient Operations
import numpy as np
# In-place operations save memory
size = 100000
data = np.random.random(size)
original_memory = data.nbytes
print(f"Original array: {original_memory / 1024:.1f} KB")
# Method 1: Creates new array (uses more memory)
# result = data * 2 + 1
# Method 2: In-place operations (memory efficient)
data *= 2 # Multiply in-place
data += 1 # Add in-place
print(f"After in-place operations: {data.nbytes / 1024:.1f} KB")
print(f"Sample values: {data[:5].round(2)}")
# Memory-efficient mathematical operations
large_array = np.random.random(200000)
print(f"\nLarge array: {large_array.nbytes / 1024:.1f} KB")
# Use output parameter for memory efficiency
result = np.empty_like(large_array) # Pre-allocate
np.sin(large_array, out=result) # Write directly to result
print(f"Result array: {result.nbytes / 1024:.1f} KB")
print(f"No extra memory allocated during calculation")
🔧 Broadcasting Optimization
Use broadcasting for efficient operations on different-shaped arrays.
Efficient Broadcasting
import numpy as np
import time
# Sales data: 1000 products × 12 months
products = 1000
months = 12
sales = np.random.randint(100, 1000, (products, months))
monthly_targets = np.array([800, 850, 900, 920, 950, 980,
                            990, 960, 940, 910, 880, 860])
print(f"Sales data shape: {sales.shape}")
print(f"Monthly targets shape: {monthly_targets.shape}")
# Efficient broadcasting comparison
start = time.time()
performance = sales / monthly_targets # Broadcasting automatically
broadcast_time = time.time() - start
print(f"Broadcasting time: {broadcast_time:.4f} seconds")
print(f"Performance matrix shape: {performance.shape}")
# Analyze performance
exceeded_target = sales > monthly_targets
products_above_target = np.sum(exceeded_target, axis=1) # Per product
months_above_target = np.sum(exceeded_target, axis=0) # Per month
print(f"\nPerformance Analysis:")
print(f"Products consistently above target: {np.sum(products_above_target == months)}")
print(f"Best performing month: Month {np.argmax(months_above_target) + 1}")
print(f"Average performance ratio: {np.mean(performance):.2f}")
Multi-dimensional Broadcasting
import numpy as np
# Customer analysis: regions × age_groups × products
regions = 4
age_groups = 5
products = 3
# Purchase probabilities by age group (affects all regions and products)
age_factors = np.array([0.6, 0.8, 1.0, 0.9, 0.7]) # Shape: (5,)
# Regional factors (affects all age groups and products)
regional_factors = np.array([1.2, 0.9, 1.1, 0.8]) # Shape: (4,)
# Product base rates (affects all regions and age groups)
product_rates = np.array([0.15, 0.25, 0.10]) # Shape: (3,)
# Efficient broadcasting calculation
# Reshape for broadcasting: (4,1,1) × (1,5,1) × (1,1,3)
purchase_prob = (regional_factors.reshape(4,1,1) *
                 age_factors.reshape(1,5,1) *
                 product_rates.reshape(1,1,3))
print(f"Purchase probability matrix shape: {purchase_prob.shape}")
print(f"Example - Region 1, Age group 3, Product 2: {purchase_prob[0,2,1]:.3f}")
# Analysis
best_region = np.argmax(np.mean(purchase_prob, axis=(1,2)))
best_age_group = np.argmax(np.mean(purchase_prob, axis=(0,2)))
best_product = np.argmax(np.mean(purchase_prob, axis=(0,1)))
print(f"Best performing region: {best_region + 1}")
print(f"Best age group: {best_age_group + 1}")
print(f"Best product: {best_product + 1}")
🎯 Algorithm Optimization
Choose efficient algorithms and functions for your specific needs.
Efficient Sorting and Searching
import numpy as np
import time
# Large dataset for performance testing
size = 100000
data = np.random.randint(1, 10000, size)
# Find top 10 values efficiently
k = 10
# Method 1: Full sort (slower for large arrays)
start = time.time()
full_sort = np.sort(data)
top_10_sort = full_sort[-k:]
sort_time = time.time() - start
# Method 2: Partial sort (faster)
start = time.time()
partition_idx = size - k
partitioned = np.argpartition(data, partition_idx)
top_10_indices = partitioned[-k:]
top_10_partition = data[top_10_indices]
top_10_sorted = np.sort(top_10_partition) # Sort only the top k
partition_time = time.time() - start
print(f"Performance Comparison (finding top {k} from {size:,} elements):")
print(f"Full sort: {sort_time:.4f} seconds")
print(f"Partition: {partition_time:.4f} seconds")
print(f"Speedup: {sort_time/partition_time:.1f}x faster")
print(f"Results match: {np.array_equal(top_10_sort, top_10_sorted)}")
Efficient Aggregations
import numpy as np
import time
# Multi-dimensional aggregation optimization
shape = (1000, 500)
data = np.random.random(shape)
print(f"Data shape: {shape} ({data.nbytes / 1024 / 1024:.1f} MB)")
# Efficient aggregations along axes
start = time.time()
row_sums = np.sum(data, axis=1) # Sum each row
col_means = np.mean(data, axis=0) # Mean of each column
overall_std = np.std(data) # Overall standard deviation
axis_time = time.time() - start
print(f"Aggregation time: {axis_time:.4f} seconds")
print(f"Row sums shape: {row_sums.shape}")
print(f"Column means shape: {col_means.shape}")
# Find patterns efficiently
start = time.time()
high_values = data > 0.8
high_value_counts = np.sum(high_values, axis=1) # Count per row
rows_with_many_high = np.sum(high_value_counts > 100) # Rows with >100 high values
pattern_time = time.time() - start
print(f"Pattern analysis time: {pattern_time:.4f} seconds")
print(f"Rows with >100 high values: {rows_with_many_high}")
# Memory-efficient boolean operations
start = time.time()
condition1 = data > 0.3
condition2 = data < 0.7
combined = condition1 & condition2 # Element-wise AND
middle_range_count = np.sum(combined)
boolean_time = time.time() - start
print(f"Boolean operations time: {boolean_time:.4f} seconds")
print(f"Values in middle range (0.3-0.7): {middle_range_count:,}")
📊 Real-World Performance Example
Optimize a complete data processing pipeline.
Customer Analytics Pipeline
import numpy as np
import time
# Simulate large customer dataset
n_customers = 100000
n_features = 20
print(f"🏢 Customer Analytics Pipeline")
print(f"Processing {n_customers:,} customers with {n_features} features")
start_total = time.time()
# Step 1: Generate realistic customer data
np.random.seed(123)
customer_data = np.random.random((n_customers, n_features))
# Step 2: Feature engineering (vectorized)
start = time.time()
# Normalize features (mean=0, std=1)
means = np.mean(customer_data, axis=0)
stds = np.std(customer_data, axis=0)
normalized_data = (customer_data - means) / stds
# Create composite scores
engagement_score = np.mean(normalized_data[:, :5], axis=1)
value_score = np.mean(normalized_data[:, 5:10], axis=1)
loyalty_score = np.mean(normalized_data[:, 10:15], axis=1)
feature_time = time.time() - start
# Step 3: Customer segmentation (efficient)
start = time.time()
# Multi-dimensional segmentation
high_engagement = engagement_score > 0.5
high_value = value_score > 0.5
high_loyalty = loyalty_score > 0.5
# Combine conditions efficiently
vip_customers = high_engagement & high_value & high_loyalty
at_risk = high_value & ~high_loyalty
prospects = high_engagement & ~high_value
segmentation_time = time.time() - start
# Step 4: Analytics (vectorized aggregations)
start = time.time()
segment_stats = {
    'VIP': np.sum(vip_customers),
    'At Risk': np.sum(at_risk),
    'Prospects': np.sum(prospects),
    'Regular': n_customers - np.sum(vip_customers | at_risk | prospects)
}
# Performance metrics
avg_engagement = np.mean(engagement_score)
avg_value = np.mean(value_score)
avg_loyalty = np.mean(loyalty_score)
analytics_time = time.time() - start
total_time = time.time() - start_total
# Results
print(f"\n⏱️ Pipeline Performance:")
print(f"Feature engineering: {feature_time:.3f}s")
print(f"Segmentation: {segmentation_time:.3f}s")
print(f"Analytics: {analytics_time:.3f}s")
print(f"Total time: {total_time:.3f}s")
print(f"Processing rate: {n_customers/total_time:,.0f} customers/second")
print(f"\n📊 Customer Segments:")
for segment, count in segment_stats.items():
    percentage = count / n_customers * 100
    print(f"{segment}: {count:,} ({percentage:.1f}%)")
print(f"\n📈 Average Scores:")
print(f"Engagement: {avg_engagement:.3f}")
print(f"Value: {avg_value:.3f}")
print(f"Loyalty: {avg_loyalty:.3f}")
🎯 Key Takeaways
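Vectorize computations instead of looping in Python - the speedups are often 10-100x.
Pick the smallest data type that safely holds your values (int8 for small integers, float32 where double precision isn't needed).
Use in-place operations and the out= parameter to avoid allocating temporary arrays.
Let broadcasting combine different-shaped arrays without tiling or copying.
Reach for partial algorithms such as np.argpartition when you only need the top k values, not a full sort.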
🚀 What's Next?
Excellent work mastering NumPy performance! You're now ready for the final section on file operations.
Continue to: File Operations