⚡ Performance Optimization
Writing fast, efficient NumPy code is crucial for large datasets! Learn vectorization, memory optimization, and other performance best practices to keep your data processing fast at real-world scale.
import numpy as np
import time
# Quick demo: timing a vectorized operation (the loop comparison follows below)
size = 100000
a = np.random.random(size)
b = np.random.random(size)
# Vectorized operation (fast)
start = time.time()
result = a * b + np.sin(a)
vectorized_time = time.time() - start
print(f"Dataset size: {size:,} elements")
print(f"Vectorized time: {vectorized_time:.4f} seconds")
print(f"Speed: {size/vectorized_time:,.0f} operations/second")
print(f"Memory usage: {result.nbytes / 1024:.1f} KB")
🚀 Vectorization Benefits
Replace loops with vectorized operations for massive speed gains.
Loop vs Vectorization
import numpy as np
import time
# Create test data
size = 50000
x = np.random.random(size)
y = np.random.random(size)
# Method 1: Python loop (slow)
start = time.time()
result_loop = []
for i in range(size):
    result_loop.append(x[i] ** 2 + y[i] ** 2)
result_loop = np.array(result_loop)
loop_time = time.time() - start
# Method 2: Vectorized (fast)
start = time.time()
result_vectorized = x**2 + y**2
vectorized_time = time.time() - start
print(f"Performance Comparison ({size:,} elements):")
print(f"Python loop: {loop_time:.4f} seconds")
print(f"Vectorized: {vectorized_time:.4f} seconds")
print(f"Speedup: {loop_time/vectorized_time:.1f}x faster")
print(f"Results identical: {np.allclose(result_loop, result_vectorized)}")
Complex Calculations
import numpy as np
import time
# Complex calculation: distance between points
np.random.seed(42)
n_points = 10000
x1, y1 = np.random.random(n_points), np.random.random(n_points)
x2, y2 = np.random.random(n_points), np.random.random(n_points)
# Vectorized distance calculation
start = time.time()
distances = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
vectorized_time = time.time() - start
print(f"Distance Calculation ({n_points:,} point pairs):")
print(f"Vectorized time: {vectorized_time:.4f} seconds")
print(f"Average distance: {np.mean(distances):.3f}")
print(f"Min distance: {np.min(distances):.3f}")
print(f"Max distance: {np.max(distances):.3f}")
# Find closest pairs
closest_indices = np.argsort(distances)[:5]
print(f"5 closest distances: {distances[closest_indices].round(3)}")
💾 Memory Optimization
Choose appropriate data types and manage memory efficiently.
Data Type Selection
import numpy as np
# Memory comparison for different data types
size = 1000000
# Integer types
int8_array = np.ones(size, dtype=np.int8)    # 1 byte per element (np.arange would overflow int8's max of 127)
int32_array = np.ones(size, dtype=np.int32)  # 4 bytes per element
int64_array = np.ones(size, dtype=np.int64)  # 8 bytes per element
# Float types
float32_array = np.random.random(size).astype(np.float32) # 4 bytes
float64_array = np.random.random(size).astype(np.float64) # 8 bytes
print(f"Memory Usage Comparison ({size:,} elements):")
print(f"int8: {int8_array.nbytes / 1024 / 1024:.1f} MB")
print(f"int32: {int32_array.nbytes / 1024 / 1024:.1f} MB")
print(f"int64: {int64_array.nbytes / 1024 / 1024:.1f} MB")
print(f"float32: {float32_array.nbytes / 1024 / 1024:.1f} MB")
print(f"float64: {float64_array.nbytes / 1024 / 1024:.1f} MB")
# Choose appropriate type for your data range
ratings = np.array([1, 2, 3, 4, 5] * 200000, dtype=np.int8) # Ratings 1-5
prices = np.random.uniform(10, 1000, 100000).astype(np.float32) # Prices
print(f"\nOptimized arrays:")
print(f"Ratings (1-5): {ratings.nbytes / 1024:.1f} KB (int8)")
print(f"Prices: {prices.nbytes / 1024:.1f} KB (float32)")
Memory-Efficient Operations
import numpy as np
# In-place operations save memory
size = 100000
data = np.random.random(size)
original_memory = data.nbytes
print(f"Original array: {original_memory / 1024:.1f} KB")
# Method 1: Creates new array (uses more memory)
# result = data * 2 + 1
# Method 2: In-place operations (memory efficient)
data *= 2 # Multiply in-place
data += 1 # Add in-place
print(f"After in-place operations: {data.nbytes / 1024:.1f} KB")
print(f"Sample values: {data[:5].round(2)}")
# Memory-efficient mathematical operations
large_array = np.random.random(200000)
print(f"\nLarge array: {large_array.nbytes / 1024:.1f} KB")
# Use output parameter for memory efficiency
result = np.empty_like(large_array) # Pre-allocate
np.sin(large_array, out=result) # Write directly to result
print(f"Result array: {result.nbytes / 1024:.1f} KB")
print(f"No extra memory allocated during calculation")
🔧 Broadcasting Optimization
Use broadcasting for efficient operations on different-shaped arrays.
Efficient Broadcasting
import numpy as np
import time
# Sales data: 1000 products × 12 months
products = 1000
months = 12
sales = np.random.randint(100, 1000, (products, months))
monthly_targets = np.array([800, 850, 900, 920, 950, 980,
                            990, 960, 940, 910, 880, 860])
print(f"Sales data shape: {sales.shape}")
print(f"Monthly targets shape: {monthly_targets.shape}")
# Efficient broadcasting comparison
start = time.time()
performance = sales / monthly_targets # Broadcasting automatically
broadcast_time = time.time() - start
print(f"Broadcasting time: {broadcast_time:.4f} seconds")
print(f"Performance matrix shape: {performance.shape}")
# Analyze performance
exceeded_target = sales > monthly_targets
products_above_target = np.sum(exceeded_target, axis=1) # Per product
months_above_target = np.sum(exceeded_target, axis=0) # Per month
print(f"\nPerformance Analysis:")
print(f"Products consistently above target: {np.sum(products_above_target == months)}")
print(f"Best performing month: Month {np.argmax(months_above_target) + 1}")
print(f"Average performance ratio: {np.mean(performance):.2f}")
Multi-dimensional Broadcasting
import numpy as np
# Customer analysis: regions × age_groups × products
regions = 4
age_groups = 5
products = 3
# Purchase probabilities by age group (affects all regions and products)
age_factors = np.array([0.6, 0.8, 1.0, 0.9, 0.7]) # Shape: (5,)
# Regional factors (affects all age groups and products)
regional_factors = np.array([1.2, 0.9, 1.1, 0.8]) # Shape: (4,)
# Product base rates (affects all regions and age groups)
product_rates = np.array([0.15, 0.25, 0.10]) # Shape: (3,)
# Efficient broadcasting calculation
# Reshape for broadcasting: (4,1,1) × (1,5,1) × (1,1,3)
purchase_prob = (regional_factors.reshape(4,1,1) *
                 age_factors.reshape(1,5,1) *
                 product_rates.reshape(1,1,3))
print(f"Purchase probability matrix shape: {purchase_prob.shape}")
print(f"Example - Region 1, Age group 3, Product 2: {purchase_prob[0,2,1]:.3f}")
# Analysis
best_region = np.argmax(np.mean(purchase_prob, axis=(1,2)))
best_age_group = np.argmax(np.mean(purchase_prob, axis=(0,2)))
best_product = np.argmax(np.mean(purchase_prob, axis=(0,1)))
print(f"Best performing region: {best_region + 1}")
print(f"Best age group: {best_age_group + 1}")
print(f"Best product: {best_product + 1}")
🎯 Algorithm Optimization
Choose efficient algorithms and functions for your specific needs.
Efficient Sorting and Searching
import numpy as np
import time
# Large dataset for performance testing
size = 100000
data = np.random.randint(1, 10000, size)
# Find top 10 values efficiently
k = 10
# Method 1: Full sort (slower for large arrays)
start = time.time()
full_sort = np.sort(data)
top_10_sort = full_sort[-k:]
sort_time = time.time() - start
# Method 2: Partial sort (faster)
start = time.time()
partition_idx = size - k
partitioned = np.argpartition(data, partition_idx)
top_10_indices = partitioned[-k:]
top_10_partition = data[top_10_indices]
top_10_sorted = np.sort(top_10_partition) # Sort only the top k
partition_time = time.time() - start
print(f"Performance Comparison (finding top {k} from {size:,} elements):")
print(f"Full sort: {sort_time:.4f} seconds")
print(f"Partition: {partition_time:.4f} seconds")
print(f"Speedup: {sort_time/partition_time:.1f}x faster")
print(f"Results match: {np.array_equal(top_10_sort, top_10_sorted)}")
Efficient Aggregations
import numpy as np
import time
# Multi-dimensional aggregation optimization
shape = (1000, 500)
data = np.random.random(shape)
print(f"Data shape: {shape} ({data.nbytes / 1024 / 1024:.1f} MB)")
# Efficient aggregations along axes
start = time.time()
row_sums = np.sum(data, axis=1) # Sum each row
col_means = np.mean(data, axis=0) # Mean of each column
overall_std = np.std(data) # Overall standard deviation
axis_time = time.time() - start
print(f"Aggregation time: {axis_time:.4f} seconds")
print(f"Row sums shape: {row_sums.shape}")
print(f"Column means shape: {col_means.shape}")
# Find patterns efficiently
start = time.time()
high_values = data > 0.8
high_value_counts = np.sum(high_values, axis=1) # Count per row
rows_with_many_high = np.sum(high_value_counts > 100) # Rows with >100 high values
pattern_time = time.time() - start
print(f"Pattern analysis time: {pattern_time:.4f} seconds")
print(f"Rows with >100 high values: {rows_with_many_high}")
# Memory-efficient boolean operations
start = time.time()
condition1 = data > 0.3
condition2 = data < 0.7
combined = condition1 & condition2 # Element-wise AND
middle_range_count = np.sum(combined)
boolean_time = time.time() - start
print(f"Boolean operations time: {boolean_time:.4f} seconds")
print(f"Values in middle range (0.3-0.7): {middle_range_count:,}")
📊 Real-World Performance Example
Optimize a complete data processing pipeline.
Customer Analytics Pipeline
import numpy as np
import time
# Simulate large customer dataset
n_customers = 100000
n_features = 20
print(f"🏢 Customer Analytics Pipeline")
print(f"Processing {n_customers:,} customers with {n_features} features")
start_total = time.time()
# Step 1: Generate realistic customer data
np.random.seed(123)
customer_data = np.random.random((n_customers, n_features))
# Step 2: Feature engineering (vectorized)
start = time.time()
# Normalize features (mean=0, std=1)
means = np.mean(customer_data, axis=0)
stds = np.std(customer_data, axis=0)
normalized_data = (customer_data - means) / stds
# Create composite scores
engagement_score = np.mean(normalized_data[:, :5], axis=1)
value_score = np.mean(normalized_data[:, 5:10], axis=1)
loyalty_score = np.mean(normalized_data[:, 10:15], axis=1)
feature_time = time.time() - start
# Step 3: Customer segmentation (efficient)
start = time.time()
# Multi-dimensional segmentation
high_engagement = engagement_score > 0.5
high_value = value_score > 0.5
high_loyalty = loyalty_score > 0.5
# Combine conditions efficiently
vip_customers = high_engagement & high_value & high_loyalty
at_risk = high_value & ~high_loyalty
prospects = high_engagement & ~high_value
segmentation_time = time.time() - start
# Step 4: Analytics (vectorized aggregations)
start = time.time()
segment_stats = {
    'VIP': np.sum(vip_customers),
    'At Risk': np.sum(at_risk),
    'Prospects': np.sum(prospects),
    'Regular': n_customers - np.sum(vip_customers | at_risk | prospects)
}
# Performance metrics
avg_engagement = np.mean(engagement_score)
avg_value = np.mean(value_score)
avg_loyalty = np.mean(loyalty_score)
analytics_time = time.time() - start
total_time = time.time() - start_total
# Results
print(f"\n⏱️ Pipeline Performance:")
print(f"Feature engineering: {feature_time:.3f}s")
print(f"Segmentation: {segmentation_time:.3f}s")
print(f"Analytics: {analytics_time:.3f}s")
print(f"Total time: {total_time:.3f}s")
print(f"Processing rate: {n_customers/total_time:,.0f} customers/second")
print(f"\n📊 Customer Segments:")
for segment, count in segment_stats.items():
    percentage = count / n_customers * 100
    print(f"{segment}: {count:,} ({percentage:.1f}%)")
print(f"\n📈 Average Scores:")
print(f"Engagement: {avg_engagement:.3f}")
print(f"Value: {avg_value:.3f}")
print(f"Loyalty: {avg_loyalty:.3f}")
🎯 Key Takeaways
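Vectorize computations instead of looping in Python - the speedups are often 10-100x.
Pick the smallest data type that safely holds your values (int8 for small integers, float32 where double precision isn't needed).
Use in-place operations and the out= parameter to avoid allocating temporary arrays.
Let broadcasting combine different-shaped arrays without tiling or copying.
Reach for partial algorithms such as np.argpartition when you only need the top k values, not a full sort.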
🚀 What's Next?
Excellent work mastering NumPy performance! You're now ready for the final section on file operations.
Continue to: File Operations