📈 Sum, Mean, Min, Max
NumPy's reduction functions are your essential tools for data summarization! Calculate totals, averages, and extremes efficiently across entire arrays or specific dimensions to extract meaningful insights from your data.
import numpy as np

# Seven sales figures used to demonstrate the four core reductions.
sales = np.array([1200, 1350, 1180, 1420, 1290, 1380, 1250])
print(f"Monthly sales: {sales}")

# Every reduction is also available as an ndarray method; the method form
# returns the same NumPy scalars as the np.* function form.
total = sales.sum()
average = sales.mean()
best = sales.max()
worst = sales.min()

print(f"Total: ${total:,}")
print(f"Average: ${average:,.0f}")
print(f"Best month: ${best:,}")
print(f"Worst month: ${worst:,}")
📊 Sum Operations
Calculate totals across arrays and dimensions.
Basic Sum
import numpy as np

# One week of visitor counts, Monday first; `days` is index-aligned with it.
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
daily_visitors = np.array([1200, 1350, 1180, 1420, 1290, 1100, 980])

# sum() collapses the week to one number; cumsum() keeps every partial
# total, so its last element equals the weekly total.
total_visitors = daily_visitors.sum()
cumulative = daily_visitors.cumsum()

print(f"Daily visitors: {daily_visitors}")
print(f"Weekly total: {total_visitors:,} visitors")
print(f"Cumulative visitors: {cumulative}")
Multi-dimensional Sum
import numpy as np

# Sales data: 3 stores (rows) x 4 quarters (columns)
sales_matrix = np.array([
    [120, 135, 145, 160],  # Store A
    [98, 112, 125, 140],   # Store B
    [156, 167, 175, 185],  # Store C
])
stores = ['Store A', 'Store B', 'Store C']
quarters = ['Q1', 'Q2', 'Q3', 'Q4']

# axis=1 collapses the columns (one total per store);
# axis=0 collapses the rows (one total per quarter);
# no axis collapses everything to a single scalar.
store_totals = np.sum(sales_matrix, axis=1)
quarter_totals = np.sum(sales_matrix, axis=0)
grand_total = np.sum(sales_matrix)

# .tolist() converts the NumPy scalars to plain Python ints so the dicts
# print as {'Store A': 560, ...} instead of {'Store A': np.int64(560), ...},
# which is what NumPy 2's scalar repr would otherwise show.
store_summary = dict(zip(stores, store_totals.tolist()))
quarter_summary = dict(zip(quarters, quarter_totals.tolist()))

print(f"Store totals: {store_summary}")
print(f"Quarter totals: {quarter_summary}")
print(f"Grand total: {grand_total}")
📊 Mean (Average) Calculations
Calculate central tendencies and typical values.
Basic Mean
import numpy as np

# Test scores for one class
test_scores = np.array([85, 92, 78, 96, 89, 74, 88, 93, 82, 90])
print(f"Test scores: {test_scores}")

# Mean: arithmetic average of all scores
class_average = np.mean(test_scores)
print(f"Class average: {class_average:.1f}")

# Median: middle value of the sorted scores, less sensitive to outliers
class_median = np.median(test_scores)
print(f"Class median: {class_median:.1f}")

# Comparing mean and median hints at which side the distribution leans:
# a mean above the median is pulled up by high values, and vice versa.
if class_average > class_median:
    distribution_note = "Scores are slightly skewed high"
elif class_average < class_median:
    distribution_note = "Scores are slightly skewed low"
else:
    distribution_note = "Scores are evenly distributed"
print(distribution_note)
Weighted Averages
import numpy as np

# Course grades and their weights; all three sequences are index-aligned.
categories = ['Quiz', 'Midterm', 'Final', 'Project']
grades = np.array([85, 92, 78, 96])
weights = np.array([0.2, 0.3, 0.3, 0.2])  # Quiz, Midterm, Final, Project

# np.average applies the weights (sum(grade * weight) / sum(weights));
# np.mean treats every grade equally.
weighted_grade = np.average(grades, weights=weights)
simple_average = np.mean(grades)

print("Grade breakdown:")
for cat, grade, weight in zip(categories, grades, weights):
    # The ".0%" format multiplies by 100 and appends the percent sign.
    print(f" {cat}: {grade} (weight: {weight:.0%})")
print(f"Weighted average: {weighted_grade:.1f}")
print(f"Simple average: {simple_average:.1f}")
Axis-specific Means
import numpy as np

# Student performance: rows = students, columns = subjects
performance = np.array([
    [85, 92, 78, 88],  # Alice
    [79, 85, 91, 82],  # Bob
    [94, 89, 96, 93],  # Carol
    [72, 78, 74, 76],  # David
])
students = ['Alice', 'Bob', 'Carol', 'David']
subjects = ['Math', 'Science', 'English', 'History']

# axis=1 averages across the columns (one value per student);
# axis=0 averages down the rows (one value per subject).
student_averages = np.mean(performance, axis=1)
subject_averages = np.mean(performance, axis=0)

print("Student averages:")
for student, avg in zip(students, student_averages):
    print(f" {student}: {avg:.1f}")

print("Subject averages:")
for subject, avg in zip(subjects, subject_averages):
    print(f" {subject}: {avg:.1f}")
🔝 Maximum Values
Find the largest values and their positions.
Basic Maximum
import numpy as np

# Product labels and their ratings are index-aligned.
products = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
ratings = np.array([4.2, 3.8, 4.7, 3.5, 4.9, 4.1, 3.9, 4.5])

# max() gives the value, argmax() gives where it sits in the array.
max_rating = ratings.max()
max_position = ratings.argmax()

print(f"Ratings: {ratings}")
print(f"Highest rating: {max_rating}")
print(f"Best product: {products[max_position]} (rating: {max_rating})")

# argsort orders indices from lowest to highest rating; reversing that
# order and keeping the first three yields the top three, best first.
descending = np.argsort(ratings)[::-1]
top_3_indices = descending[:3]
top_3_names = [products[idx] for idx in top_3_indices]
print(f"Top 3 products: {top_3_names}")
Multi-dimensional Maximum
import numpy as np

# Temperature grid: one row per city, one column per weekday.
cities = ['City A', 'City B', 'City C', 'City D']
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
temperatures = np.array([
    [22, 25, 18, 30, 27, 15, 32],  # City A
    [28, 31, 24, 35, 29, 20, 36],  # City B
    [19, 22, 16, 27, 24, 12, 29],  # City C
    [25, 28, 21, 32, 30, 18, 34],  # City D
])

# Peak per city (collapse the day axis) and peak per day (collapse cities).
city_max_temps = temperatures.max(axis=1)
daily_max_temps = temperatures.max(axis=0)

# The city whose own peak is the highest of all city peaks.
hottest_city_idx = city_max_temps.argmax()
hottest_city = cities[hottest_city_idx]
print(f"Hottest city: {hottest_city} ({city_max_temps[hottest_city_idx]}°C)")

# The day whose peak across all cities is the highest.
hottest_day_idx = daily_max_temps.argmax()
hottest_day = days[hottest_day_idx]
print(f"Hottest day: {hottest_day} ({daily_max_temps[hottest_day_idx]}°C)")
🔻 Minimum Values
Find the smallest values and their positions.
Basic Minimum
import numpy as np

# Response times in milliseconds; `servers` is index-aligned with them.
servers = [
    'Server-1', 'Server-2', 'Server-3', 'Server-4',
    'Server-5', 'Server-6', 'Server-7', 'Server-8',
]
response_times = np.array([245, 123, 456, 189, 334, 267, 198, 412])

# min() is the best (lowest) latency; argmin() says which server had it.
fastest_server_idx = response_times.argmin()
fastest_time = response_times.min()

print(f"Response times: {response_times}")
print(f"Fastest response: {fastest_time}ms")
print(f"Fastest server: {servers[fastest_server_idx]}")

# How far ahead of the average the best server is.
avg_time = response_times.mean()
gain = avg_time - fastest_time
print(f"Average response: {avg_time:.1f}ms")
print(f"Performance gain: {gain:.1f}ms faster than average")
Range and Spread
import numpy as np


def signed_dollars(amount):
    """Format a dollar difference with the sign ahead of the currency symbol.

    The amount is rounded to cents first, so a difference that displays as
    0.00 is never given a minus sign. Examples: 4.0 -> "+$4.00",
    -3.3 -> "-$3.30".
    """
    cents = round(float(amount), 2)
    sign = "-" if cents < 0 else "+"
    return f"{sign}${abs(cents):.2f}"


# Stock prices over time (oldest first, most recent last)
stock_prices = np.array([142.50, 145.20, 141.80, 148.60, 143.90,
                         146.70, 144.30, 149.10, 147.50, 145.80])

# Price analysis
current_price = stock_prices[-1]
min_price = np.min(stock_prices)
max_price = np.max(stock_prices)
price_range = np.ptp(stock_prices)  # peak-to-peak (max - min)

print("Stock price analysis:")
print(f"Current price: ${current_price:.2f}")
print(f"Period low: ${min_price:.2f}")
print(f"Period high: ${max_price:.2f}")
print(f"Price range: ${price_range:.2f}")

# Position analysis: both lines share one formatter so a negative
# difference prints as "-$3.30" rather than "$-3.30".
print(f"Current vs low: {signed_dollars(current_price - min_price)}")
print(f"Current vs high: {signed_dollars(current_price - max_price)}")
🎯 Combined Analysis
Use multiple aggregation functions together for comprehensive insights.
Performance Dashboard
import numpy as np

# Website metrics: daily page views
page_views = np.array([12500, 13200, 11800, 14500, 13900,
                       12100, 10800, 13600, 14200, 12900])

# Complete analysis
total_views = np.sum(page_views)
avg_views = np.mean(page_views)
best_day = np.max(page_views)
worst_day = np.min(page_views)
consistency = np.std(page_views)  # standard deviation of the daily views

print("📊 Website Performance Dashboard")
print("=" * 35)
print(f"Total page views: {total_views:,}")
print(f"Daily average: {avg_views:,.0f}")
print(f"Best day: {best_day:,} views")
print(f"Worst day: {worst_day:,} views")
print(f"Variability: {consistency:,.0f} (std dev)")

# Performance rating: how close the average day comes to the best day.
# A ratio near 1.0 means typical days are almost as strong as the peak.
performance_ratio = avg_views / best_day
if performance_ratio > 0.9:
    rating = "Excellent - Very consistent"
elif performance_ratio > 0.8:
    rating = "Good - Fairly consistent"
else:
    rating = "Needs improvement - High variability"
print(f"Performance rating: {rating}")
Comparative Analysis
import numpy as np

# Sales comparison: 2 quarters, one entry per team (index-aligned with `teams`)
q1_sales = np.array([45000, 48000, 42000, 51000, 47000])
q2_sales = np.array([52000, 49000, 55000, 58000, 54000])
teams = ['Team A', 'Team B', 'Team C', 'Team D', 'Team E']

# Quarter comparison: the same four aggregates for each quarter, in the
# order given by `metrics`.
metrics = ['Total', 'Average', 'Best', 'Worst']
q1_stats = [np.sum(q1_sales), np.mean(q1_sales), np.max(q1_sales), np.min(q1_sales)]
q2_stats = [np.sum(q2_sales), np.mean(q2_sales), np.max(q2_sales), np.min(q2_sales)]

print("📈 Quarterly Sales Comparison")
print("=" * 30)
for metric, q1, q2 in zip(metrics, q1_stats, q2_stats):
    # Percentage change from Q1 to Q2 for this metric
    change = ((q2 - q1) / q1) * 100
    print(f"{metric:8}: Q1=${q1:7,.0f} → Q2=${q2:7,.0f} ({change:+.1f}%)")

# Best improvement: element-wise difference, then the team with the largest gain
improvements = q2_sales - q1_sales
best_improvement_idx = np.argmax(improvements)
print(f"\nBest improvement: {teams[best_improvement_idx]} (+${improvements[best_improvement_idx]:,})")
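🎯 Key Takeaways
- np.sum, np.mean, np.max, and np.min each reduce an entire array to a single value.
- Pass axis=0 to aggregate down the rows (one result per column) or axis=1 to aggregate across the columns (one result per row).
- np.argmax and np.argmin return the position of the extreme value, so you can look up its label in a parallel list.
- np.cumsum gives running totals, np.average accepts weights, and np.ptp returns the range (max − min).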
🚀 What's Next?
You've mastered the basic aggregations! Next, learn to organize your data with sorting operations.
Continue to: Sorting Arrays
Was this helpful?
Track Your Learning Progress
Sign in to bookmark tutorials and keep track of your learning journey.
Your progress is saved automatically as you read.