📈 Sum, Mean, Min, Max

NumPy's reduction functions are your essential tools for data summarization! Calculate totals, averages, and extremes efficiently across entire arrays or specific dimensions to extract meaningful insights from your data.

import numpy as np

# Sample data: seven sales figures to summarise
sales = np.array([1200, 1350, 1180, 1420, 1290, 1380, 1250])

# Reduce the whole array to a single number with each aggregation
total_sales = np.sum(sales)      # sum of every element
average_sales = np.mean(sales)   # arithmetic mean
best_month = np.max(sales)       # largest element
worst_month = np.min(sales)      # smallest element

# Report the raw data first, then the four summary values
print(f"Monthly sales: {sales}")
print(f"Total: ${total_sales:,}")
print(f"Average: ${average_sales:,.0f}")
print(f"Best month: ${best_month:,}")
print(f"Worst month: ${worst_month:,}")

📊 Sum Operations

Calculate totals across arrays and dimensions.

Basic Sum

import numpy as np

# Visitor counts for one week, with the matching day labels
daily_visitors = np.array([1200, 1350, 1180, 1420, 1290, 1100, 980])
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# np.sum collapses the array to a single total, while np.cumsum keeps a
# running total whose last element equals that overall sum
total_visitors = np.sum(daily_visitors)
cumulative = np.cumsum(daily_visitors)

print(f"Daily visitors: {daily_visitors}")
print(f"Weekly total: {total_visitors:,} visitors")
print(f"Cumulative visitors: {cumulative}")

Multi-dimensional Sum

import numpy as np

# Sales data: 3 stores × 4 quarters (rows are stores, columns are quarters)
sales_matrix = np.array([[120, 135, 145, 160],  # Store A
                         [98, 112, 125, 140],   # Store B
                         [156, 167, 175, 185]]) # Store C

stores = ['Store A', 'Store B', 'Store C']
quarters = ['Q1', 'Q2', 'Q3', 'Q4']

# Sum across different axes
store_totals = np.sum(sales_matrix, axis=1)    # collapse columns: one total per store
quarter_totals = np.sum(sales_matrix, axis=0)  # collapse rows: one total per quarter
grand_total = np.sum(sales_matrix)             # no axis given: sum of every element

# Pair each label with its total. .tolist() converts the NumPy integers to
# plain Python ints so the dicts print as {'Store A': 560, ...}; with
# NumPy 2 the raw scalars would be shown as np.int64(560) inside a dict.
store_summary = dict(zip(stores, store_totals.tolist()))
quarter_summary = dict(zip(quarters, quarter_totals.tolist()))

print(f"Store totals: {store_summary}")
print(f"Quarter totals: {quarter_summary}")
print(f"Grand total: {grand_total}")

📊 Mean (Average) Calculations

Calculate central tendencies and typical values.

Basic Mean

import numpy as np

# Ten test scores for one class
test_scores = np.array([85, 92, 78, 96, 89, 74, 88, 93, 82, 90])

# Two measures of the "typical" score: the arithmetic mean and the median
class_average = np.mean(test_scores)
class_median = np.median(test_scores)

# A mean above the median is reported as a high skew, a mean below it as
# a low skew; anything else is reported as evenly distributed
if class_average > class_median:
    distribution_note = "Scores are slightly skewed high"
elif class_average < class_median:
    distribution_note = "Scores are slightly skewed low"
else:
    distribution_note = "Scores are evenly distributed"

print(f"Test scores: {test_scores}")
print(f"Class average: {class_average:.1f}")
print(f"Class median: {class_median:.1f}")
print(distribution_note)

Weighted Averages

import numpy as np

# Four graded components and the share of the final grade each one carries
categories = ['Quiz', 'Midterm', 'Final', 'Project']
grades = np.array([85, 92, 78, 96])
weights = np.array([0.2, 0.3, 0.3, 0.2])  # same order as categories

# np.mean treats every grade equally; np.average applies the weights
simple_average = np.mean(grades)
weighted_grade = np.average(grades, weights=weights)

print("Grade breakdown:")
for category, score, share in zip(categories, grades, weights):
    percent = share * 100  # fraction -> percentage for display
    print(f"  {category}: {score} (weight: {percent:.0f}%)")

print(f"Weighted average: {weighted_grade:.1f}")
print(f"Simple average: {simple_average:.1f}")

Axis-specific Means

import numpy as np

# Score table: one row per student, one column per subject
performance = np.array([[85, 92, 78, 88],   # Alice
                        [79, 85, 91, 82],   # Bob
                        [94, 89, 96, 93],   # Carol
                        [72, 78, 74, 76]])  # David

students = ['Alice', 'Bob', 'Carol', 'David']
subjects = ['Math', 'Science', 'English', 'History']

# axis=1 averages along each row (per student);
# axis=0 averages down each column (per subject)
student_averages = np.mean(performance, axis=1)
subject_averages = np.mean(performance, axis=0)

# Both reports share one layout: a heading, then one indented line per label
for heading, labels, means in (
    ("Student averages:", students, student_averages),
    ("Subject averages:", subjects, subject_averages),
):
    print(heading)
    for label, mean_score in zip(labels, means):
        print(f"  {label}: {mean_score:.1f}")

🔝 Maximum Values

Find the largest values and their positions.

Basic Maximum

import numpy as np

# Eight products and the rating each one received
products = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
ratings = np.array([4.2, 3.8, 4.7, 3.5, 4.9, 4.1, 3.9, 4.5])

# np.max gives the largest value, np.argmax the index where it occurs
max_rating = np.max(ratings)
max_position = np.argmax(ratings)
best_product = products[max_position]

print(f"Ratings: {ratings}")
print(f"Highest rating: {max_rating}")
print(f"Best product: {best_product} (rating: {max_rating})")

# argsort orders the indices from lowest to highest rating; reversing that
# order and keeping the first three yields the top 3, best first
top_3_indices = np.argsort(ratings)[::-1][:3]
top_3_products = [products[i] for i in top_3_indices]
print(f"Top 3 products: {top_3_products}")

Multi-dimensional Maximum

import numpy as np

# Temperature readings: one row per city, one column per day
temperatures = np.array([[22, 25, 18, 30, 27, 15, 32],   # City A
                         [28, 31, 24, 35, 29, 20, 36],   # City B
                         [19, 22, 16, 27, 24, 12, 29],   # City C
                         [25, 28, 21, 32, 30, 18, 34]])  # City D

cities = ['City A', 'City B', 'City C', 'City D']
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# axis=1 scans along each row (peak per city);
# axis=0 scans down each column (peak per day)
city_max_temps = np.max(temperatures, axis=1)
daily_max_temps = np.max(temperatures, axis=0)

# argmax over those peaks locates the hottest city and the hottest day
hottest_city_idx = np.argmax(city_max_temps)
hottest_day_idx = np.argmax(daily_max_temps)

hottest_city = cities[hottest_city_idx]
hottest_city_temp = city_max_temps[hottest_city_idx]
print(f"Hottest city: {hottest_city} ({hottest_city_temp}°C)")

hottest_day = days[hottest_day_idx]
hottest_day_temp = daily_max_temps[hottest_day_idx]
print(f"Hottest day: {hottest_day} ({hottest_day_temp}°C)")

🔻 Minimum Values

Find the smallest values and their positions.

Basic Minimum

import numpy as np

# Response time in milliseconds for each of the eight servers
response_times = np.array([245, 123, 456, 189, 334, 267, 198, 412])
servers = [
    'Server-1', 'Server-2', 'Server-3', 'Server-4',
    'Server-5', 'Server-6', 'Server-7', 'Server-8',
]

# np.min gives the smallest value, np.argmin the index where it occurs
fastest_server_idx = np.argmin(response_times)
fastest_time = np.min(response_times)

print(f"Response times: {response_times}")
print(f"Fastest response: {fastest_time}ms")
print(f"Fastest server: {servers[fastest_server_idx]}")

# Compare the fastest response with the average across all servers
avg_time = np.mean(response_times)
gap_to_average = avg_time - fastest_time
print(f"Average response: {avg_time:.1f}ms")
print(f"Performance gain: {gap_to_average:.1f}ms faster than average")

Range and Spread

import numpy as np

# Stock prices over time (the last entry is used as the current price)
stock_prices = np.array([142.50, 145.20, 141.80, 148.60, 143.90,
                         146.70, 144.30, 149.10, 147.50, 145.80])

# Price analysis
current_price = stock_prices[-1]
min_price = np.min(stock_prices)
max_price = np.max(stock_prices)
price_range = np.ptp(stock_prices)  # peak-to-peak (max - min)

print("Stock price analysis:")
print(f"Current price: ${current_price:.2f}")
print(f"Period low: ${min_price:.2f}")
print(f"Period high: ${max_price:.2f}")
print(f"Price range: ${price_range:.2f}")

# Position analysis. Both gaps are kept as non-negative amounts and the sign
# is written in front of the dollar symbol, so the distance below the high
# reads "-$3.30" rather than the malformed "$-3.30".
above_low = current_price - min_price    # current can never be below the minimum
below_high = max_price - current_price   # current can never exceed the maximum
high_sign = "-" if below_high > 0 else ""  # no minus sign when sitting at the high

vs_low_line = f"Current vs low: +${above_low:.2f}"
vs_high_line = f"Current vs high: {high_sign}${below_high:.2f}"
print(vs_low_line)
print(vs_high_line)

🎯 Combined Analysis

Use multiple aggregation functions together for comprehensive insights.

Performance Dashboard

import numpy as np

# Website metrics: page views for ten consecutive days
page_views = np.array([12500, 13200, 11800, 14500, 13900,
                       12100, 10800, 13600, 14200, 12900])

# One aggregation per headline figure on the dashboard
total_views = np.sum(page_views)
avg_views = np.mean(page_views)
best_day = np.max(page_views)
worst_day = np.min(page_views)
consistency = np.std(page_views)  # standard deviation of the daily views

print("📊 Website Performance Dashboard")
print("=" * 35)
print(f"Total page views: {total_views:,}")
print(f"Daily average: {avg_views:,.0f}")
print(f"Best day: {best_day:,} views")
print(f"Worst day: {worst_day:,} views")
print(f"Variability: {consistency:,.0f} (std dev)")

# Performance rating: how close the average day comes to the best day.
# Thresholds are checked from strictest to loosest; the first one the
# ratio exceeds decides the label, otherwise the fallback label is kept.
performance_ratio = avg_views / best_day
rating = "Needs improvement - High variability"
for threshold, label in (
    (0.9, "Excellent - Very consistent"),
    (0.8, "Good - Fairly consistent"),
):
    if performance_ratio > threshold:
        rating = label
        break

print(f"Performance rating: {rating}")

Comparative Analysis

import numpy as np

# Sales per team for two consecutive quarters (same team order in both arrays)
q1_sales = np.array([45000, 48000, 42000, 51000, 47000])
q2_sales = np.array([52000, 49000, 55000, 58000, 54000])

teams = ['Team A', 'Team B', 'Team C', 'Team D', 'Team E']

# The same four reductions are applied to each quarter; `metrics` names
# them in the same order as `reducers`
metrics = ['Total', 'Average', 'Best', 'Worst']
reducers = (np.sum, np.mean, np.max, np.min)
q1_stats = [aggregate(q1_sales) for aggregate in reducers]
q2_stats = [aggregate(q2_sales) for aggregate in reducers]

print("📈 Quarterly Sales Comparison")
print("=" * 30)
for metric, q1, q2 in zip(metrics, q1_stats, q2_stats):
    # Percentage change from Q1 to Q2 for this metric
    change = (q2 - q1) / q1 * 100
    print(f"{metric:8}: Q1=${q1:7,.0f} → Q2=${q2:7,.0f} ({change:+.1f}%)")

# Element-wise difference gives each team's gain; argmax picks the largest
improvements = q2_sales - q1_sales
best_improvement_idx = np.argmax(improvements)
best_team = teams[best_improvement_idx]
best_gain = improvements[best_improvement_idx]
print(f"\nBest improvement: {best_team} (+${best_gain:,})")

🎯 Key Takeaways

🚀 What's Next?

You've now mastered the basic aggregations! Next, learn to organize data with sorting operations.

Continue to: Sorting Arrays

Was this helpful?

😔Poor
🙁Fair
😊Good
😄Great
🤩Excellent