📊 Statistical Operations
NumPy provides powerful statistical functions to analyze and understand your data! From basic descriptive statistics to advanced correlation analysis, these tools help extract insights from numerical data.
import numpy as np

# Overview: headline statistics for one small sample of test scores
test_scores = np.array([85, 92, 78, 96, 89, 74, 88, 93, 82, 90])
print(f"Test scores: {test_scores}")

# Compute every summary value first, then report them together
mean_score = test_scores.mean()
median_score = np.median(test_scores)
std_score = test_scores.std()  # NumPy's default: population std (ddof=0)
score_range = test_scores.max() - test_scores.min()  # peak-to-peak spread

print(f"Mean: {mean_score:.1f}")
print(f"Median: {median_score:.1f}")
print(f"Std Dev: {std_score:.2f}")
print(f"Range: {score_range}")
📈 Descriptive Statistics
Understand your data distribution with central tendency and spread measures.
Central Tendency
import numpy as np

grades = np.array([78, 85, 92, 88, 79, 94, 87, 91, 83, 89])

# Central tendency: arithmetic mean and middle value
mean_grade = grades.mean()
median_grade = np.median(grades)

print(f"Grades: {grades}")
print(f"Mean (average): {mean_grade:.1f}")
print(f"Median (middle): {median_grade:.1f}")

# Mode: tally each distinct grade and pick the one with the highest count.
# argmax returns the first maximum, so ties resolve to the smallest grade
# (np.unique returns its values sorted).
values, tallies = np.unique(grades, return_counts=True)
top = tallies.argmax()
print(f"Most common: {values[top]} (appears {tallies[top]} times)")
Measures of Spread
import numpy as np

sales = np.array([1200, 1350, 1180, 1420, 1290, 1380])

# Extremes first; the range is simply their difference
lowest = sales.min()
highest = sales.max()
range_val = highest - lowest

# Dispersion around the mean (population form, NumPy's default ddof=0)
std_dev = sales.std()
variance = sales.var()

print(f"Sales: {sales}")
print(f"Standard deviation: {std_dev:.1f}")
print(f"Variance: {variance:.1f}")
print(f"Range: {range_val}")
print(f"Min/Max: {lowest}, {highest}")
Percentiles and Quartiles
import numpy as np

response_times = np.array([120, 150, 180, 200, 230, 250, 280, 320, 350, 400])

# Quartiles in a single call: passing a list of percentiles returns one
# value per entry, unpacked here as Q1, Q2 (the median) and Q3
q1, q2, q3 = np.percentile(response_times, [25, 50, 75])

print(f"Response times: {response_times}")
print(f"Q1 (25th): {q1}")
print(f"Q2 (50th): {q2}")
print(f"Q3 (75th): {q3}")
# Interquartile range: width of the middle 50% of the data
print(f"IQR: {q3 - q1}")
📊 Multi-dimensional Statistics
Analyze statistics along specific axes of arrays.
By Rows and Columns
import numpy as np

# Score table: one row per student, one column per subject
students = ['Alice', 'Bob', 'Carol', 'David']
subjects = ['Math', 'Science', 'English', 'History']
scores = np.array([
    [85, 92, 78, 88],  # Alice
    [79, 85, 91, 82],  # Bob
    [94, 89, 96, 93],  # Carol
    [72, 78, 74, 76],  # David
])

# axis=1 collapses the columns, leaving one average per student (row)
student_avgs = scores.mean(axis=1)
print(f"Student averages: {np.round(student_avgs, 1)}")

# axis=0 collapses the rows, leaving one average per subject (column)
subject_avgs = scores.mean(axis=0)
print(f"Subject averages: {np.round(subject_avgs, 1)}")
Finding Best Performers
import numpy as np

# Rows follow `students`, columns follow `subjects`
students = ['Alice', 'Bob', 'Carol', 'David']
subjects = ['Math', 'Science', 'English', 'History']
scores = np.array([
    [85, 92, 78, 88],
    [79, 85, 91, 82],
    [94, 89, 96, 93],
    [72, 78, 74, 76],
])

# Top student: sum each row, then take the position of the largest total
totals = scores.sum(axis=1)
best_idx = totals.argmax()
print(f"Top student: {students[best_idx]} ({totals[best_idx]} total)")

# Strongest subject: average each column, then take the largest average
subject_avgs = scores.mean(axis=0)
best_subject = subject_avgs.argmax()
print(f"Best subject: {subjects[best_subject]} ({subject_avgs[best_subject]:.1f})")
🔗 Correlation Analysis
Measure relationships between variables.
Correlation Coefficient
import numpy as np

# Study hours vs test scores (paired by position)
study_hours = np.array([2, 4, 3, 6, 5, 8, 7, 9, 1, 10])
test_scores = np.array([65, 75, 70, 85, 80, 95, 90, 98, 60, 100])

# np.corrcoef returns the 2x2 correlation matrix of the two inputs;
# the off-diagonal entry [0, 1] is the coefficient between them
correlation = np.corrcoef(study_hours, test_scores)[0, 1]

print(f"Study hours: {study_hours}")
print(f"Test scores: {test_scores}")
print(f"Correlation: {correlation:.3f}")

# Classify by strength. Only positive thresholds are tested, so any
# negative coefficient (however strong) falls into the final branch.
if correlation > 0.7:
    print("Strong positive correlation!")
elif correlation > 0.3:
    print("Moderate positive correlation")
else:
    print("Weak correlation")
Correlation Matrix
import numpy as np

# Multiple variables: one row per person, columns = height, weight, age
data = np.array([
    [170, 65, 25],  # Person 1
    [175, 70, 30],  # Person 2
    [165, 60, 22],  # Person 3
    [180, 80, 35],  # Person 4
    [160, 55, 20],  # Person 5
])

# np.corrcoef treats each ROW as a variable by default, so transpose the
# table to put height / weight / age on the rows before correlating
corr_matrix = np.corrcoef(data.T)

print("Correlation matrix:")
variables = ['Height', 'Weight', 'Age']
# Row i of the matrix holds variable i's correlation with every variable
for var, row in zip(variables, corr_matrix):
    print(f"{var}: {row.round(3)}")
📊 Distribution Analysis
Analyze how data is distributed.
Histogram Analysis
import numpy as np

exam_scores = np.array([65, 67, 70, 72, 75, 78, 80, 82, 85, 87,
                        89, 90, 92, 94, 95, 96, 98, 85, 88, 91])

# Bucket the scores into 5 equal-width bins spanning min..max.
# `hist` holds the count per bin; `bin_edges` has one more entry than
# `hist` because it lists both ends of every bin.
hist, bin_edges = np.histogram(exam_scores, bins=5)

print("Score distribution:")
# Pair each bin's left and right edge with its count
for left, right, count in zip(bin_edges[:-1], bin_edges[1:], hist):
    range_str = f"{left:.0f}-{right:.0f}"
    print(f"{range_str}: {count} students")

# Rough skewness indicator: the sign of (mean - median) hints at which
# side the longer tail is on; this is not a formal skewness statistic
skew = np.mean(exam_scores) - np.median(exam_scores)
print(f"Skewness indicator: {skew:.2f}")
Outlier Detection
import numpy as np

# Sample containing one obviously extreme reading
data = np.array([120, 115, 130, 125, 140, 135, 128, 2000, 122, 138])

# IQR method: fetch both quartiles in one call, then measure their gap
q1, q3 = np.percentile(data, [25, 75])
iqr = q3 - q1

# Fences sit 1.5 IQRs beyond each quartile
fence = 1.5 * iqr
lower = q1 - fence
upper = q3 + fence

# Boolean mask: True wherever a value lies outside the fences
outliers = ~((data >= lower) & (data <= upper))

print(f"Data: {data}")
print(f"Outlier bounds: [{lower:.1f}, {upper:.1f}]")
print(f"Outliers: {data[outliers]}")
🧮 Advanced Statistics
More sophisticated statistical operations.
Weighted Statistics
import numpy as np

# One grade per assessment, in the order Quiz, Midterm, Final, Project
grades = np.array([85, 92, 78, 96])
# Matching weight for each assessment
weights = np.array([0.2, 0.3, 0.3, 0.2])

# np.average scales each grade by its weight before averaging;
# the plain mean treats every grade equally
weighted_avg = np.average(grades, weights=weights)
plain_avg = grades.mean()

print(f"Grades: {grades}")
print(f"Weights: {weights}")
print(f"Weighted average: {weighted_avg:.1f}")
print(f"Regular average: {plain_avg:.1f}")
Cumulative Statistics
import numpy as np

daily_sales = np.array([120, 150, 130, 180, 160, 200, 175])

# Running total, then divide by the number of days elapsed (1, 2, 3, ...)
# to get the average up to and including each day
cumsum = daily_sales.cumsum()
days_elapsed = np.arange(1, daily_sales.size + 1)
running_avg = cumsum / days_elapsed

print(f"Daily sales: {daily_sales}")
print(f"Cumulative total: {cumsum}")
print(f"Running average: {np.round(running_avg, 1)}")

# Day-over-day growth: change from the previous day as a percentage of
# the previous day's sales (one fewer entry than the input)
previous = daily_sales[:-1]
change = np.diff(daily_sales)
growth = change / previous * 100
print(f"Daily growth %: {np.round(growth, 1)}")
🧠 Business Analytics Example
import numpy as np

# Revenue samples for two consecutive quarters
q1_revenue = np.array([50000, 45000, 60000, 55000, 48000])
q2_revenue = np.array([55000, 50000, 65000, 60000, 52000])

# Summary statistics for each quarter
q1_mean = q1_revenue.mean()
q1_std = q1_revenue.std()
q2_mean = q2_revenue.mean()
q2_std = q2_revenue.std()

# Same report line for both quarters
for label, avg, spread in (("Q1", q1_mean, q1_std), ("Q2", q2_mean, q2_std)):
    print(f"{label} - Mean: ${avg:,.0f}, Std: ${spread:,.0f}")

# Growth of the Q2 mean relative to the Q1 mean, as a percentage
growth = ((q2_mean - q1_mean) / q1_mean) * 100
print(f"Quarter-over-quarter growth: {growth:.1f}%")
🎯 Key Takeaways
🚀 What's Next?
You've now covered NumPy's core statistical tools. Next, explore linear algebra for matrix operations.
Continue to: Linear Algebra Operations
Was this helpful?
Track Your Learning Progress
Sign in to bookmark tutorials and keep track of your learning journey.
Your progress is saved automatically as you read.