🎲 Random Number Generation

Random numbers are essential for testing, simulations, and data analysis! NumPy's random module provides powerful tools for generating controlled randomness, creating test datasets, and performing statistical sampling with reproducible results.

import numpy as np

# Fix the global RNG state so every run draws the same values
np.random.seed(42)

# One sample of each kind (randint's upper bound is exclusive)
random_integers = np.random.randint(low=1, high=101, size=5)    # whole numbers 1..100
random_floats = np.random.random(size=5)                        # uniform floats in [0, 1)
normal_data = np.random.normal(loc=50, scale=10, size=5)        # mean 50, std 10

# Round only for display; the stored arrays keep full precision
rounded_floats = np.round(random_floats, 3)
rounded_normal = np.round(normal_data, 1)

print(f"Random integers: {random_integers}")
print(f"Random floats: {rounded_floats}")
print(f"Normal distribution: {rounded_normal}")

# Every time you run this with seed=42, you get the same results!

🎯 Setting Random Seeds

Control randomness for reproducible results.

Reproducible Random Numbers

import numpy as np

def _draw_with_seed(seed, count=5):
    """Seed NumPy's global RNG, then return `count` uniform floats."""
    np.random.seed(seed)
    return np.random.random(count)


# Two draws started from the same seed must match exactly
print("First run with seed 123:")
data1 = _draw_with_seed(123)
print(f"Random numbers: {data1.round(3)}")

print("\nSecond run with same seed:")
data2 = _draw_with_seed(123)
print(f"Random numbers: {data2.round(3)}")

print(f"Are they identical? {np.array_equal(data1, data2)}")

# A different seed starts a different sequence
print("\nThird run with different seed (456):")
data3 = _draw_with_seed(456)
print(f"Random numbers: {data3.round(3)}")

When to Use Seeds

import numpy as np

# Use seeds for testing and debugging
def generate_test_data(seed=None, size=10):
    """Build a small synthetic dataset of user IDs, scores and categories.

    Args:
        seed: Optional seed for NumPy's global RNG. When given, the global
            state is reset first so the returned data is reproducible. When
            ``None``, the global RNG is used in whatever state it is in.
        size: Number of records to generate (defaults to 10).

    Returns:
        dict with three arrays of length ``size``: ``'user_ids'`` (4-digit
        integers, 1000-9999), ``'scores'`` (floats drawn uniformly from
        0 to 100) and ``'categories'`` (labels drawn from 'A', 'B', 'C').
    """
    if seed is not None:
        np.random.seed(seed)

    return {
        # randint's upper bound is exclusive, so 10000 is needed to reach 9999
        'user_ids': np.random.randint(1000, 10000, size),
        'scores': np.random.uniform(0, 100, size),
        'categories': np.random.choice(['A', 'B', 'C'], size),
    }

# Reproducible test data: the same seed always yields the same dataset
test_data = generate_test_data(seed=42)
print("Test dataset (reproducible):")
print(f"User IDs: {test_data['user_ids'][:5]}")
print(f"Scores: {test_data['scores'][:5].round(1)}")
print(f"Categories: {test_data['categories'][:5]}")

# Production-style data (unseeded, so it should vary between runs).
# The seeded call above leaves the global RNG in a deterministic state, so an
# unseeded call made right after it would still be reproducible. Calling
# seed() with no argument re-initialises the generator from fresh entropy.
# The values are still pseudo-random, not "truly" random.
np.random.seed()
prod_data = generate_test_data()  # No seed
print(f"\nProduction data (random): {prod_data['user_ids'][:3]}")

🔢 Basic Random Functions

Generate different types of random numbers.

Random Integers

import numpy as np

np.random.seed(42)

# Random integers in different ranges (randint's upper bound is exclusive)
dice_rolls = np.random.randint(1, 7, 10)        # Dice: 1-6
user_ratings = np.random.randint(1, 6, 8)       # Ratings: 1-5

# A lottery draw must not repeat a number. randint samples with replacement
# and can return duplicates, so pick 6 distinct values from 1-49 instead.
lottery_numbers = np.random.choice(np.arange(1, 50), 6, replace=False)   # Lottery: 1-49

print(f"Dice rolls: {dice_rolls}")
print(f"User ratings: {user_ratings}")
# tolist() gives plain Python ints, so the sorted list prints as [3, 17, ...]
# instead of a list of NumPy scalar reprs
print(f"Lottery numbers: {sorted(lottery_numbers.tolist())}")

# Generate 5-digit user IDs (10000-99999; the upper bound 100000 is exclusive)
user_ids = np.random.randint(10000, 100000, 5)
print(f"User IDs: {user_ids}")

Random Floats

import numpy as np

np.random.seed(123)

# Uniform floats on the unit interval
probabilities = np.random.random(size=5)
print(f"Probabilities: {np.round(probabilities, 3)}")

# Uniform floats between explicit bounds
temperatures = np.random.uniform(low=15, high=35, size=7)       # 15-35°C
prices = np.random.uniform(low=10.99, high=99.99, size=5)       # $10.99-$99.99

print(f"Temperatures: {np.round(temperatures, 1)}°C")
print(f"Prices: ${np.round(prices, 2)}")

# Fractions between 0.05 and 0.25, shown as percentages
discounts = np.random.uniform(low=0.05, high=0.25, size=4)
discount_percent = discounts * 100
print(f"Discounts: {discount_percent.round(1)}%")

Random Choice and Sampling

import numpy as np

np.random.seed(456)

# Draw with replacement from a fixed list of labels
colors = ['red', 'blue', 'green', 'yellow', 'purple']
random_colors = np.random.choice(colors, size=8)
print(f"Random colors: {random_colors}")

# replace=False means no label can be picked twice
unique_colors = np.random.choice(colors, size=3, replace=False)
print(f"Unique sample: {unique_colors}")

# Non-uniform draw: p gives the probability of each option
products = ['Basic', 'Premium', 'Enterprise']
weights = [0.6, 0.3, 0.1]  # Basic is most common
customer_plans = np.random.choice(products, size=10, p=weights)
print(f"Customer plans: {customer_plans}")

# Tally how often each plan was drawn
unique, counts = np.unique(customer_plans, return_counts=True)
for plan_name, n_drawn in zip(unique, counts):
    print(f"  {plan_name}: {n_drawn} customers")

📊 Probability Distributions

Generate data following statistical distributions.

Normal Distribution

import numpy as np

np.random.seed(789)

# Draws from the standard normal, N(0, 1)
standard_normal = np.random.standard_normal(size=5)
print(f"Standard normal: {np.round(standard_normal, 2)}")

# Draws from normals with a chosen centre (loc) and spread (scale)
test_scores = np.random.normal(loc=75, scale=10, size=20)
heights = np.random.normal(loc=170, scale=15, size=15)      # centimetres

print(f"Test scores (mean=75): {np.round(test_scores, 1)}")
print(f"Heights (mean=170cm): {np.round(heights, 1)}")

# Sample statistics of the 20 scores, to compare with the requested 75 / 10
score_mean = test_scores.mean()
score_std = test_scores.std()
print(f"Actual test score mean: {score_mean:.1f}")
print(f"Actual test score std: {score_std:.1f}")

Other Useful Distributions

import numpy as np

np.random.seed(101)

# Exponential: waiting times, where scale is the mean wait (5 minutes)
wait_times = np.random.exponential(scale=5, size=8)
print(f"Wait times: {np.round(wait_times, 1)} minutes")

# Binomial: successes in n=10 trials with p=0.5, repeated for 5 experiments
coin_flips = np.random.binomial(n=10, p=0.5, size=5)
print(f"Heads in 10 flips: {coin_flips}")

# Poisson: event counts per period with mean rate lam=25, for 7 days
daily_emails = np.random.poisson(lam=25, size=7)
print(f"Daily emails: {daily_emails}")

# Uniform: angles drawn evenly between 0 and 360 degrees
random_angles = np.random.uniform(low=0, high=360, size=4)
print(f"Random angles: {np.round(random_angles, 1)}°")

🧪 Data Simulation

Create realistic test datasets for analysis.

Customer Data Simulation

import numpy as np

np.random.seed(2023)

# Size of the simulated customer base
n_customers = 100

# Ages: normal around 40 (std 15), clamped to 18-80 and truncated to whole years
raw_ages = np.random.normal(40, 15, n_customers)
ages = np.clip(raw_ages, 18, 80).astype(int)

# Income rises linearly with age ($1,000 per year above 18, starting at
# $30,000) plus Gaussian noise, and is floored at $20,000
base_income = 30000 + (ages - 18) * 1000
income_noise = np.random.normal(0, 15000, n_customers)
incomes = np.maximum(base_income + income_noise, 20000)

# Purchase probability grows with income and is capped at 80%;
# each customer gets one Bernoulli draw (1 = bought, 0 = did not)
purchase_prob = np.minimum(incomes / 100000, 0.8)
purchases = np.random.binomial(1, purchase_prob, n_customers)

print(f"Customer Simulation ({n_customers} customers):")
print(f"Age range: {ages.min()}-{ages.max()} years")
print(f"Income range: ${incomes.min():,.0f}-${incomes.max():,.0f}")
print(f"Purchase rate: {np.mean(purchases):.1%}")

# Boolean masks splitting customers into three age bands;
# "middle" is everyone who is neither young nor senior, i.e. 30-49
young = ages < 30
senior = ages >= 50
middle = ~(young | senior)

print("\nBy age group:")
age_bands = (
    ("Young (<30)", young),
    ("Middle (30-49)", middle),
    ("Senior (50+)", senior),
)
for label, mask in age_bands:
    print(f"{label}: {np.sum(mask)} customers, {np.mean(purchases[mask]):.1%} purchase rate")

A/B Test Simulation

import numpy as np

np.random.seed(555)

# A/B test parameters
control_rate = 0.12        # 12% conversion rate
test_rate = 0.15           # 15% conversion rate (25% relative improvement)
sample_size = 1000         # visitors per group

# Simulate test results: one binomial draw per group gives its conversion count
control_conversions = np.random.binomial(sample_size, control_rate)
test_conversions = np.random.binomial(sample_size, test_rate)

# Calculate metrics
control_percentage = control_conversions / sample_size
test_percentage = test_conversions / sample_size
# Relative lift over control; undefined if the control group never converted
if control_conversions > 0:
    improvement = (test_percentage - control_percentage) / control_percentage
else:
    improvement = float('nan')

print(f"🧪 A/B Test Simulation Results:")
print(f"Sample size: {sample_size} visitors per group")
print(f"Control group: {control_conversions} conversions ({control_percentage:.1%})")
print(f"Test group: {test_conversions} conversions ({test_percentage:.1%})")
print(f"Improvement: {improvement:.1%}")

# Statistical significance: pooled two-proportion z-test.
# A fixed cut-off on the raw conversion difference ignores sample size and
# base rate, so compare the observed difference with its standard error.
difference = test_conversions - control_conversions
pooled_rate = (control_conversions + test_conversions) / (2 * sample_size)
standard_error = np.sqrt(pooled_rate * (1 - pooled_rate) * 2 / sample_size)
if standard_error > 0:
    z_score = (test_percentage - control_percentage) / standard_error
else:
    # Both groups converted 0% or 100%: there is no variation to test
    z_score = 0.0
is_significant = abs(z_score) > 1.96   # two-sided test at the 5% level

print(f"z-score: {z_score:.2f}")
if is_significant:
    print("✅ Likely statistically significant!")
else:
    print("⚠️ May need larger sample size")

🎮 Practical Applications

Game Development

import numpy as np

np.random.seed(777)

# RPG character generation
def generate_character():
    """Roll up one character using NumPy's global RNG.

    Returns:
        dict with keys ``'class'`` (name matching the highest ability score),
        ``'abilities'`` (six 3d6 totals), ``'hp'`` (integer 6-14) and
        ``'gold'`` (integer 10-99).
    """
    # One 3d6 total per ability, in Strength, Dex, Con, Int, Wis, Cha order
    abilities = [np.sum(np.random.randint(1, 7, 3)) for _ in range(6)]

    # The class is determined by the position of the highest score
    # (argmax returns the first position when scores tie)
    class_by_ability = ('Fighter', 'Rogue', 'Wizard', 'Cleric', 'Ranger', 'Bard')
    character_class = class_by_ability[np.argmax(abilities)]

    # HP is drawn before gold, keeping the order of random draws fixed
    hit_points = np.random.randint(6, 15)        # upper bound exclusive: 6-14
    starting_gold = np.random.randint(10, 100)   # upper bound exclusive: 10-99

    return {
        'class': character_class,
        'abilities': abilities,
        'hp': hit_points,
        'gold': starting_gold,
    }

# Roll up a whole adventuring party and print one line per member
party_size = 4
print(f"🗡️ Generated Party of {party_size}:")

ability_names = ['STR', 'DEX', 'CON', 'INT', 'WIS', 'CHA']
for _ in range(party_size):
    member = generate_character()
    # Pair each score with its abbreviation, e.g. "STR:12"
    labelled_scores = (f"{name}:{score}" for name, score in zip(ability_names, member['abilities']))
    abilities_str = ', '.join(labelled_scores)
    print(f"  {member['class']}: HP={member['hp']}, Gold={member['gold']}, [{abilities_str}]")

Quality Assurance Testing

import numpy as np

np.random.seed(888)

# Generate test data for QA
def generate_test_scenarios(n_tests=20):
    """Create `n_tests` randomised QA scenario records.

    Args:
        n_tests: Number of scenarios to build (defaults to 20).

    Returns:
        list of dicts, each with ``'test_id'`` ("TEST_001", "TEST_002", ...),
        ``'user_type'``, ``'device'``, ``'load_time'`` (exponential draw with
        mean 2.0) and ``'success'`` (True with probability 0.85).
    """
    user_types = ['guest', 'registered', 'premium']
    user_type_probs = [0.3, 0.5, 0.2]
    devices = ['mobile', 'desktop', 'tablet']
    device_probs = [0.6, 0.3, 0.1]

    def build(number):
        # Dict values are evaluated top to bottom, which fixes the order in
        # which random draws are consumed for each scenario.
        return {
            'test_id': f"TEST_{number:03d}",
            'user_type': np.random.choice(user_types, p=user_type_probs),
            'device': np.random.choice(devices, p=device_probs),
            'load_time': np.random.exponential(2.0),  # Average 2 seconds
            'success': np.random.choice([True, False], p=[0.85, 0.15]),
        }

    return [build(index + 1) for index in range(n_tests)]

# Generate test suite
test_suite = generate_test_scenarios(15)

print("🧪 QA Test Scenarios Generated:")
# Only the first 8 scenarios are displayed; the summary covers exactly those
shown = test_suite[:8]
success_count = 0
for scenario in shown:
    status = "✅ PASS" if scenario['success'] else "❌ FAIL"
    print(f"  {scenario['test_id']}: {scenario['user_type']}/{scenario['device']} "
          f"({scenario['load_time']:.1f}s) {status}")
    if scenario['success']:
        success_count += 1

# Take the denominator from the displayed slice rather than a hard-coded 8, so
# the ratio and the percentage agree even if the suite has fewer than 8
# scenarios (and an empty suite does not divide by zero).
pass_rate = success_count / len(shown) if shown else 0.0
print(f"\nSummary: {success_count}/{len(shown)} tests passed ({pass_rate:.1%})")

🎯 Key Takeaways

🚀 What's Next?

You've now covered random number generation! Next, learn to handle missing data with NaN values.

Continue to: Working with NaN Values

Was this helpful?

😔Poor
🙁Fair
😊Good
😄Great
🤩Excellent