💾 Saving and Loading Arrays
Mastering file operations is essential for real-world data workflows! Learn to save analysis results, share datasets, and build robust data pipelines using NumPy's comprehensive file I/O capabilities.
import numpy as np
# Sample analysis data to save
sales_data = np.array([[1200, 1350, 1180, 1420],
                       [1290, 1380, 1250, 1500],
                       [1100, 1200, 1150, 1320]])
regions = ['North', 'South', 'East']
months = ['Jan', 'Feb', 'Mar', 'Apr']
print(f"Sales data shape: {sales_data.shape}")
print(f"Total sales: ${np.sum(sales_data):,}")
print(f"Average monthly sales: ${np.mean(sales_data):,.0f}")
# This data is ready to be saved in various formats!
💾 Binary Format (.npy)
Save and load single arrays efficiently with perfect precision.
Basic Save and Load
import numpy as np
import tempfile
import os
# Create sample data
monthly_revenue = np.array([45000, 52000, 48000, 61000, 58000, 55000])
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']
print(f"Monthly revenue: {monthly_revenue}")
print(f"Data type: {monthly_revenue.dtype}")
# Save to binary file
# Note: In real usage, you'd use actual file paths
temp_dir = tempfile.gettempdir()
file_path = os.path.join(temp_dir, 'monthly_revenue.npy')
# Save array
np.save(file_path, monthly_revenue)
print(f"✅ Saved to: {file_path}")
# Load array back
loaded_revenue = np.load(file_path)
print(f"📂 Loaded: {loaded_revenue}")
# Verify data integrity
print(f"Data identical: {np.array_equal(monthly_revenue, loaded_revenue)}")
print(f"Data type preserved: {loaded_revenue.dtype}")
# Clean up
os.remove(file_path)
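A detail worth knowing: if the path you pass to np.save does not end in .npy, NumPy appends the extension automatically, so the file on disk may not match the exact string you supplied. A minimal sketch (the revenue_backup name is just for illustration):

import numpy as np
import tempfile
import os

base_path = os.path.join(tempfile.gettempdir(), 'revenue_backup')  # no extension given
np.save(base_path, np.array([45000, 52000]))
# NumPy actually wrote 'revenue_backup.npy'
print(os.path.exists(base_path + '.npy'))  # True
os.remove(base_path + '.npy')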
Preserving Data Types
import numpy as np
import tempfile
import os
# Different data types
customer_ids = np.array([1001, 1002, 1003, 1004], dtype=np.int32)
ratings = np.array([4.5, 3.8, 4.9, 4.1], dtype=np.float32)
is_premium = np.array([True, False, True, False], dtype=bool)
print(f"Original data types:")
print(f"Customer IDs: {customer_ids.dtype}")
print(f"Ratings: {ratings.dtype}")
print(f"Premium status: {is_premium.dtype}")
temp_dir = tempfile.gettempdir()
# Save each array
np.save(os.path.join(temp_dir, 'customer_ids.npy'), customer_ids)
np.save(os.path.join(temp_dir, 'ratings.npy'), ratings)
np.save(os.path.join(temp_dir, 'is_premium.npy'), is_premium)
# Load back
loaded_ids = np.load(os.path.join(temp_dir, 'customer_ids.npy'))
loaded_ratings = np.load(os.path.join(temp_dir, 'ratings.npy'))
loaded_premium = np.load(os.path.join(temp_dir, 'is_premium.npy'))
print(f"\nLoaded data types:")
print(f"Customer IDs: {loaded_ids.dtype}")
print(f"Ratings: {loaded_ratings.dtype}")
print(f"Premium status: {loaded_premium.dtype}")
# Clean up
for file in ['customer_ids.npy', 'ratings.npy', 'is_premium.npy']:
    os.remove(os.path.join(temp_dir, file))
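The .npy format round-trips numeric and boolean dtypes exactly. Arrays with dtype=object are a different story: they are pickled under the hood, and np.load refuses them by default for security reasons. A short sketch of what that looks like:

import numpy as np
import tempfile
import os

# Object arrays fall back to pickle when saved
mixed = np.array([1, 'two', 3.0], dtype=object)
path = os.path.join(tempfile.gettempdir(), 'mixed.npy')
np.save(path, mixed)
try:
    np.load(path)  # allow_pickle defaults to False on load
except ValueError as e:
    print(f"Load blocked: {e}")
loaded = np.load(path, allow_pickle=True)  # explicit opt-in
print(loaded)
os.remove(path)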
📦 Archive Format (.npz)
Save multiple related arrays in a single file.
Multiple Arrays
import numpy as np
import tempfile
import os
# Related analysis data
customer_analysis = {
    'customer_ids': np.array([1001, 1002, 1003, 1004, 1005]),
    'ages': np.array([25, 34, 28, 45, 31]),
    'purchase_amounts': np.array([250.50, 180.25, 420.75, 95.00, 310.50]),
    'satisfaction_scores': np.array([4.5, 3.8, 4.9, 3.2, 4.1]),
    'last_purchase_days': np.array([5, 12, 3, 45, 8])
}
print(f"Customer analysis contains {len(customer_analysis)} arrays:")
for name, data in customer_analysis.items():
    print(f"  {name}: {data.shape} {data.dtype}")
temp_dir = tempfile.gettempdir()
file_path = os.path.join(temp_dir, 'customer_analysis.npz')
# Save all arrays in one file
np.savez(file_path, **customer_analysis)
print(f"\n✅ Saved all arrays to: customer_analysis.npz")
# Load the archive
loaded_data = np.load(file_path)
print(f"\n📂 Available arrays: {list(loaded_data.keys())}")
# Access individual arrays
print(f"Customer IDs: {loaded_data['customer_ids']}")
print(f"Average age: {np.mean(loaded_data['ages']):.1f}")
print(f"Total purchases: ${np.sum(loaded_data['purchase_amounts']):.2f}")
# Close and clean up
loaded_data.close()
os.remove(file_path)
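Because np.load keeps the underlying archive open until you call close(), the returned NpzFile also works as a context manager, which makes cleanup automatic. A small sketch with hypothetical array names:

import numpy as np
import tempfile
import os

path = os.path.join(tempfile.gettempdir(), 'demo.npz')
np.savez(path, ids=np.array([1, 2, 3]), scores=np.array([4.5, 3.8, 4.9]))

# The file handle is closed automatically when the with-block exits
with np.load(path) as data:
    print(list(data.keys()))   # ['ids', 'scores']
    print(data['scores'].mean())
os.remove(path)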
Named vs Positional Arguments
import numpy as np
import tempfile
import os
# Sample data
sales_q1 = np.array([12000, 13500, 11800])
sales_q2 = np.array([14200, 12900, 15100])
sales_q3 = np.array([13800, 14500, 13200])
temp_dir = tempfile.gettempdir()
# Method 1: Named arguments (recommended)
file_path1 = os.path.join(temp_dir, 'sales_named.npz')
np.savez(file_path1, q1=sales_q1, q2=sales_q2, q3=sales_q3)
# Method 2: Positional arguments
file_path2 = os.path.join(temp_dir, 'sales_positional.npz')
np.savez(file_path2, sales_q1, sales_q2, sales_q3)
# Load named version
named_data = np.load(file_path1)
print(f"Named arrays: {list(named_data.keys())}")
print(f"Q1 sales: {named_data['q1']}")
# Load positional version
pos_data = np.load(file_path2)
print(f"\nPositional arrays: {list(pos_data.keys())}")
print(f"First array: {pos_data['arr_0']}") # Auto-named arr_0, arr_1, etc.
# Clean up
named_data.close()
pos_data.close()
os.remove(file_path1)
os.remove(file_path2)
Compressed Archives
import numpy as np
import tempfile
import os
# Large dataset for compression demo
large_data = {
    'sensor_readings': np.random.random((1000, 50)),  # 50,000 numbers
    'timestamps': np.arange(1000),
    'sensor_ids': np.random.randint(100, 999, 50),
    'calibration_factors': np.ones(50) + np.random.random(50) * 0.1
}
# Note: random values compress poorly; real-world data with structure
# and repetition usually shrinks much more than this demo suggests.
print(f"Large dataset:")
total_elements = sum(arr.size for arr in large_data.values())
print(f"Total elements: {total_elements:,}")
temp_dir = tempfile.gettempdir()
# Save uncompressed
uncompressed_path = os.path.join(temp_dir, 'sensor_data.npz')
np.savez(uncompressed_path, **large_data)
# Save compressed
compressed_path = os.path.join(temp_dir, 'sensor_data_compressed.npz')
np.savez_compressed(compressed_path, **large_data)
# Compare file sizes
uncompressed_size = os.path.getsize(uncompressed_path)
compressed_size = os.path.getsize(compressed_path)
print(f"\n📁 File Size Comparison:")
print(f"Uncompressed: {uncompressed_size / 1024:.1f} KB")
print(f"Compressed: {compressed_size / 1024:.1f} KB")
print(f"Space savings: {(1 - compressed_size/uncompressed_size)*100:.1f}%")
# Load compressed data (same interface)
loaded_compressed = np.load(compressed_path)
print(f"\nCompressed data loaded successfully")
print(f"Available arrays: {list(loaded_compressed.keys())}")
# Clean up
loaded_compressed.close()
os.remove(uncompressed_path)
os.remove(compressed_path)
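For arrays too large to load into memory at once, plain .npy files (not .npz archives) can also be memory-mapped, so only the slices you touch are read from disk. A minimal sketch:

import numpy as np
import tempfile
import os

path = os.path.join(tempfile.gettempdir(), 'big_readings.npy')
np.save(path, np.random.random((1000, 50)))

# mmap_mode='r' maps the file read-only instead of loading it wholesale
mapped = np.load(path, mmap_mode='r')
print(mapped[10:12])       # only these rows are actually read
print(mapped.mean(axis=0)[:3])
del mapped                 # release the map before removing the file
os.remove(path)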
📄 Text Formats
Save human-readable files for sharing and compatibility.
Basic Text Files
import numpy as np
import tempfile
import os
# Simple 2D data
product_sales = np.array([[120, 135, 145],
                          [98, 112, 125],
                          [156, 167, 175]])
regions = ['North', 'South', 'East']
quarters = ['Q1', 'Q2', 'Q3']
print(f"Product sales by region and quarter:")
print(product_sales)
temp_dir = tempfile.gettempdir()
text_path = os.path.join(temp_dir, 'product_sales.txt')
# Save as text (space-separated by default)
np.savetxt(text_path, product_sales, fmt='%d')
print(f"\n✅ Saved as text file")
# Load back
loaded_sales = np.loadtxt(text_path)
print(f"📂 Loaded from text:")
print(loaded_sales)
# Check if data matches (converted to float)
print(f"Data matches: {np.allclose(product_sales, loaded_sales)}")
print(f"Note: Loaded as {loaded_sales.dtype} (text files default to float)")
os.remove(text_path)
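If you need the integers back instead of floats, pass dtype explicitly when loading. A quick sketch:

import numpy as np
import tempfile
import os

path = os.path.join(tempfile.gettempdir(), 'counts.txt')
np.savetxt(path, np.array([[120, 135], [98, 112]]), fmt='%d')

# dtype=int restores the integer type the float default would lose
loaded = np.loadtxt(path, dtype=int)
print(loaded, loaded.dtype)
os.remove(path)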
CSV Format
import numpy as np
import tempfile
import os
# Customer data suitable for CSV
customer_data = np.array([[1001, 25, 4.5, 250.50],
                          [1002, 34, 3.8, 180.25],
                          [1003, 28, 4.9, 420.75],
                          [1004, 45, 3.2, 95.00]])
columns = ['CustomerID', 'Age', 'Rating', 'Purchase']
print(f"Customer data for CSV export:")
print(f"Columns: {columns}")
print(customer_data)
temp_dir = tempfile.gettempdir()
csv_path = os.path.join(temp_dir, 'customers.csv')
# Save as CSV
np.savetxt(csv_path, customer_data,
           delimiter=',',
           fmt=['%d', '%d', '%.1f', '%.2f'],  # Different formats per column
           header='CustomerID,Age,Rating,Purchase',
           comments='')  # Remove the leading '#' from the header
print(f"\n✅ Saved as CSV file")
# Read the text file to show format
with open(csv_path, 'r') as f:
    content = f.read()
print(f"📄 CSV content:")
print(content)
# Load back
loaded_data = np.loadtxt(csv_path, delimiter=',', skiprows=1)
print(f"📂 Loaded data:")
print(loaded_data)
os.remove(csv_path)
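For CSV files with a header row, np.genfromtxt can pick up the column names and give you field access by name. A sketch assuming the same layout as above:

import numpy as np
import tempfile
import os

path = os.path.join(tempfile.gettempdir(), 'customers_demo.csv')
np.savetxt(path, np.array([[1001, 25, 4.5], [1002, 34, 3.8]]),
           delimiter=',', fmt=['%d', '%d', '%.1f'],
           header='CustomerID,Age,Rating', comments='')

# names=True reads the header into field names (a structured array)
table = np.genfromtxt(path, delimiter=',', names=True)
print(table['Rating'])      # access columns by name
print(table['Age'].mean())
os.remove(path)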
Custom Formatting
import numpy as np
import tempfile
import os
# Financial data requiring specific formatting
financial_data = np.array([[1200.456, 0.12345, 45],
                           [1350.789, 0.15678, 52],
                           [1180.123, 0.08901, 38]])
print(f"Financial data (raw):")
print(financial_data)
temp_dir = tempfile.gettempdir()
formatted_path = os.path.join(temp_dir, 'financial_report.txt')
# Save with custom formatting
np.savetxt(formatted_path, financial_data,
           fmt=['$%.2f', '%6.2f%%', '%3d'],  # Currency, percentage, integer
           delimiter='\t',  # Tab-separated
           header='Revenue\tGrowth\tEmployees',
           comments='# ')
print(f"\n✅ Saved with custom formatting")
# Show formatted content
with open(formatted_path, 'r') as f:
    content = f.read()
print(f"📄 Formatted content:")
print(content)
# Load with converters that strip the '$' and '%' decorations;
# plain loadtxt would fail because those fields aren't valid numbers.
# Note the values come back rounded to the precision we saved.
loaded_financial = np.loadtxt(formatted_path, skiprows=1,
                              converters={0: lambda s: float(s.lstrip('$')),
                                          1: lambda s: float(s.rstrip('%'))})
print(f"📂 Loaded data:")
print(loaded_financial)
os.remove(formatted_path)
🔄 Real-World Workflows
Practical examples of file operations in data workflows.
Analysis Pipeline
import numpy as np
import tempfile
import os
print("🔄 Data Analysis Pipeline Example")
# Step 1: Raw data processing
np.random.seed(42)
raw_sensor_data = np.random.normal(25, 3, (24, 10)) # 24 hours, 10 sensors
timestamps = np.arange(24)
print(f"Step 1: Processing raw data")
print(f"Raw data shape: {raw_sensor_data.shape}")
# Step 2: Data cleaning and analysis
valid_readings = (raw_sensor_data > 15) & (raw_sensor_data < 35)
hourly_averages = np.mean(raw_sensor_data, axis=1)
daily_trend = np.gradient(hourly_averages)
quality_score = np.mean(valid_readings)
print(f"Step 2: Analysis complete")
print(f"Data quality: {quality_score:.1%}")
print(f"Temperature trend: {np.mean(daily_trend):.3f}°C/hour")
# Step 3: Save intermediate results
temp_dir = tempfile.gettempdir()
pipeline_file = os.path.join(temp_dir, 'sensor_analysis.npz')
analysis_results = {
    'raw_data': raw_sensor_data,
    'hourly_averages': hourly_averages,
    'daily_trend': daily_trend,
    'quality_score': np.array([quality_score]),
    'timestamps': timestamps,
    'metadata': np.array(['2024-01-15', '10_sensors', 'calibrated'], dtype='U20')
}
np.savez_compressed(pipeline_file, **analysis_results)
print(f"Step 3: Saved analysis results")
# Step 4: Load for reporting (simulate new session)
print(f"\nStep 4: Generate reports (new session)")
loaded_analysis = np.load(pipeline_file)
print(f"📊 Analysis Summary:")
print(f"Data quality: {loaded_analysis['quality_score'][0]:.1%}")
print(f"Average temperature: {np.mean(loaded_analysis['hourly_averages']):.1f}°C")
print(f"Temperature range: {np.min(loaded_analysis['hourly_averages']):.1f} - {np.max(loaded_analysis['hourly_averages']):.1f}°C")
# Export summary for sharing
summary_path = os.path.join(temp_dir, 'temperature_summary.csv')
summary_data = np.column_stack([
    loaded_analysis['timestamps'],
    loaded_analysis['hourly_averages'],
    loaded_analysis['daily_trend']
])
np.savetxt(summary_path, summary_data,
           delimiter=',',
           fmt=['%d', '%.2f', '%.4f'],
           header='Hour,Temperature,Trend',
           comments='')
print(f"📤 Exported summary for sharing: temperature_summary.csv")
# Clean up
loaded_analysis.close()
os.remove(pipeline_file)
os.remove(summary_path)
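Since every value in an .npz archive must be an array, one common workaround (shown here as a sketch, not the only option) is to serialize richer metadata to a JSON string and store it as a zero-dimensional string array:

import json
import numpy as np
import tempfile
import os

path = os.path.join(tempfile.gettempdir(), 'run_with_meta.npz')
meta = {'date': '2024-01-15', 'sensors': 10, 'calibrated': True}

# JSON survives the round trip as a 0-d string array
np.savez_compressed(path, readings=np.random.random((24, 10)),
                    meta=np.array(json.dumps(meta)))
with np.load(path) as data:
    restored = json.loads(str(data['meta']))
    print(restored['sensors'])  # 10
os.remove(path)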
Data Backup and Versioning
import numpy as np
import tempfile
import os
print("🗂️ Data Backup and Versioning Example")
# Simulate evolving analysis
def create_analysis_v1():
    return {
        'sales_data': np.array([1200, 1350, 1180, 1420]),
        'growth_rates': np.array([0.12, 0.15, -0.05, 0.20]),
        'version': np.array(['1.0'], dtype='U10'),
        'created_date': np.array(['2024-01-15'], dtype='U10')
    }

def create_analysis_v2():
    return {
        'sales_data': np.array([1200, 1350, 1180, 1420, 1290, 1380]),
        'growth_rates': np.array([0.12, 0.15, -0.05, 0.20, -0.09, 0.07]),
        'forecast': np.array([1450, 1500, 1520]),  # New feature
        'confidence_intervals': np.array([50, 60, 70]),  # New feature
        'version': np.array(['2.0'], dtype='U10'),
        'created_date': np.array(['2024-01-22'], dtype='U10')
    }
temp_dir = tempfile.gettempdir()
# Save version 1
analysis_v1 = create_analysis_v1()
v1_file = os.path.join(temp_dir, 'sales_analysis_v1.0.npz')
np.savez_compressed(v1_file, **analysis_v1)
print(f"✅ Saved version 1.0")
# Save version 2
analysis_v2 = create_analysis_v2()
v2_file = os.path.join(temp_dir, 'sales_analysis_v2.0.npz')
np.savez_compressed(v2_file, **analysis_v2)
print(f"✅ Saved version 2.0")
# Load and compare versions
v1_data = np.load(v1_file)
v2_data = np.load(v2_file)
print(f"\n📊 Version Comparison:")
print(f"V1 data points: {len(v1_data['sales_data'])}")
print(f"V2 data points: {len(v2_data['sales_data'])}")
print(f"V1 features: {list(v1_data.keys())}")
print(f"V2 features: {list(v2_data.keys())}")
# Show evolution
common_data = v1_data['sales_data']
v1_total = np.sum(common_data)
v2_total = np.sum(v2_data['sales_data'][:len(common_data)])
print(f"\nAnalysis Evolution:")
print(f"V1 total sales: ${v1_total:,}")
print(f"V2 total sales (same period): ${v2_total:,}")
print(f"Data consistency: {np.array_equal(common_data, v2_data['sales_data'][:len(common_data)])}")
if 'forecast' in v2_data:
    print(f"V2 adds forecasting: {v2_data['forecast']}")
# Clean up
v1_data.close()
v2_data.close()
os.remove(v1_file)
os.remove(v2_file)
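For backups you intend to keep around, it also helps to record a checksum so corruption can be detected later. A minimal sketch using only the standard library (the sha256_of helper is just an illustration):

import hashlib
import numpy as np
import tempfile
import os

def sha256_of(path):
    """Hash a file in chunks so large archives don't fill memory."""
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            h.update(chunk)
    return h.hexdigest()

path = os.path.join(tempfile.gettempdir(), 'archive_demo.npz')
np.savez_compressed(path, sales=np.array([1200, 1350, 1180]))
checksum = sha256_of(path)  # store this alongside the backup
print(f"Recorded checksum: {checksum[:16]}...")
print(f"Still intact: {sha256_of(path) == checksum}")
os.remove(path)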
🛠️ Best Practices
File Organization
import numpy as np
import tempfile
import os
print("📁 File Organization Best Practices")
# Organize by analysis type and date
temp_dir = tempfile.gettempdir()
project_dir = os.path.join(temp_dir, 'customer_analysis_2024')
# Simulate directory structure
file_structure = {
    'raw_data': 'customer_raw_data_2024_01_15.npz',
    'processed': 'customer_processed_2024_01_15.npz',
    'results': 'customer_analysis_results_2024_01_15.npz',
    'reports': 'customer_summary_2024_01_15.csv'
}
print("Recommended file structure:")
for category, filename in file_structure.items():
    print(f"  {category}/: {filename}")
# Example data
sample_data = {
    'customer_ids': np.array([1001, 1002, 1003]),
    'scores': np.array([8.5, 7.2, 9.1]),
    'analysis_date': np.array(['2024-01-15'], dtype='U10')
}
# Save with descriptive names
descriptive_filename = os.path.join(temp_dir, 'customer_segmentation_analysis_v2.1_2024-01-15.npz')
np.savez_compressed(descriptive_filename, **sample_data)
print(f"\n✅ Saved with descriptive name:")
print(f" {os.path.basename(descriptive_filename)}")
print(f" Contains: project_type_version_date")
os.remove(descriptive_filename)
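To actually create that layout on disk, os.makedirs with exist_ok=True builds it idempotently. A small sketch reusing the customer_analysis_2024 project folder from above:

import tempfile
import os

project_dir = os.path.join(tempfile.gettempdir(), 'customer_analysis_2024')
for subdir in ['raw_data', 'processed', 'results', 'reports']:
    # exist_ok=True makes repeated runs safe
    os.makedirs(os.path.join(project_dir, subdir), exist_ok=True)
print(sorted(os.listdir(project_dir)))
# Clean up the demo directories
for subdir in ['raw_data', 'processed', 'results', 'reports']:
    os.rmdir(os.path.join(project_dir, subdir))
os.rmdir(project_dir)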
Error Handling
import numpy as np
import tempfile
import os
print("⚠️ Error Handling Best Practices")
def safe_save_analysis(data_dict, filename):
    """Safely save analysis with backup and validation."""
    try:
        # Validate data before saving
        for name, array in data_dict.items():
            if not isinstance(array, np.ndarray):
                raise ValueError(f"{name} is not a NumPy array")
            if array.size == 0:
                raise ValueError(f"{name} is empty")
        # Create backup if file exists
        if os.path.exists(filename):
            backup_name = filename.replace('.npz', '_backup.npz')
            if os.path.exists(backup_name):
                os.remove(backup_name)
            os.rename(filename, backup_name)
            print(f"📦 Created backup: {os.path.basename(backup_name)}")
        # Save new data
        np.savez_compressed(filename, **data_dict)
        print(f"✅ Successfully saved: {os.path.basename(filename)}")
        return True
    except Exception as e:
        print(f"❌ Save failed: {e}")
        return False
def safe_load_analysis(filename):
    """Safely load analysis with error handling."""
    try:
        if not os.path.exists(filename):
            raise FileNotFoundError(f"File not found: {filename}")
        data = np.load(filename)
        print(f"✅ Successfully loaded: {os.path.basename(filename)}")
        print(f"   Contains: {list(data.keys())}")
        return data
    except Exception as e:
        print(f"❌ Load failed: {e}")
        return None
# Test error handling
temp_dir = tempfile.gettempdir()
test_file = os.path.join(temp_dir, 'test_analysis.npz')
# Valid data
valid_data = {
    'sales': np.array([1200, 1350, 1180]),
    'dates': np.array(['2024-01', '2024-02', '2024-03'], dtype='U10')
}
# Test saving
success = safe_save_analysis(valid_data, test_file)
if success:
    # Test loading
    loaded = safe_load_analysis(test_file)
    if loaded is not None:  # explicit None check; an empty NpzFile is falsy
        loaded.close()
# Test with invalid data
invalid_data = {
    'empty_array': np.array([]),  # This will cause an error
    'valid_data': np.array([1, 2, 3])
}
print(f"\nTesting with invalid data:")
safe_save_analysis(invalid_data, test_file)
# Clean up
if os.path.exists(test_file):
    os.remove(test_file)
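One more defensive pattern worth knowing (sketched with a hypothetical atomic_savez helper): write to a temporary file first, then swap it into place with os.replace, so a crash mid-save never leaves a half-written archive behind.

import numpy as np
import tempfile
import os

def atomic_savez(filename, **arrays):
    """Hypothetical helper: save to a temp file, then swap it into place.
    os.replace is atomic on the same filesystem, so readers never see a
    partially written archive."""
    tmp_path = filename + '.tmp.npz'  # keep the .npz suffix so savez doesn't append one
    np.savez_compressed(tmp_path, **arrays)
    os.replace(tmp_path, filename)

path = os.path.join(tempfile.gettempdir(), 'atomic_demo.npz')
atomic_savez(path, sales=np.array([1200, 1350, 1180]))
print(f"Saved atomically: {os.path.basename(path)}")
os.remove(path)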
🎯 Key Takeaways
🎉 Congratulations!
You've mastered NumPy file operations! You can now:
- Save and load arrays in multiple formats
- Build robust data processing pipelines
- Share analysis results effectively
- Implement proper data backup strategies
🚀 What's Next?
You've completed the entire NumPy tutorial series! 🎉
You've mastered:
- ✅ Array creation and manipulation
- ✅ Mathematical operations and functions
- ✅ Indexing and data selection
- ✅ Aggregation and analysis
- ✅ Advanced features and optimization
- ✅ File operations and data persistence
Ready for more? Explore pandas for data analysis, matplotlib for visualization, or dive into machine learning with scikit-learn!
Congratulations on becoming a NumPy expert! 🏆