✂️ Splitting and Joining Arrays
Splitting divides arrays into smaller parts. Joining combines multiple arrays into one. These operations are essential for data organization and processing workflows.
import numpy as np
# Quick tour: cut a 2x4 array into two halves, then glue them back together.
data = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
])

# hsplit cuts along the column axis; asking for 2 sections gives two 2x2 halves.
halves = np.hsplit(data, 2)
left, right = halves
print(f"Left: \n{left}")
print(f"Right: \n{right}")

# hstack places the halves side by side again, restoring the original layout.
rejoined = np.hstack((left, right))
print(f"Rejoined: \n{rejoined}")
✂️ Splitting Arrays
Horizontal and Vertical Splits
import numpy as np
# A 4x6 grid holding 0..23 makes it easy to see where each cut lands.
n_rows, n_cols = 4, 6
matrix = np.arange(n_rows * n_cols).reshape(n_rows, n_cols)

# Column-wise cut: 6 columns in 3 sections -> three 4x2 pieces.
h_splits = np.hsplit(matrix, 3)
# Row-wise cut: 4 rows in 2 sections -> two 2x6 pieces.
v_splits = np.vsplit(matrix, 2)

print(f"Original: \n{matrix}")
print(f"Horizontal splits: {len(h_splits)} parts")
print(f"Vertical splits: {len(v_splits)} parts")
Split at Specific Positions
import numpy as np
# 4x5 grid of 0..19. Five columns cannot be divided into equal thirds, so the
# cut positions are given explicitly instead of a section count.
data = np.arange(20).reshape(4, 5)

# Cutting before column 2 and before column 4 yields widths 2, 2 and 1.
cut_columns = [2, 4]
parts = np.hsplit(data, cut_columns)

print(f"Part 1: \n{parts[0]}")
print(f"Part 2: \n{parts[1]}")
print(f"Part 3: \n{parts[2]}")
🔗 Joining Arrays
Stack Horizontally and Vertically
import numpy as np
# Two 2x2 blocks to be joined in both directions.
arr1 = np.array([[1, 2], [3, 4]])
arr2 = np.array([[5, 6], [7, 8]])
blocks = (arr1, arr2)

# Side by side: row count stays 2, columns grow to 4.
horizontal = np.hstack(blocks)
# Top to bottom: column count stays 2, rows grow to 4.
vertical = np.vstack(blocks)

print(f"Array 1: \n{arr1}")
print(f"Array 2: \n{arr2}")
print(f"Horizontal stack: \n{horizontal}")
print(f"Vertical stack: \n{vertical}")
Concatenate with Axis Control
import numpy as np
# The same pair of 2x2 arrays joined along each axis in turn.
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
operands = (a, b)

# axis=0 appends b's rows beneath a's (result is 4x2).
concat_rows = np.concatenate(operands, axis=0)
print(f"Concatenate rows: \n{concat_rows}")

# axis=1 appends b's columns to the right of a's (result is 2x4).
concat_cols = np.concatenate(operands, axis=1)
print(f"Concatenate cols: \n{concat_cols}")
🎯 Practical Applications
Dataset Splitting for ML
import numpy as np
# Create a sample dataset: 100 samples with 5 features each, plus binary labels.
X = np.random.rand(100, 5)
y = np.random.randint(0, 2, 100)

# Derive the cut index from the split ratio instead of hard-coding 80, so the
# example keeps an 80/20 split if the number of samples changes.
train_fraction = 0.8
train_size = round(train_fraction * len(X))

# Cutting X and y at the same index keeps each sample aligned with its label.
# NOTE: this is a purely positional split; shuffle first if row order matters.
X_train, X_test = np.split(X, [train_size])
y_train, y_test = np.split(y, [train_size])

print(f"Training: X={X_train.shape}, y={y_train.shape}")
print(f"Testing: X={X_test.shape}, y={y_test.shape}")
Combining Results
import numpy as np
# Per-sample scores produced by three separate models (four samples each).
model1_pred = np.array([0.8, 0.3, 0.9, 0.2])
model2_pred = np.array([0.7, 0.4, 0.8, 0.3])
model3_pred = np.array([0.9, 0.2, 0.9, 0.1])
model_outputs = (model1_pred, model2_pred, model3_pred)

# column_stack turns each 1D vector into a column: rows are samples, columns
# are models, so a row-wise mean is the ensemble score for that sample.
ensemble = np.column_stack(model_outputs)
ensemble_avg = np.mean(ensemble, axis=1)

print(f"Ensemble predictions: \n{ensemble}")
print(f"Average predictions: {ensemble_avg}")
Processing Data in Chunks
import numpy as np
# Large dataset processed in batches
large_data = np.random.rand(1000, 10)

# Split into batches of 200 rows. np.split with an integer count needs
# equal-sized sections, so any trailing rows that would not fill a whole
# batch are trimmed off before splitting.
batch_size = 200
num_batches = len(large_data) // batch_size
batches = np.split(large_data[:num_batches * batch_size], num_batches)
print(f"Split into {len(batches)} batches")

for i, batch in enumerate(batches[:2]):  # Show first 2
    # The loop body must be indented; without it the snippet does not parse.
    print(f"Batch {i+1}: {batch.shape}")
🔧 Working with 1D Arrays
Combining Vectors
import numpy as np
# Three length-3 vectors combined in three different ways.
vec1 = np.array([1, 2, 3])
vec2 = np.array([4, 5, 6])
vec3 = np.array([7, 8, 9])
vectors = (vec1, vec2, vec3)

# End to end: the result is still 1D, now with 9 elements.
concatenated = np.concatenate(vectors)
print(f"Concatenated: {concatenated}")

# One vector per row -> a 3x3 matrix.
stacked_rows = np.vstack(vectors)
print(f"Stacked rows: \n{stacked_rows}")

# One vector per column -> the transpose of the row-stacked matrix.
stacked_cols = np.column_stack(vectors)
print(f"Stacked columns: \n{stacked_cols}")
🧠 Practice Exercise
import numpy as np
# Random daily sales for 3 stores; each store is a (4 weeks, 7 days) grid.
weeks, days = 4, 7
store1 = np.random.randint(100, 300, weeks * days).reshape(weeks, days)
store2 = np.random.randint(80, 250, weeks * days).reshape(weeks, days)
store3 = np.random.randint(120, 350, weeks * days).reshape(weeks, days)
print(f"Store 1 weekly totals: {store1.sum(axis=1)}")

# np.stack adds a new leading axis, giving shape (store, week, day).
all_stores = np.stack((store1, store2, store3))
print(f"All stores shape: {all_stores.shape}")

# Collapse the week and day axes to get one grand total per store.
total_sales = all_stores.sum(axis=(1, 2))
print(f"Total sales per store: {total_sales}")

# Sum over days to get weekly totals per store, then pick the largest week.
# argmax is zero-based, hence the +1 when reporting week numbers.
weekly_totals = all_stores.sum(axis=2)
best_weeks = weekly_totals.argmax(axis=1)
print(f"Best week for each store: {best_weeks + 1}")
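🎯 Key Takeaways
- Split with np.hsplit (columns), np.vsplit (rows) or np.split; pass a section count for equal parts or a list of indices to cut at specific positions.
- Join with np.hstack, np.vstack or np.concatenate (choose the axis); np.column_stack turns 1D vectors into columns, and np.stack adds a new axis.
- Splitting features and labels at the same index keeps them aligned; trim leftover rows before splitting into equal-sized batches.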
🚀 What's Next?
Next, learn about views vs. copies, which is crucial for understanding memory use and performance!
Continue to: Copying and Views
Was this helpful?
Track Your Learning Progress
Sign in to bookmark tutorials and keep track of your learning journey.
Your progress is saved automatically as you read.