✂️ Splitting and Joining Arrays

Splitting divides arrays into smaller parts. Joining combines multiple arrays into one. These operations are essential for data organization and processing workflows.

import numpy as np

# Round trip: cut a small matrix in half column-wise, then glue it back
data = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

# hsplit with an integer asks for that many equal-width column groups;
# a 2x4 matrix therefore yields two 2x2 halves
halves = np.hsplit(data, 2)
left, right = halves
print(f"Left: \n{left}")
print(f"Right: \n{right}")

# hstack places the pieces side by side again, restoring the original layout
rejoined = np.hstack((left, right))
print(f"Rejoined: \n{rejoined}")

✂️ Splitting Arrays

Horizontal and Vertical Splits

import numpy as np

# 4 rows x 6 columns holding the integers 0..23
matrix = np.arange(24).reshape((4, 6))
print(f"Original: \n{matrix}")

# hsplit divides the columns: 6 columns / 3 sections -> three 4x2 pieces
column_sections = 3
h_splits = np.hsplit(matrix, column_sections)
print(f"Horizontal splits: {len(h_splits)} parts")

# vsplit divides the rows: 4 rows / 2 sections -> two 2x6 pieces
row_sections = 2
v_splits = np.vsplit(matrix, row_sections)
print(f"Vertical splits: {len(v_splits)} parts")

Split at Specific Positions

import numpy as np

# A 4x5 grid of 0..19; five columns cannot be divided into equal groups of
# two, so the cut positions are given explicitly instead of a section count
data = np.arange(20).reshape((4, 5))

# A list argument means "cut before these column indices":
# [2, 4] -> columns 0-1, columns 2-3, and the single remaining column 4
column_cuts = [2, 4]
parts = np.hsplit(data, column_cuts)
print(f"Part 1: \n{parts[0]}")
print(f"Part 2: \n{parts[1]}")
print(f"Part 3: \n{parts[2]}")

🔗 Joining Arrays

Stack Horizontally and Vertically

import numpy as np

# Two 2x2 matrices to combine
arr1 = np.array([[1, 2],
                 [3, 4]])
arr2 = np.array([[5, 6],
                 [7, 8]])

print(f"Array 1: \n{arr1}")
print(f"Array 2: \n{arr2}")

pair = (arr1, arr2)

# hstack: arr2's columns go to the right of arr1's -> result is 2x4
horizontal = np.hstack(pair)
print(f"Horizontal stack: \n{horizontal}")

# vstack: arr2's rows go underneath arr1's -> result is 4x2
vertical = np.vstack(pair)
print(f"Vertical stack: \n{vertical}")

Concatenate with Axis Control

import numpy as np

a = np.array([[1, 2],
              [3, 4]])
b = np.array([[5, 6],
              [7, 8]])

# concatenate joins along an existing axis chosen by the caller
operands = (a, b)
concat_rows = np.concatenate(operands, axis=0)  # axis 0: b's rows appended below a -> 4x2
concat_cols = np.concatenate(operands, axis=1)  # axis 1: b's columns appended right of a -> 2x4

print(f"Concatenate rows: \n{concat_rows}")
print(f"Concatenate cols: \n{concat_cols}")

🎯 Practical Applications

Dataset Splitting for ML

import numpy as np

# Create sample dataset
X = np.random.rand(100, 5)  # 100 samples, 5 features
y = np.random.randint(0, 2, 100)  # Binary labels

# Shuffle before splitting: cutting the data in stored order would put only
# the tail of the dataset in the test set, which is biased whenever the rows
# are sorted or grouped. One shared permutation keeps each row of X paired
# with its label in y.
order = np.random.permutation(len(X))
X_shuffled, y_shuffled = X[order], y[order]

# Split 80/20 for train/test. The cut index is derived from the dataset
# length so the ratio stays correct if the number of samples changes.
train_fraction = 0.8
train_size = round(train_fraction * len(X))
X_train, X_test = np.split(X_shuffled, [train_size])
y_train, y_test = np.split(y_shuffled, [train_size])

print(f"Training: X={X_train.shape}, y={y_train.shape}")
print(f"Testing: X={X_test.shape}, y={y_test.shape}")

Combining Results

import numpy as np

# Scores produced by three models for the same four samples
model1_pred = np.array([0.8, 0.3, 0.9, 0.2])
model2_pred = np.array([0.7, 0.4, 0.8, 0.3])
model3_pred = np.array([0.9, 0.2, 0.9, 0.1])

# column_stack makes each 1D prediction vector one column, giving a
# (samples, models) matrix: row i holds every model's score for sample i
ensemble = np.column_stack((model1_pred, model2_pred, model3_pred))

# Averaging across axis 1 (the model axis) yields one score per sample
ensemble_avg = np.mean(ensemble, axis=1)

print(f"Ensemble predictions: \n{ensemble}")
print(f"Average predictions: {ensemble_avg}")

Processing Data in Chunks

import numpy as np

# Large dataset processed in batches
large_data = np.random.rand(1000, 10)

# Split into batches of 200 rows. Cutting at explicit row indices
# (200, 400, ...) rather than asking for N equal sections means no rows are
# discarded: when the row count is not a multiple of batch_size the last
# batch is simply shorter, and a dataset smaller than one batch still
# produces a single batch instead of raising.
batch_size = 200
split_points = np.arange(batch_size, len(large_data), batch_size)
batches = np.split(large_data, split_points)
num_batches = len(batches)

print(f"Split into {len(batches)} batches")
for i, batch in enumerate(batches[:2]):  # Show first 2
    print(f"Batch {i+1}: {batch.shape}")

🔧 Working with 1D Arrays

Combining Vectors

import numpy as np

vec1 = np.array([1, 2, 3])
vec2 = np.array([4, 5, 6])
vec3 = np.array([7, 8, 9])

vectors = (vec1, vec2, vec3)

# concatenate on 1D inputs appends end to end -> one vector of length 9
concatenated = np.concatenate(vectors)
print(f"Concatenated: {concatenated}")

# vstack treats each vector as a row -> 3x3 with vec1 on the first row
stacked_rows = np.vstack(vectors)
print(f"Stacked rows: \n{stacked_rows}")

# column_stack treats each vector as a column -> 3x3 with vec1 in the first column
stacked_cols = np.column_stack(vectors)
print(f"Stacked columns: \n{stacked_cols}")

🧠 Practice Exercise

import numpy as np

# Random daily sales for 3 stores. Each array is reshaped to 4 rows x 7
# columns, i.e. one row per week and one column per day of that week.
store1 = np.random.randint(100, 300, 28).reshape(4, 7)
store2 = np.random.randint(80, 250, 28).reshape(4, 7)
store3 = np.random.randint(120, 350, 28).reshape(4, 7)

# Summing across axis 1 (the 7 days) leaves one total per week
print(f"Store 1 weekly totals: {store1.sum(axis=1)}")

# np.stack adds a new leading axis, so the result is (store, week, day)
all_stores = np.stack((store1, store2, store3), axis=0)
print(f"All stores shape: {all_stores.shape}")

# Collapse both the week and day axes to get one grand total per store
total_sales = np.sum(all_stores, axis=(1, 2))
print(f"Total sales per store: {total_sales}")

# Weekly totals per store have shape (store, week); argmax over the week
# axis returns the 0-based index of each store's strongest week
weekly_totals = all_stores.sum(axis=2)
best_weeks = weekly_totals.argmax(axis=1)
# +1 turns the 0-based index into a human-friendly week number
print(f"Best week for each store: {best_weeks + 1}")

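🎯 Key Takeaways

- `np.hsplit` and `np.vsplit` cut an array along its columns and rows respectively; pass an integer for equal parts or a list of indices for specific cut positions.
- `np.hstack`, `np.vstack`, and `np.concatenate(..., axis=...)` join arrays along an existing axis, while `np.stack` joins them along a new axis.
- `np.column_stack` turns 1D vectors into the columns of a 2D array.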

🚀 What's Next?

Now learn about views vs copies - crucial for understanding memory and performance!

Continue to: Copying and Views

Was this helpful?

😔Poor
🙁Fair
😊Good
😄Great
🤩Excellent