🔢 Data Types and Memory
Understanding data types is crucial for efficient NumPy programming! Data types determine how numbers are stored in memory, how much space they use, and what operations are possible. Choosing the right data type can dramatically improve performance and reduce memory usage.
Think of data types as different "containers" for numbers - some are small and efficient, others are large but more precise!
import numpy as np
# Build one small array per basic dtype, then report each dtype and its size.
int_array = np.array([1, 2, 3], dtype=np.int32)
float_array = np.array([1.0, 2.0, 3.0], dtype=np.float64)
bool_array = np.array([True, False, True], dtype=bool)

# One line per array: its contents followed by the dtype it is stored as.
print(
    *(
        f"{label} array: {arr}, dtype: {arr.dtype}"
        for label, arr in (
            ("Integer", int_array),
            ("Float", float_array),
            ("Boolean", bool_array),
        )
    ),
    sep="\n",
)

# nbytes is the size of the element buffer: element count × bytes per element.
print("Memory usage:")
print(
    *(
        f" {label}: {arr.nbytes} bytes"
        for label, arr in (
            ("int32", int_array),
            ("float64", float_array),
            ("bool", bool_array),
        )
    ),
    sep="\n",
)
🎯 NumPy Data Type Categories
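NumPy organizes data types into logical categories based on what kind of data they store: signed integers (`int8`–`int64`), unsigned integers (`uint8`–`uint64`), floating-point numbers (`float16`–`float64`), complex numbers (`complex64`, `complex128`), booleans (`bool`), and text/bytes strings.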
🔍 Exploring Data Types
Let's see how different data types behave:
import numpy as np
# Store one list of values under five dtypes to compare their footprints.
numbers = [10, 20, 30, 40, 50]
int8_array, int32_array, int64_array, float32_array, float64_array = (
    np.array(numbers, dtype=kind)
    for kind in (np.int8, np.int32, np.int64, np.float32, np.float64)
)

# dtype.name supplies the label ("int8", "float32", ...) for each row.
print("Same data, different types:")
print(
    *(
        f"{arr.dtype.name}: {arr} | {arr.nbytes} bytes"
        for arr in (
            int8_array,
            int32_array,
            int64_array,
            float32_array,
            float64_array,
        )
    ),
    sep="\n",
)

# Savings of the narrower integer types, measured against the int64 copy.
print("\nMemory comparison (vs int64):")
print(
    *(
        f"{small.dtype.name} saves: {int64_array.nbytes - small.nbytes} bytes "
        f"({(1 - small.nbytes / int64_array.nbytes) * 100:.0f}%)"
        for small in (int8_array, int32_array)
    ),
    sep="\n",
)
📊 Automatic Type Inference
NumPy automatically chooses data types based on your input:
import numpy as np
# Let NumPy infer each dtype from the literal values alone (no dtype= given).
integers = np.array([1, 2, 3])
floats = np.array([1.0, 2.0, 3.0])
mixed = np.array([1, 2.0, 3])  # a single float promotes the whole array
booleans = np.array([True, False, True])
strings = np.array(['a', 'b', 'c'])

print(
    *(
        f"{label}: {arr}, dtype: {arr.dtype}"
        for label, arr in (
            ("Integers", integers),
            ("Floats", floats),
            ("Mixed", mixed),
            ("Booleans", booleans),
            ("Strings", strings),
        )
    ),
    sep="\n",
)

# Arithmetic between arrays of different dtypes yields a result dtype that
# can represent both operands.
print("\nType promotion examples:")
int_plus_float = integers + floats
bool_plus_int = booleans + integers
print(f"int + float → {int_plus_float.dtype}")
print(f"bool + int → {bool_plus_int.dtype}")
💾 Memory Usage Optimization
Choosing the right data type can save significant memory, especially with large arrays:
import numpy as np
# Memory optimization example: the same index range stored at several widths.
large_size = 1000000  # 1 million elements

# No dtype given: NumPy picks its default integer (int64 on most platforms).
default_array = np.arange(large_size)
int32_array = np.arange(large_size, dtype=np.int32)
# int16 cannot represent values above 32,767, so this array overflows; it is
# built only to compare its memory footprint.
int16_array = np.arange(large_size, dtype=np.int16)
uint8_array = np.arange(256, dtype=np.uint8)  # 0..255 is exactly uint8's range

print(f"Memory usage for {large_size:,} elements:")
print(
    *(
        f"{label}: {arr.nbytes / 1024 / 1024:.1f} MB"
        for label, arr in (
            ("int64 (default)", default_array),
            ("int32", int32_array),
            ("int16", int16_array),
        )
    ),
    sep="\n",
)

print("\nSmall array (256 elements):")
print(f"uint8: {uint8_array.nbytes} bytes")
print(f"Range: {uint8_array.min()} to {uint8_array.max()}")

# Difference between the default-width array and its int32 copy, in MB.
savings = (default_array.nbytes - int32_array.nbytes) / 1024 / 1024
print(f"\nMemory saved with int32: {savings:.1f} MB")
⚠️ Data Type Limitations and Overflow
Different data types have different ranges. Exceeding these ranges causes overflow:
import numpy as np
# Demonstrating data type limits
print("🚨 Data Type Limits and Overflow")
print("=" * 35)

# Integer overflow example: int8 can only hold -128 to 127.
small_int = np.array([100], dtype=np.int8)
print(f"int8 value: {small_int[0]}")

# Push the stored value past the int8 maximum with in-place array arithmetic.
# Assigning the out-of-range Python int directly (small_int[0] = 200) raises
# OverflowError on NumPy 2.x and is deprecated on NumPy 1.24+, whereas
# fixed-width array arithmetic wraps around silently on every version.
small_int += 100  # 100 + 100 = 200, which exceeds 127
print(f"After overflow: {small_int[0]}")  # Wraps around to -56

# Safe range checking: np.iinfo reports the representable bounds of an
# integer dtype, so values can be validated before they are stored.
print(f"int8 range: {np.iinfo(np.int8).min} to {np.iinfo(np.int8).max}")
print(f"int16 range: {np.iinfo(np.int16).min} to {np.iinfo(np.int16).max}")
print(f"int32 range: {np.iinfo(np.int32).min} to {np.iinfo(np.int32).max}")

# Float precision limits: the same literal stored at two widths; float32
# keeps fewer significant digits than float64.
float32_val = np.array([1.23456789], dtype=np.float32)
float64_val = np.array([1.23456789], dtype=np.float64)
print(f"float32 precision: {float32_val[0]}")
print(f"float64 precision: {float64_val[0]}")
🔄 Converting Between Data Types
You can convert arrays between different data types using `astype()`:
import numpy as np
# Type conversion examples using astype().
float_data = np.array([1.7, 2.3, 3.9, 4.1])
print(f"Original floats: {float_data}")

# Float -> integer drops the fractional part; float -> bool maps every
# non-zero value to True.
as_int, as_int32, as_bool = (
    float_data.astype(target) for target in (int, np.int32, bool)
)
print(
    *(
        f"As {label}: {converted}"
        for label, converted in (
            ("int", as_int),
            ("int32", as_int32),
            ("bool", as_bool),
        )
    ),
    sep="\n",
)

# Numbers -> strings
numbers = np.array([1, 2, 3])
as_string = numbers.astype(str)
print(f"As strings: {as_string}")

# Strings that contain digits -> integers
string_numbers = np.array(['10', '20', '30'])
back_to_int = string_numbers.astype(int)
print(f"Back to int: {back_to_int}")
🎯 Choosing the Right Data Type
Here's a practical guide for selecting data types:
Use Case | Recommended Type | Reason |
---|---|---|
Image pixels | uint8 | 0-255 range, minimal memory |
Small integers | int16 or int32 | Good range, a quarter (int16) or half (int32) the memory of int64 |
Large datasets | float32 | Half memory of float64, sufficient precision |
Scientific computing | float64 | Maximum precision for calculations |
Boolean flags | bool | Minimal memory, clear intent |
Counting/indices | uint32 | Positive integers, good range |
import numpy as np
# Practical data type selection
print("🎯 Practical Data Type Selection")
print("=" * 35)

# Example 1: a 3-channel 1080×1920 frame stored as uint8 versus float64.
image_height, image_width = 1080, 1920
image_pixels = image_height * image_width  # pixel count per channel
uint8_image, float64_image = (
    np.zeros((image_height, image_width, 3), dtype=kind)
    for kind in (np.uint8, np.float64)
)
print(f"HD Image ({image_height}×{image_width}):")
print(
    *(
        f" {label}: {size_bytes / 1024 / 1024:.1f} MB"
        for label, size_bytes in (
            ("uint8", uint8_image.nbytes),
            ("float64", float64_image.nbytes),
            ("Memory saved", float64_image.nbytes - uint8_image.nbytes),
        )
    ),
    sep="\n",
)

# Example 2: the same random measurements stored at two float widths.
measurements = np.random.randn(10000)
float32_data = measurements.astype(np.float32)
float64_data = measurements.astype(np.float64)
print("\nScientific data (10,000 measurements):")
print(
    *(
        f" {arr.dtype.name}: {arr.nbytes / 1024:.1f} KB"
        for arr in (float32_data, float64_data)
    ),
    sep="\n",
)
# Largest absolute rounding error introduced by storing the data as float32.
print(f" Precision difference: {np.abs(float32_data - float64_data).max():.2e}")
🔍 Inspecting Data Type Information
NumPy provides tools to inspect data type properties:
import numpy as np
# Data type inspection tools: what an array and its dtype can report.
array = np.array([1, 2, 3], dtype=np.int32)
print(
    *(
        f"{label}: {value}"
        for label, value in (
            ("Array", array),
            ("Data type", array.dtype),
            ("Type name", array.dtype.name),
            ("Bytes per element", array.itemsize),
            ("Total bytes", array.nbytes),
        )
    ),
    sep="\n",
)

# np.iinfo describes an integer dtype: its bounds and its bit width.
int32_info = np.iinfo(np.int32)
print("\nDetailed int32 info:")
print(f" Minimum value: {int32_info.min:,}")
print(f" Maximum value: {int32_info.max:,}")
print(f" Bits: {int32_info.bits}")

# np.finfo is the floating-point counterpart of np.iinfo.
float64_info = np.finfo(np.float64)
print("\nDetailed float64 info:")
print(f" Precision: {float64_info.precision} decimal digits")
print(f" Smallest positive: {float64_info.tiny}")
print(f" Largest value: {float64_info.max}")
🎯 Key Takeaways
🚀 What's Next?
Excellent! Now you understand how NumPy stores different types of data efficiently. Next, let's explore array size and indexing - learning how to access and navigate through your arrays.
Continue to: Array Size and Indexing
Ready to access your data! 📊✨
Was this helpful?
Track Your Learning Progress
Sign in to bookmark tutorials and keep track of your learning journey.
Your progress is saved automatically as you read.