🔢 Data Types and Memory

Understanding data types is crucial for efficient NumPy programming! Data types determine how numbers are stored in memory, how much space they use, and what operations are possible. Choosing the right data type can dramatically improve performance and reduce memory usage.

Think of data types as different "containers" for numbers - some are small and efficient, others are large but more precise!

import numpy as np

# Different data types demonstration: three 3-element arrays whose only
# difference is the element type, so their storage cost can be compared.
int_array = np.array([1, 2, 3], dtype=np.int32)
float_array = np.array([1.0, 2.0, 3.0], dtype=np.float64)
bool_array = np.array([True, False, True], dtype=bool)

print(f"Integer array: {int_array}, dtype: {int_array.dtype}")
print(f"Float array: {float_array}, dtype: {float_array.dtype}")
print(f"Boolean array: {bool_array}, dtype: {bool_array.dtype}")

# nbytes is itemsize * number of elements, i.e. the size of the element
# buffer only (it does not include the array object's own overhead).
print("Memory usage:")
print(f"  int32: {int_array.nbytes} bytes")
print(f"  float64: {float_array.nbytes} bytes")
print(f"  bool: {bool_array.nbytes} bytes")

🎯 NumPy Data Type Categories

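NumPy organizes data types into logical categories based on what kind of data they store: signed integers (`int8`–`int64`), unsigned integers (`uint8`–`uint64`), floating-point numbers (`float16`–`float64`), complex numbers (`complex64`, `complex128`), booleans (`bool`), and text or arbitrary Python objects (`str_`, `object`).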

🔍 Exploring Data Types

Let's see how different data types behave:

import numpy as np

# Same numbers, different types: every value fits in int8, so all five
# arrays hold identical data and differ only in bytes per element.
numbers = [10, 20, 30, 40, 50]

int8_array = np.array(numbers, dtype=np.int8)
int32_array = np.array(numbers, dtype=np.int32)
int64_array = np.array(numbers, dtype=np.int64)
float32_array = np.array(numbers, dtype=np.float32)
float64_array = np.array(numbers, dtype=np.float64)

print("Same data, different types:")
print(f"int8:    {int8_array} | {int8_array.nbytes} bytes")
print(f"int32:   {int32_array} | {int32_array.nbytes} bytes")
print(f"int64:   {int64_array} | {int64_array.nbytes} bytes")
print(f"float32: {float32_array} | {float32_array.nbytes} bytes")
print(f"float64: {float64_array} | {float64_array.nbytes} bytes")

# Memory savings, expressed both in bytes and as a fraction of the int64 size.
print("\nMemory comparison (vs int64):")
print(f"int8 saves:  {int64_array.nbytes - int8_array.nbytes} bytes ({(1 - int8_array.nbytes/int64_array.nbytes)*100:.0f}%)")
print(f"int32 saves: {int64_array.nbytes - int32_array.nbytes} bytes ({(1 - int32_array.nbytes/int64_array.nbytes)*100:.0f}%)")

📊 Automatic Type Inference

NumPy automatically chooses data types based on your input:

import numpy as np

# NumPy's automatic type detection: with no dtype argument, np.array picks
# the smallest "common" type that can represent every input element.
integers = np.array([1, 2, 3])          # platform default integer (int64 on most 64-bit systems)
floats = np.array([1.0, 2.0, 3.0])
mixed = np.array([1, 2.0, 3])  # Mixed types
booleans = np.array([True, False, True])
strings = np.array(['a', 'b', 'c'])     # fixed-width unicode, sized to the longest string

print(f"Integers: {integers}, dtype: {integers.dtype}")
print(f"Floats: {floats}, dtype: {floats.dtype}")
print(f"Mixed: {mixed}, dtype: {mixed.dtype}")  # Promoted to float!
print(f"Booleans: {booleans}, dtype: {booleans.dtype}")
print(f"Strings: {strings}, dtype: {strings.dtype}")

# Type promotion rules: a binary operation returns the wider of the two
# operand types, so no information from either operand is lost.
print("\nType promotion examples:")
int_plus_float = integers + floats
print(f"int + float → {int_plus_float.dtype}")

# True/False take part in arithmetic as 1/0 and the result is an integer array.
bool_plus_int = booleans + integers
print(f"bool + int → {bool_plus_int.dtype}")

💾 Memory Usage Optimization

Choosing the right data type can save significant memory, especially with large arrays:

import numpy as np

# Memory optimization example
large_size = 1000000  # 1 million elements

# Different integer types for the same data
# With no dtype, arange uses the platform default integer. That is int64 on
# most 64-bit systems but not everywhere, so the label below is read from the
# array instead of being hard-coded.
default_array = np.arange(large_size)
int32_array = np.arange(large_size, dtype=np.int32)
# NOTE: int16 cannot represent values above 32,767, so the contents of this
# array wrap around. It is created only to show its memory footprint.
int16_array = np.arange(large_size, dtype=np.int16)  # Will overflow!
uint8_array = np.arange(256, dtype=np.uint8)       # Small range: 0..255 exactly fills uint8

print(f"Memory usage for {large_size:,} elements:")
print(f"{default_array.dtype} (default): {default_array.nbytes / 1024 / 1024:.1f} MB")
print(f"int32:           {int32_array.nbytes / 1024 / 1024:.1f} MB")
print(f"int16:           {int16_array.nbytes / 1024 / 1024:.1f} MB")

# The element count is taken from the array so it cannot drift from the
# arange() call above.
print(f"\nSmall array ({uint8_array.size} elements):")
print(f"uint8: {uint8_array.nbytes} bytes")
print(f"Range: {uint8_array.min()} to {uint8_array.max()}")

# Memory savings calculation (in MB) of int32 relative to the default integer.
savings = (default_array.nbytes - int32_array.nbytes) / 1024 / 1024
print(f"\nMemory saved with int32: {savings:.1f} MB")

⚠️ Data Type Limitations and Overflow

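Different data types have different ranges. Exceeding these ranges causes overflow: integer array arithmetic silently wraps around to the other end of the range, while recent NumPy versions raise an error if you try to store an out-of-range Python integer directly: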

import numpy as np

# Demonstrating data type limits
print("🚨 Data Type Limits and Overflow")
print("=" * 35)

# Integer overflow examples
small_int = np.array([100], dtype=np.int8)  # Range: -128 to 127
print(f"int8 value: {small_int[0]}")

# This will overflow!
# The overflow is produced with int8 arithmetic rather than `small_int[0] = 200`:
# assigning an out-of-range Python integer raises OverflowError on NumPy 2
# (and warns on NumPy 1.24+), whereas array arithmetic wraps silently.
small_int += np.int8(100)  # 100 + 100 = 200 exceeds 127
print(f"After overflow: {small_int[0]}")  # Wraps around! (200 - 256 = -56)

# Safe range checking: np.iinfo reports the representable bounds of an
# integer type, so values can be validated before they are stored.
print(f"int8 range: {np.iinfo(np.int8).min} to {np.iinfo(np.int8).max}")
print(f"int16 range: {np.iinfo(np.int16).min} to {np.iinfo(np.int16).max}")
print(f"int32 range: {np.iinfo(np.int32).min} to {np.iinfo(np.int32).max}")

# Float precision limits: the same literal stored at two widths. float32 keeps
# roughly 7 significant decimal digits, float64 roughly 15-16.
float32_val = np.array([1.23456789], dtype=np.float32)
float64_val = np.array([1.23456789], dtype=np.float64)

print(f"float32 precision: {float32_val[0]}")
print(f"float64 precision: {float64_val[0]}")

🔄 Converting Between Data Types

You can convert arrays between different data types using astype():

import numpy as np

# Float samples with fractional parts, so the effect of each cast is visible.
float_data = np.array((1.7, 2.3, 3.9, 4.1))
print(f"Original floats: {float_data}")

# astype() hands back a converted copy; float_data itself is left untouched.
as_bool = float_data.astype(np.bool_)     # every non-zero value maps to True
as_int32 = float_data.astype("int32")     # explicit 32-bit integer target
as_int = float_data.astype(np.int_)       # default integer; the fraction is dropped, not rounded

print(f"As int: {as_int}")
print(f"As int32: {as_int32}")
print(f"As bool: {as_bool}")

# Integers -> text: each element becomes its decimal string representation.
numbers = np.array((1, 2, 3))
as_string = numbers.astype(np.str_)
print(f"As strings: {as_string}")

# Text -> integers: each element is parsed as a decimal integer literal.
string_numbers = np.array(("10", "20", "30"))
back_to_int = string_numbers.astype(np.int_)
print(f"Back to int: {back_to_int}")

🎯 Choosing the Right Data Type

Here's a practical guide for selecting data types:

Use Case	Recommended Type	Reason
Image pixels	`uint8`	0-255 range, minimal memory
Small integers	`int16` or `int32`	Good range, a quarter (`int16`) or half (`int32`) the memory of int64
Large datasets	`float32`	Half memory of float64, sufficient precision
Scientific computing	`float64`	Maximum precision for calculations
Boolean flags	`bool`	Minimal memory, clear intent
Counting/indices	`uint32`	Non-negative integers, good range

import numpy as np

# Practical data type selection
print("🎯 Practical Data Type Selection")
print("=" * 35)

# Example 1: Image processing
image_height, image_width = 1080, 1920
image_pixels = image_height * image_width  # pixels per colour channel (not used below)

# Same (height, width, 3) image buffer at 1 byte vs 8 bytes per channel value.
uint8_image = np.zeros((image_height, image_width, 3), dtype=np.uint8)
float64_image = np.zeros((image_height, image_width, 3), dtype=np.float64)

print(f"HD Image ({image_height}×{image_width}):")
print(f"  uint8:  {uint8_image.nbytes / 1024 / 1024:.1f} MB")
print(f"  float64: {float64_image.nbytes / 1024 / 1024:.1f} MB")
print(f"  Memory saved: {(float64_image.nbytes - uint8_image.nbytes) / 1024 / 1024:.1f} MB")

# Example 2: Scientific data
# A seeded Generator keeps the printed precision figure reproducible from run
# to run; the samples are still standard-normal draws.
rng = np.random.default_rng(42)
measurements = rng.standard_normal(10000)
float32_data = measurements.astype(np.float32)
float64_data = measurements.astype(np.float64)

# The sample count is read from the array so the label always matches the data.
print(f"\nScientific data ({measurements.size:,} measurements):")
print(f"  float32: {float32_data.nbytes / 1024:.1f} KB")
print(f"  float64: {float64_data.nbytes / 1024:.1f} KB")
# Largest absolute rounding error introduced by storing the values as float32.
print(f"  Precision difference: {np.abs(float32_data - float64_data).max():.2e}")

🔍 Inspecting Data Type Information

NumPy provides tools to inspect data type properties:

import numpy as np

# Data type inspection tools
array = np.array([1, 2, 3], dtype=np.int32)

# Per-array attributes: dtype describes the element type, itemsize is the
# bytes per element, and nbytes is itemsize * number of elements.
print(f"Array: {array}")
print(f"Data type: {array.dtype}")
print(f"Type name: {array.dtype.name}")
print(f"Bytes per element: {array.itemsize}")
print(f"Total bytes: {array.nbytes}")

# Get detailed type information
# np.iinfo describes the limits of an integer type.
print("\nDetailed int32 info:")
int32_info = np.iinfo(np.int32)
print(f"  Minimum value: {int32_info.min:,}")
print(f"  Maximum value: {int32_info.max:,}")
print(f"  Bits: {int32_info.bits}")

# np.finfo describes the limits of a floating-point type.
print("\nDetailed float64 info:")
float64_info = np.finfo(np.float64)
print(f"  Precision: {float64_info.precision} decimal digits")
# `tiny` is the smallest positive *normal* number; subnormal values are
# smaller still, so the label says "normal" to avoid overstating it.
print(f"  Smallest positive normal: {float64_info.tiny}")
print(f"  Largest value: {float64_info.max}")

🎯 Key Takeaways

🚀 What's Next?

Excellent! Now you understand how NumPy stores different types of data efficiently. Next, let's explore array size and indexing - learning how to access and navigate through your arrays.

Continue to: Array Size and Indexing

Ready to access your data! 📊✨

Online Python

🔢 Data Types and Memory

Track Your Learning Progress