⚠️ Handling File Errors
File loading often goes wrong! Files might be missing, corrupted, or formatted differently than expected. Let's learn how to handle these errors gracefully and troubleshoot common problems.
🛡️ Basic Error Handling
Always wrap file loading in try/except blocks:
import pandas as pd
def load_file_safely(filename):
    """Read a CSV file into a DataFrame, reporting problems instead of raising.

    On success, prints a confirmation line followed by the frame's shape.
    On failure, prints a message describing what went wrong.

    Returns:
        The loaded DataFrame, or None when the file is missing or the read
        raised any other exception.
    """
    try:
        frame = pd.read_csv(filename)
        dimensions = frame.shape
        print(f"✅ Successfully loaded {filename}")
        print(f" Shape: {dimensions}")
        return frame
    except FileNotFoundError:
        # Most common failure, so it gets its own friendlier message
        print(f"❌ File '{filename}' not found!")
    except Exception as problem:
        # Anything else (empty file, parse error, ...) is reported verbatim
        print(f"❌ Error loading {filename}: {problem}")
    return None
# Failure path first: this file is not expected to exist
df = load_file_safely('nonexistent.csv')

# Success path: write a tiny CSV to disk, then load it back
sample_data = "name,age\nAlice,25\nBob,30"
with open('test.csv', 'w') as f:
    f.write(sample_data)

df = load_file_safely('test.csv')
if df is not None:
    print(df)
🔍 Diagnosing File Problems
Before loading, check if the file exists and what's inside:
import pandas as pd
import os
def diagnose_file(filename):
    """Print a quick health report for a file before attempting to load it.

    Reports whether the path exists, its size in bytes, and the result of
    three ``readline()`` calls (whitespace-stripped; blank once the file
    runs out of lines).

    Returns:
        True if the file exists and could be opened and read as text,
        False otherwise.
    """
    print(f"🔍 Diagnosing {filename}:")

    # Nothing more to inspect if the path is absent
    if not os.path.exists(filename):
        print("❌ File does not exist")
        return False

    print(f"📏 File size: {os.path.getsize(filename)} bytes")

    # Peek at the start of the file; any read problem is reported, not raised
    try:
        preview = []
        with open(filename, 'r') as handle:
            for _ in range(3):
                preview.append(handle.readline().strip())
        print("👀 First 3 lines:")
        for number, text in enumerate(preview, 1):
            print(f" {number}: {text}")
        return True
    except Exception as problem:
        print(f"❌ Cannot read file: {problem}")
        return False
# Write a small CSV so there is something to diagnose
sample_data = "name,age,city\nAlice,25,NYC\nBob,30,LA\nCharlie,35,Chicago"
with open('sample.csv', 'w') as f:
    f.write(sample_data)

# Only load the file once the diagnosis reports it as readable
if diagnose_file('sample.csv'):
    df = pd.read_csv('sample.csv')
    print("\n📊 Loaded successfully:")
    print(df)
🔧 Common Error Solutions
Problem 1: Wrong Separator
import pandas as pd
# Create file with semicolon separator
data = "name;age;salary\nAlice;25;50000\nBob;30;60000"
with open('semicolon.csv', 'w') as f:
    f.write(data)

print("❌ Wrong separator:")
try:
    # Assumes comma. For this data the read does not raise: no line contains
    # a comma, so every row comes back as one single-column value.
    df = pd.read_csv('semicolon.csv')
    print(df)
except Exception:
    # `Exception` rather than a bare `except:` so that Ctrl-C
    # (KeyboardInterrupt) and SystemExit are not swallowed.
    print("Failed to parse correctly")

print("\n✅ Correct separator:")
df = pd.read_csv('semicolon.csv', sep=';')
print(df)
Problem 2: Encoding Issues
import pandas as pd
def try_encodings(filename):
    """Load a CSV file by trying a list of text encodings in turn.

    Prints one line per attempt saying whether that encoding worked.

    Returns:
        The DataFrame from the first encoding that reads the file, or None
        if every attempt raised.
    """
    # Order matters. 'latin-1' maps every possible byte to a character, so it
    # never raises UnicodeDecodeError; anything listed after it could not be
    # reached for decoding failures. It therefore goes last as the catch-all,
    # after 'cp1252', which rejects a few undefined bytes and decodes
    # 0x80-0x9F to printable characters such as '€'.
    # ('iso-8859-1' is an alias of 'latin-1', so it is not listed separately.)
    encodings = ['utf-8', 'cp1252', 'latin-1']
    for encoding in encodings:
        try:
            df = pd.read_csv(filename, encoding=encoding)
        except UnicodeDecodeError:
            print(f"❌ Failed with encoding: {encoding}")
        except Exception as e:
            print(f"❌ Other error with {encoding}: {e}")
        else:
            print(f"✅ Success with encoding: {encoding}")
            return df
    print("❌ All encodings failed")
    return None
# The sample is written as UTF-8, so the first encoding tried should succeed
sample_data = "name,city\nAlice,New York\nBob,São Paulo"
with open('encoding_test.csv', 'w', encoding='utf-8') as f:
    f.write(sample_data)

df = try_encodings('encoding_test.csv')
if df is not None:
    print(df)
Problem 3: Excel File Issues
import pandas as pd
def load_excel_safely(filename):
    """Load an Excel file into a DataFrame, reporting problems instead of raising.

    A plain ``pd.read_excel`` is tried first. If that fails for a reason other
    than a missing file or a missing import, the read is retried once with
    ``engine='openpyxl'``.

    Returns:
        The loaded DataFrame, or None if the file could not be read.
    """
    try:
        # Try basic loading
        df = pd.read_excel(filename)
        print(f"✅ Loaded Excel file: {df.shape}")
        return df
    except FileNotFoundError:
        print("❌ Excel file not found")
    except ImportError:
        print("❌ Missing openpyxl. Install with: pip install openpyxl")
    except Exception as e:
        print(f"❌ Excel error: {e}")
        # Only an unexplained failure is worth a second attempt: a missing
        # file or a missing library cannot be fixed by changing options.
        try:
            print("🔄 Trying with different options...")
            df = pd.read_excel(filename, engine='openpyxl')
            print(f"✅ Success with openpyxl engine: {df.shape}")
            return df
        except Exception as retry_error:
            # `Exception` (not a bare `except:`) so Ctrl-C still interrupts,
            # and the reason is shown rather than discarded.
            print(f"❌ Still failed: {retry_error}")
    return None
# No real workbook is loaded here; just list the usual fixes
print("\n".join([
    "Excel loading examples:",
    "Common solutions:",
    "1. pip install openpyxl",
    "2. df = pd.read_excel('file.xlsx', engine='openpyxl')",
    "3. df = pd.read_excel('file.xlsx', sheet_name='Sheet1')",
]))
🛠️ Smart File Loader
Here's a comprehensive file loader that handles most problems:
import pandas as pd
import os
def smart_file_loader(filename, **kwargs):
    """Load a data file by dispatching on its extension.

    ``.csv`` goes to ``try_csv_loading``, ``.xlsx``/``.xls`` to
    ``try_excel_loading`` and ``.json`` to ``try_json_loading``; ``kwargs``
    are forwarded unchanged. The extension is matched case-insensitively.

    Returns:
        Whatever the selected loader returns, or None when the file does not
        exist, the extension is unsupported, or the loader raised.
    """
    # Bail out early when there is nothing to load
    if not os.path.exists(filename):
        print(f"❌ File '{filename}' not found")
        return None

    # Lower-case first so 'DATA.CSV' and 'data.csv' are treated alike
    extension = os.path.splitext(filename.lower())[1]

    try:
        if extension == '.csv':
            loader = try_csv_loading
        elif extension in ('.xlsx', '.xls'):
            loader = try_excel_loading
        elif extension == '.json':
            loader = try_json_loading
        else:
            print(f"❌ Unsupported file type: {extension}")
            return None
        return loader(filename, **kwargs)
    except Exception as problem:
        print(f"❌ Failed to load {filename}: {problem}")
        return None
def try_csv_loading(filename, **kwargs):
    """Load a CSV file, guessing the separator and encoding when needed.

    A ``sep``/``delimiter`` or ``encoding`` supplied by the caller in
    ``kwargs`` is used as given and never overridden. For whatever was left
    unspecified, the default read is tried first, then the separators ``;``,
    tab and ``|``; if nothing can be read at all, the same sequence is
    repeated with the ``latin-1`` and then ``cp1252`` encodings.

    A read that yields more than one column is accepted immediately. A read
    that yields a single column is treated as a hint that the separator may
    be wrong (a wrong separator usually does not raise; it just produces one
    wide column), so the remaining separators are tried. If none does better,
    the first single-column result is returned, so genuine one-column files
    still load.

    Returns:
        The loaded DataFrame, or None if every attempt raised.
    """
    caller_chose_sep = 'sep' in kwargs or 'delimiter' in kwargs
    # None means "do not override": use pandas' default or the caller's value
    separators = [None] if caller_chose_sep else [None, ';', '\t', '|']
    encodings = [None] if 'encoding' in kwargs else [None, 'latin-1', 'cp1252']

    last_error = None
    for encoding in encodings:
        single_column = None  # first one-column result seen for this encoding
        for sep in separators:
            options = dict(kwargs)
            parts = []
            if sep is not None:
                options['sep'] = sep
                parts.append(f"separator '{sep}'")
            if encoding is not None:
                options['encoding'] = encoding
                parts.append(f"encoding '{encoding}'")
            description = f" with {' and '.join(parts)}" if parts else ""

            try:
                df = pd.read_csv(filename, **options)
            except Exception as e:
                # Best-effort fallback: remember why and move on
                last_error = e
                continue

            if caller_chose_sep or df.shape[1] > 1:
                print(f"✅ CSV loaded{description}: {df.shape}")
                return df
            if single_column is None:
                single_column = (df, description)

        if single_column is not None:
            # The file decoded fine and no separator produced more columns,
            # so the one-column reading is the best available answer.
            df, description = single_column
            print(f"✅ CSV loaded{description}: {df.shape}")
            return df

    print("❌ All CSV loading strategies failed")
    if last_error is not None:
        print(f"   Last error: {last_error}")
    return None
def try_excel_loading(filename, **kwargs):
    """Load an Excel file, trying each available engine in turn.

    If the caller passes ``engine`` in ``kwargs`` only that engine is used;
    otherwise ``openpyxl`` is tried first and then ``xlrd``. Remaining
    ``kwargs`` are forwarded to ``pd.read_excel``.

    Returns:
        The DataFrame from the first engine that succeeds, or None if every
        attempt raised.
    """
    # Honour a caller-chosen engine. Leaving it in kwargs would pass `engine`
    # twice and raise TypeError on every attempt.
    if 'engine' in kwargs:
        engines = [kwargs.pop('engine')]
    else:
        engines = ['openpyxl', 'xlrd']

    last_error = None
    for engine in engines:
        try:
            df = pd.read_excel(filename, engine=engine, **kwargs)
        except Exception as e:
            # Best-effort fallback: remember why and try the next engine
            last_error = e
            continue
        print(f"✅ Excel loaded with engine '{engine}': {df.shape}")
        return df

    print("❌ All Excel loading strategies failed")
    if last_error is not None:
        print(f"   Last error: {last_error}")
    return None
def try_json_loading(filename, **kwargs):
    """Load a JSON file into a DataFrame, reporting failure instead of raising.

    ``kwargs`` are forwarded to ``pd.read_json``.

    Returns:
        The loaded DataFrame, or None if the read raised.
    """
    try:
        df = pd.read_json(filename, **kwargs)
    except Exception as e:
        # `Exception` (not a bare `except:`) so Ctrl-C still interrupts;
        # include the reason so the user can tell what to fix.
        print(f"❌ JSON loading failed: {e}")
        return None
    print(f"✅ JSON loaded: {df.shape}")
    return df
# Write a small comma-separated file and run it through the smart loader
sample_data = "name,age\nAlice,25\nBob,30"
with open('test_smart.csv', 'w') as f:
    f.write(sample_data)

df = smart_file_loader('test_smart.csv')
if df is not None:
    print(df)
📋 Error Troubleshooting Guide
Error Message | Likely Cause | Solution |
---|---|---|
FileNotFoundError | Wrong path or filename | Check file exists, verify path |
UnicodeDecodeError | Wrong encoding | Try encoding='latin-1' |
ParserError | Wrong separator | Try sep=';' or sep='\t' |
ImportError: openpyxl | Missing Excel library | pip install openpyxl |
PermissionError | File open elsewhere | Close Excel, check permissions |
EmptyDataError | File is empty | Check file has content |
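🎯 Key Takeaways
- Wrap file loading in try/except so a missing or malformed file produces a clear message instead of a crash.
- Check that the file exists and peek at its first few lines before loading it.
- If a CSV loads as a single column, suspect the separator; if you get a UnicodeDecodeError, try a different encoding.
- Reading Excel files requires an engine such as openpyxl to be installed.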
🚀 What's Next?
Excellent! You now know how to handle file loading errors gracefully. Next, let's learn how to explore and understand your data once it's loaded.
Continue to: Viewing Your Data
You're becoming a data loading expert! 🛡️📊
Was this helpful?
Track Your Learning Progress
Sign in to bookmark tutorials and keep track of your learning journey.
Your progress is saved automatically as you read.