⚠️ Handling File Errors

File loading often goes wrong! Files might be missing, corrupted, or formatted differently than expected. Let's learn how to handle these errors gracefully and troubleshoot common problems.

🛡️ Basic Error Handling

Always wrap file loading in try/except blocks:

import pandas as pd

def load_file_safely(filename):
    """Read a CSV file into a DataFrame, reporting problems instead of raising.

    Prints a success line plus the DataFrame's shape when loading works,
    or a one-line failure message when it does not.

    Args:
        filename: Path of the CSV file handed to ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or None when the file is missing or any other
        error occurs while loading.
    """
    loaded = None
    try:
        frame = pd.read_csv(filename)
        print(f"✅ Successfully loaded {filename}")
        print(f"   Shape: {frame.shape}")
        # Only publish the result once the whole success path has run.
        loaded = frame
    except FileNotFoundError:
        # Handled separately so the most common mistake gets a clear message.
        print(f"❌ File '{filename}' not found!")
    except Exception as err:
        print(f"❌ Error loading {filename}: {err}")
    return loaded

# A path that does not exist: the loader reports the problem and returns None
df = load_file_safely('nonexistent.csv')

# Now create a small real CSV so the success path can be shown as well
sample_data = "\n".join(["name,age", "Alice,25", "Bob,30"])
with open('test.csv', mode='w') as f:
    f.write(sample_data)

df = load_file_safely('test.csv')
# The loader signals failure with None, so check before using the result
if df is not None:
    print(df)

🔍 Diagnosing File Problems

Before loading, check if the file exists and what's inside:

import pandas as pd
import os

def diagnose_file(filename):
    """Print a quick health report for a file before trying to load it.

    Reports whether the path exists, its size in bytes, and the first three
    lines read from it (stripped of surrounding whitespace; a line is blank
    when the file has fewer than three lines).

    Args:
        filename: Path of the file to inspect.

    Returns:
        True when the preview could be read, False when the path does not
        exist or reading it raised an error.
    """
    print(f"🔍 Diagnosing {filename}:")

    # Nothing else can be checked if the path is not there.
    if not os.path.exists(filename):
        print("❌ File does not exist")
        return False

    print(f"📏 File size: {os.path.getsize(filename)} bytes")

    try:
        # Read the whole preview first so nothing is printed if reading fails.
        preview = []
        with open(filename, 'r') as handle:
            for _ in range(3):
                preview.append(handle.readline().strip())
        print("👀 First 3 lines:")
        for number, text in enumerate(preview, start=1):
            print(f"   {number}: {text}")
    except Exception as err:
        print(f"❌ Cannot read file: {err}")
        return False
    return True

# Build a small sample file, diagnose it, and load it only if the check passes
sample_data = "\n".join([
    "name,age,city",
    "Alice,25,NYC",
    "Bob,30,LA",
    "Charlie,35,Chicago",
])
with open('sample.csv', mode='w') as f:
    f.write(sample_data)

file_is_readable = diagnose_file('sample.csv')
if file_is_readable:
    df = pd.read_csv('sample.csv')
    print("\n📊 Loaded successfully:")
    print(df)

🔧 Common Error Solutions

Problem 1: Wrong Separator

import pandas as pd

# Create file with semicolon separator
data = "name;age;salary\nAlice;25;50000\nBob;30;60000"
with open('semicolon.csv', 'w') as f:
    f.write(data)

print("❌ Wrong separator:")
# NOTE: pandas usually does NOT raise for a wrong separator. It reads every
# line as one big column, so the symptom is a DataFrame with a single column
# whose name still contains the real separator.
try:
    df = pd.read_csv('semicolon.csv')  # Assumes comma
    print(df)
    print(f"   Columns found: {len(df.columns)} (expected 3)")
except pd.errors.ParserError:
    # Only parsing problems are expected here; anything else (including
    # Ctrl-C) should surface instead of being silently swallowed.
    print("Failed to parse correctly")

print("\n✅ Correct separator:")
df = pd.read_csv('semicolon.csv', sep=';')
print(df)

Problem 2: Encoding Issues

import pandas as pd

def try_encodings(filename):
    """Load a CSV by trying a list of text encodings in order.

    The order matters: 'latin-1' maps every possible byte to a character, so
    it can never raise UnicodeDecodeError and must come last as the
    catch-all. 'cp1252' is tried before it because it decodes Windows
    "smart" punctuation correctly and still rejects a few undefined bytes.

    Args:
        filename: Path of the CSV file to load.

    Returns:
        The DataFrame from the first encoding that works, or None when every
        attempt fails (for example because the file does not exist).
    """
    encodings = ['utf-8', 'cp1252', 'latin-1']

    for encoding in encodings:
        try:
            df = pd.read_csv(filename, encoding=encoding)
            print(f"✅ Success with encoding: {encoding}")
            return df
        except UnicodeDecodeError:
            # Wrong guess for this file; move on to the next encoding.
            print(f"❌ Failed with encoding: {encoding}")
        except Exception as e:
            # Not an encoding problem (missing file, parse error, ...).
            print(f"❌ Other error with {encoding}: {e}")

    print("❌ All encodings failed")
    return None

# Write a UTF-8 file containing a non-ASCII character, then load it back
# (the first encoding tried, utf-8, is expected to succeed here)
sample_data = "\n".join(["name,city", "Alice,New York", "Bob,São Paulo"])
with open('encoding_test.csv', mode='w', encoding='utf-8') as f:
    f.write(sample_data)

df = try_encodings('encoding_test.csv')
# None means every encoding attempt failed
if df is not None:
    print(df)

Problem 3: Excel File Issues

import pandas as pd

def load_excel_safely(filename):
    """Load an Excel file, reporting problems instead of raising.

    First lets pandas pick the engine. If that fails for a reason other than
    a missing file or a missing library, retries once with the 'openpyxl'
    engine named explicitly.

    Args:
        filename: Path of the Excel file to load.

    Returns:
        The loaded DataFrame, or None when every attempt fails.
    """
    try:
        # Try basic loading
        df = pd.read_excel(filename)
        print(f"✅ Loaded Excel file: {df.shape}")
        return df

    except FileNotFoundError:
        print("❌ Excel file not found")

    except ImportError:
        print("❌ Missing openpyxl. Install with: pip install openpyxl")

    except Exception as e:
        print(f"❌ Excel error: {e}")

        # Try with different parameters
        try:
            print("🔄 Trying with different options...")
            df = pd.read_excel(filename, engine='openpyxl')
            print(f"✅ Success with openpyxl engine: {df.shape}")
            return df
        except Exception as retry_error:
            # Catch Exception (not a bare except) so Ctrl-C still works, and
            # show why the retry failed instead of hiding it.
            print(f"❌ Still failed: {retry_error}")

    return None

# No real workbook is loaded here; just list the usual fixes, one per line
print(
    "Excel loading examples:",
    "Common solutions:",
    "1. pip install openpyxl",
    "2. df = pd.read_excel('file.xlsx', engine='openpyxl')",
    "3. df = pd.read_excel('file.xlsx', sheet_name='Sheet1')",
    sep="\n",
)

🛠️ Smart File Loader

Here's a comprehensive file loader that handles most problems:

import pandas as pd
import os

def smart_file_loader(filename, **kwargs):
    """Load a CSV, Excel, or JSON file by dispatching on its extension.

    Args:
        filename: Path of the file to load. The extension is compared
            case-insensitively.
        **kwargs: Passed through unchanged to the format-specific loader.

    Returns:
        Whatever the format-specific loader returns, or None when the file
        does not exist, the extension is unsupported, or the loader raises.
    """
    # Bail out early when there is nothing to load.
    if not os.path.exists(filename):
        print(f"❌ File '{filename}' not found")
        return None

    # Lower-case before splitting so '.CSV' and '.csv' are treated alike.
    suffix = os.path.splitext(filename.lower())[1]

    try:
        if suffix == '.csv':
            loaded = try_csv_loading(filename, **kwargs)
        elif suffix in ('.xlsx', '.xls'):
            loaded = try_excel_loading(filename, **kwargs)
        elif suffix == '.json':
            loaded = try_json_loading(filename, **kwargs)
        else:
            print(f"❌ Unsupported file type: {suffix}")
            loaded = None
    except Exception as err:
        # Last line of defence: report anything the loaders let through.
        print(f"❌ Failed to load {filename}: {err}")
        loaded = None

    return loaded

def try_csv_loading(filename, **kwargs):
    """Load a CSV, falling back to other separators and encodings.

    Strategy, in order:
      1. Plain ``pd.read_csv(filename, **kwargs)``. A wrong separator usually
         does not raise; it yields a single column whose header still
         contains the real separator. When that happens (and the caller did
         not choose a separator), the file is re-read with that separator and
         the wider result is returned.
      2. If step 1 raised, retry with each alternative separator.
      3. If those fail too, retry with each alternative encoding.

    Fallbacks never override a ``sep``/``delimiter`` or ``encoding`` that the
    caller passed explicitly.

    Args:
        filename: Path of the CSV file to load.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or None when every strategy fails.
    """
    alt_separators = [';', '\t', '|']
    # cp1252 goes first: latin-1 accepts every byte, so anything listed
    # after it could never be reached.
    alt_encodings = ['cp1252', 'latin-1']

    # Passing sep/encoding twice would raise TypeError, so skip a fallback
    # family whenever the caller already made that choice.
    sep_is_fixed = 'sep' in kwargs or 'delimiter' in kwargs
    encoding_is_fixed = 'encoding' in kwargs

    # Try basic loading first
    try:
        df = pd.read_csv(filename, **kwargs)
    except Exception as e:
        first_error = e
    else:
        if not sep_is_fixed and df.shape[1] == 1:
            # One column whose header contains ';', tab or '|' almost always
            # means the separator guess was wrong.
            header = str(df.columns[0])
            for sep in alt_separators:
                if sep not in header:
                    continue
                try:
                    resplit = pd.read_csv(filename, sep=sep, **kwargs)
                except Exception:
                    continue
                if resplit.shape[1] > 1:
                    print(f"✅ CSV loaded with separator '{sep}': {resplit.shape}")
                    return resplit
        print(f"✅ CSV loaded: {df.shape}")
        return df

    # Try different separators
    if not sep_is_fixed:
        for sep in alt_separators:
            try:
                df = pd.read_csv(filename, sep=sep, **kwargs)
            except Exception:
                continue
            print(f"✅ CSV loaded with separator '{sep}': {df.shape}")
            return df

    # Try different encodings
    if not encoding_is_fixed:
        for encoding in alt_encodings:
            try:
                df = pd.read_csv(filename, encoding=encoding, **kwargs)
            except Exception:
                continue
            print(f"✅ CSV loaded with encoding '{encoding}': {df.shape}")
            return df

    print("❌ All CSV loading strategies failed")
    # Show the original failure so the user has something to act on.
    print(f"   First error: {first_error}")
    return None

def try_excel_loading(filename, **kwargs):
    """Load an Excel file, trying each available engine in turn.

    Args:
        filename: Path of the Excel file to load.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_excel``.
            If ``engine`` is given, only that engine is tried.

    Returns:
        The loaded DataFrame, or None when every engine fails.
    """
    if 'engine' in kwargs:
        # Respect the caller's choice; passing engine twice would raise
        # TypeError on every attempt.
        engines = [kwargs.pop('engine')]
    else:
        engines = ['openpyxl', 'xlrd']

    last_error = None
    for engine in engines:
        try:
            df = pd.read_excel(filename, engine=engine, **kwargs)
        except Exception as e:
            # Missing engine, wrong format, unreadable file: try the next.
            last_error = e
            continue
        print(f"✅ Excel loaded with engine '{engine}': {df.shape}")
        return df

    print("❌ All Excel loading strategies failed")
    if last_error is not None:
        print(f"   Last error: {last_error}")
    return None

def try_json_loading(filename, **kwargs):
    """Load a JSON file into a DataFrame, reporting failure instead of raising.

    Args:
        filename: Path of the JSON file to load.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_json``.

    Returns:
        The loaded DataFrame, or None when loading fails.
    """
    try:
        df = pd.read_json(filename, **kwargs)
    except Exception as e:
        # Catch Exception (not a bare except) so Ctrl-C still works, and
        # include the reason so the user knows what to fix.
        print(f"❌ JSON loading failed: {e}")
        return None
    print(f"✅ JSON loaded: {df.shape}")
    return df

# Write a tiny CSV and run it through the smart loader
sample_data = "\n".join(["name,age", "Alice,25", "Bob,30"])
with open('test_smart.csv', mode='w') as f:
    f.write(sample_data)

df = smart_file_loader('test_smart.csv')
# The loader returns None when nothing could be loaded
if df is not None:
    print(df)

📋 Error Troubleshooting Guide

| Error Message | Likely Cause | Solution |
|---|---|---|
| FileNotFoundError | Wrong path or filename | Check file exists, verify path |
| UnicodeDecodeError | Wrong encoding | Try encoding='latin-1' |
| ParserError | Wrong separator | Try sep=';' or sep='\t' |
| ImportError: openpyxl | Missing Excel library | pip install openpyxl |
| PermissionError | File open elsewhere | Close Excel, check permissions |
| EmptyDataError | File is empty | Check file has content |

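🎯 Key Takeaways

- Wrap file loading in try/except, handling specific errors (such as FileNotFoundError) before the generic fallback.
- Check that a file exists and peek at its first few lines before loading it.
- Wrong separators and wrong encodings are the most common culprits: try sep=';' or encoding='latin-1'.
- Reading Excel files requires the openpyxl package.
- Combine these checks into one loader with fallback strategies so problems are reported instead of crashing your script.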

🚀 What's Next?

Excellent! You now know how to handle file loading errors gracefully. Next, let's learn how to explore and understand your data once it's loaded.

Continue to: Viewing Your Data

You're becoming a data loading expert! 🛡️📊

Was this helpful?

😔Poor
🙁Fair
😊Good
😄Great
🤩Excellent