🗄️ Database Export
Saving your analyzed data back to databases is essential for integrating with other systems! Whether you're updating a data warehouse, feeding results to a web application, or creating data pipelines, pandas makes database export straightforward and reliable.
Think of database export like delivering your analysis results directly to the systems that need them, creating a seamless data workflow.
import pandas as pd
import sqlite3
# Sample processed data ready for database
processed_data = pd.DataFrame({
'customer_id': [1001, 1002, 1003, 1004],
'total_orders': [8, 12, 5, 15],
'total_spent': [2450.75, 3890.25, 1205.50, 4560.00],
'customer_segment': ['Premium', 'Premium', 'Standard', 'Premium'],
'last_order_date': pd.to_datetime(['2023-03-15', '2023-03-20', '2023-03-10', '2023-03-25'])
})
print("Processed customer data:")
print(processed_data)
print()
# Create connection to SQLite database (file-based, no server needed)
conn = sqlite3.connect('customer_analysis.db')
# Export DataFrame to database table
processed_data.to_sql('customer_summary', conn, if_exists='replace', index=False)
print("✅ Data exported to database table: customer_summary")
# Verify the export by reading back
verification = pd.read_sql('SELECT * FROM customer_summary', conn)
print("\nVerification - data in database:")
print(verification)
# Close connection
conn.close()
🎯 Why Export to Databases?
Database export enables powerful data integration:
🔧 Database Connection Basics
Understanding how to connect to different database types:
💾 SQLite Export (Beginner-Friendly)
SQLite is perfect for learning database operations:
import pandas as pd
import sqlite3
# Sales analysis results
sales_analysis = pd.DataFrame({
'product_category': ['Electronics', 'Books', 'Clothing', 'Home'],
'total_revenue': [125000, 45000, 78000, 52000],
'total_units': [450, 890, 620, 340],
'avg_price': [277.78, 50.56, 125.81, 152.94],
'profit_margin': [0.22, 0.35, 0.28, 0.31]
})
print("Sales analysis results:")
print(sales_analysis)
print()
# Connect to SQLite database
conn = sqlite3.connect('sales_analysis.db')
# Export to database table
sales_analysis.to_sql('product_performance',
conn,
if_exists='replace', # Replace table if exists
index=False) # Don't include pandas index
print("✅ Exported to database table: product_performance")
# Query the data back to verify
query = "SELECT product_category, total_revenue FROM product_performance ORDER BY total_revenue DESC"
top_categories = pd.read_sql(query, conn)
print("\nTop performing categories (from database):")
print(top_categories)
# Close connection
conn.close()
Multiple Tables Export
import pandas as pd
import sqlite3
# Create multiple related datasets
customers = pd.DataFrame({
'customer_id': [1, 2, 3, 4],
'name': ['Alice', 'Bob', 'Charlie', 'Diana'],
'segment': ['Premium', 'Standard', 'Standard', 'Premium']
})
orders = pd.DataFrame({
'order_id': [101, 102, 103, 104, 105],
'customer_id': [1, 2, 1, 3, 4],
'amount': [299.99, 149.50, 89.99, 199.99, 449.99],
'order_date': pd.to_datetime(['2023-01-15', '2023-01-20', '2023-02-01', '2023-02-05', '2023-02-10'])
})
print("Customers table:")
print(customers)
print("\nOrders table:")
print(orders)
# Connect and export multiple tables
conn = sqlite3.connect('business_data.db')
# Export both tables
customers.to_sql('customers', conn, if_exists='replace', index=False)
orders.to_sql('orders', conn, if_exists='replace', index=False)
print("\n✅ Exported multiple tables to database")
# Query with JOIN to verify relationships
query = """
SELECT c.name, c.segment, COUNT(o.order_id) as order_count, SUM(o.amount) as total_spent
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.name, c.segment
ORDER BY total_spent DESC
"""
customer_summary = pd.read_sql(query, conn)
print("\nCustomer summary (joined from multiple tables):")
print(customer_summary)
conn.close()
🌐 Connection String Patterns
For production databases, you'll use connection strings:
Using SQLAlchemy (Production Pattern)
import pandas as pd
import sqlite3
# For production, you would use: from sqlalchemy import create_engine
# Simulate production data export workflow
monthly_report = pd.DataFrame({
'report_month': ['2023-01', '2023-02', '2023-03'],
'total_sales': [125000, 135000, 145000],
'new_customers': [45, 52, 38],
'customer_retention': [0.92, 0.88, 0.90],
'avg_order_value': [285.50, 295.75, 310.25]
})
print("Monthly report data:")
print(monthly_report)
print()
# Using SQLite for this example (in production, use SQLAlchemy)
conn = sqlite3.connect('reporting.db')
# Export with data type specification
monthly_report.to_sql('monthly_reports',
conn,
if_exists='append', # Add to existing data
index=False,
dtype={
'report_month': 'TEXT',
'total_sales': 'INTEGER',
'new_customers': 'INTEGER',
'customer_retention': 'REAL',
'avg_order_value': 'REAL'
})
print("✅ Exported with specified data types")
# Verify the data types
schema_query = "PRAGMA table_info(monthly_reports)"
schema_info = pd.read_sql(schema_query, conn)
print("\nTable schema:")
print(schema_info[['name', 'type']])
conn.close()
🔄 Update Strategies
Different approaches for updating database data:
import pandas as pd
import sqlite3
# Initial data
initial_inventory = pd.DataFrame({
'product_id': [1, 2, 3],
'product_name': ['Laptop', 'Mouse', 'Keyboard'],
'current_stock': [15, 50, 25],
'last_updated': pd.to_datetime(['2023-03-01', '2023-03-01', '2023-03-01'])
})
# New inventory update
inventory_update = pd.DataFrame({
'product_id': [2, 3, 4], # Product 2 & 3 updates, Product 4 is new
'product_name': ['Mouse', 'Keyboard', 'Monitor'],
'current_stock': [45, 30, 20], # Updated quantities
'last_updated': pd.to_datetime(['2023-03-15', '2023-03-15', '2023-03-15'])
})
print("Initial inventory:")
print(initial_inventory)
print("\nInventory update:")
print(inventory_update)
# Set up database
conn = sqlite3.connect('inventory.db')
# Load initial data
initial_inventory.to_sql('inventory', conn, if_exists='replace', index=False)
print("\n✅ Loaded initial inventory data")
# Append new data (this will add Product 4 and duplicate updates for 2&3)
inventory_update.to_sql('inventory', conn, if_exists='append', index=False)
print("✅ Appended inventory updates")
# Check the result
current_inventory = pd.read_sql('SELECT * FROM inventory ORDER BY product_id', conn)
print("\nCurrent inventory in database:")
print(current_inventory)
print("\n⚠️ Note: Products 2 and 3 now have duplicate entries")
print("In production, you'd use upsert operations to handle this properly")
conn.close()
📊 Real-World Database Export Example
Complete workflow for business intelligence export:
import pandas as pd
import sqlite3
# Comprehensive business intelligence data
customer_metrics = pd.DataFrame({
'customer_id': [1001, 1002, 1003, 1004, 1005],
'acquisition_date': pd.to_datetime(['2022-01-15', '2022-03-20', '2022-06-10', '2022-08-05', '2022-11-12']),
'total_orders': [15, 8, 22, 12, 18],
'total_revenue': [4500.75, 2390.50, 6780.25, 3450.00, 5220.80],
'avg_order_value': [300.05, 298.81, 308.19, 287.50, 290.04],
'last_order_date': pd.to_datetime(['2023-03-20', '2023-02-15', '2023-03-25', '2023-03-10', '2023-03-22']),
'customer_lifetime_days': [430, 360, 288, 217, 130],
'predicted_churn_risk': [0.15, 0.35, 0.08, 0.25, 0.12]
})
product_performance = pd.DataFrame({
'product_id': [101, 102, 103, 104, 105],
'product_name': ['Laptop Pro', 'Wireless Mouse', 'Mechanical Keyboard', '4K Monitor', 'Webcam HD'],
'category': ['Computers', 'Accessories', 'Accessories', 'Monitors', 'Accessories'],
'units_sold_ytd': [145, 320, 180, 95, 240],
'revenue_ytd': [217500, 16000, 22500, 47500, 14400],
'profit_margin': [0.22, 0.35, 0.28, 0.25, 0.40],
'inventory_turnover': [4.2, 8.5, 6.1, 3.8, 9.2]
})
print("Customer metrics:")
print(customer_metrics.head())
print("\nProduct performance:")
print(product_performance.head())
# Connect to business intelligence database
conn = sqlite3.connect('business_intelligence.db')
# Export comprehensive business data
customer_metrics.to_sql('customer_analytics',
conn,
if_exists='replace',
index=False)
product_performance.to_sql('product_analytics',
conn,
if_exists='replace',
index=False)
print("\n✅ Exported comprehensive business intelligence data")
# Create a business summary view with SQL
summary_query = """
SELECT
'Customer Metrics' as data_type,
COUNT(*) as record_count,
AVG(total_revenue) as avg_value
FROM customer_analytics
UNION ALL
SELECT
'Product Performance' as data_type,
COUNT(*) as record_count,
AVG(revenue_ytd) as avg_value
FROM product_analytics
"""
summary_stats = pd.read_sql(summary_query, conn)
print("\nBusiness intelligence summary:")
print(summary_stats)
# Export summary back to database for dashboards
summary_stats.to_sql('bi_summary', conn, if_exists='replace', index=False)
print("\n✅ Created summary table for dashboard consumption")
conn.close()
🛡️ Security and Best Practices
Essential practices for production database exports:
🎯 Key Takeaways
🎉 Congratulations!
You've completed the pandas tutorial series! You now have the skills to:
- ✅ Load data from various sources
- ✅ Clean and transform your data
- ✅ Perform powerful analysis and grouping
- ✅ Work with dates and time series
- ✅ Export results to multiple formats
What's Next?
- Practice with real datasets
- Explore advanced pandas features
- Learn data visualization with matplotlib/seaborn
- Build data pipelines and automation
Keep analyzing data! 🐼📊🎯
Was this helpful?
Track Your Learning Progress
Sign in to bookmark tutorials and keep track of your learning journey.
Your progress is saved automatically as you read.