-
Notifications
You must be signed in to change notification settings - Fork 41
Expand file tree
/
Copy pathmain.py
More file actions
61 lines (46 loc) · 1.81 KB
/
main.py
File metadata and controls
61 lines (46 loc) · 1.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python3
"""
AirLife ETL Pipeline - Simple Version
This script runs the complete ETL pipeline:
1. Extract airport data from CSV and flight data from API
2. Clean and transform the data
3. Load the data into PostgreSQL database
Run with: python main.py
"""
from src.extract_data import extract_airports, extract_flights
from src.transform_data import clean_airports, clean_flights, combine_data
from src.load_data import load_to_database, verify_data
def main():
    """Drive the AirLife ETL pipeline stage by stage, printing progress.

    The extraction, transformation, loading and verification calls are still
    commented-out placeholders. Until they are enabled, this function prints
    the start-up and extraction banners, reports that the extraction
    functions are not implemented, and returns without running later stages.
    """
    rule = "=" * 50
    print("🛫 Starting AirLife ETL Pipeline...", rule, sep="\n")

    # ---- Stage 1: extract ----
    print("\n=== EXTRACTION ===", "📥 Extracting data from sources...", sep="\n")
    # TODO: Call the extraction functions
    # airports = extract_airports()
    # flights = extract_flights()
    # Uncomment the lines above once you've implemented the functions
    print("⚠️ Extraction functions not yet implemented")
    return

    # Everything below is unreachable until the early return above is removed.

    # ---- Stage 2: transform ----
    print("\n=== TRANSFORMATION ===", "🔄 Cleaning and transforming data...", sep="\n")
    # TODO: Call the transformation functions
    # clean_airports_data = clean_airports(airports)
    # clean_flights_data = clean_flights(flights)
    # final_airports, final_flights = combine_data(clean_airports_data, clean_flights_data)

    # ---- Stage 3: load ----
    print("\n=== LOADING ===", "💾 Loading data to database...", sep="\n")
    # TODO: Call the loading function
    # load_to_database(final_airports, final_flights)

    # ---- Stage 4: verify ----
    print("\n=== VERIFICATION ===", "✅ Verifying data was loaded correctly...", sep="\n")
    # TODO: Call the verification function
    # verify_data()

    print("\n🎉 ETL Pipeline completed!", rule, sep="\n")
# Script entry point: run the pipeline only when this file is executed
# directly (e.g. `python main.py`), not when it is imported as a module.
if __name__ == "__main__":
    main()