@@ -46,8 +46,9 @@ def _fetch_data(outfile):
46
46
def _transform_to_csv(infile, outfile):
    """Convert the raw JSON extract into a headerless, tab-separated file.

    Reads ``infile`` with ``pandas.read_json``, reduces every
    ``date_of_interest`` value to the text before its first ``"T"``
    (``"2020-03-10T00:00:00.000"`` becomes ``"2020-03-10"``) and writes the
    frame to ``outfile`` without an index column or a header row.

    Args:
        infile: Path of the JSON file to read.
        outfile: Path of the tab-separated file to write.
    """
    # Make sure the staging directory exists before anything is written.
    pathlib.Path("/tmp/data/stg").mkdir(parents=True, exist_ok=True)
    data = pd.read_json(infile)
    # str.strip() treats its argument as a *set of characters* and trims both
    # ends, so stripping 'T00:00:00.000' also eats the trailing zero of dates
    # such as 2020-03-10. Keep only the part before the first "T" instead.
    data["date_of_interest"] = (
        data["date_of_interest"].str.split("T", n=1).str[0]
    )
    # to_csv() only accepts a single-character delimiter.
    data.to_csv(outfile, index=False, header=False, sep="\t", encoding="utf-8")
    logging.info(f"INFO: Processed { infile } and moved it to { outfile } ")
52
53
53
54
@@ -66,7 +67,7 @@ def _transform_to_csv(infile, outfile):
66
67
dag = dag ,
67
68
op_kwargs = {
68
69
"infile" : "/tmp/data/raw/covid_data_{{ ds }}.json" ,
69
- "outfile" : "/tmp/data/raw /covid_data_{{ ds }}.csv" ,
70
+ "outfile" : "/tmp/data/stg /covid_data_{{ ds }}.csv" ,
70
71
},
71
72
)
72
73
@@ -83,53 +84,17 @@ def _transform_to_csv(infile, outfile):
83
84
# Executes the statements in sql/create_test.sql over the "covid_postgres"
# connection. NOTE(review): the SQL file is not visible here; confirm that it
# creates the table the load task writes to.
create_covid_data_table = PostgresOperator(
    task_id="create_table_covid",
    postgres_conn_id="covid_postgres",
    sql="sql/create_test.sql",
    dag=dag,
)
89
90
90
91
# Hands the staged file for the run date ({{ ds }}) and the target table name
# to the project-defined LoadCsvtoPostgresOperator.
# NOTE(review): the operator's implementation is not visible here; presumably
# it bulk-loads file_path into table - confirm the expected delimiter matches
# the tab-separated, headerless file written by the transform step.
load_csv_to_postgres_dwh = LoadCsvtoPostgresOperator(
    task_id='load_to_covid_data_table',
    postgres_conn_id="covid_postgres",
    table="covid_table",
    file_path="/tmp/data/stg/covid_data_{{ ds }}.csv",
    dag=dag,
)
97
98
98
99
# fetch_data >> transform_to_csv >> normalize_covid_csv >> create_covid_data_table>> load_csv_to_postgres_dwh
99
-
100
- def _test_transform (infile , outfile ):
101
- pathlib .Path ("/tmp/data/stg" ).mkdir (parents = True , exist_ok = True )
102
- data = pd .read_json (infile )
103
- data = data .set_index ("date_of_interest" )
104
- data = data [['date_of_interest' , 'case_count' ]]
105
- data .to_csv (outfile )
106
- logging .info (f"INFO: Processed { infile } and moved it to { outfile } " )
107
-
108
- test_transform = PythonOperator (
109
- task_id = "test_transform" ,
110
- python_callable = _test_transform ,
111
- dag = dag ,
112
- op_kwargs = {
113
- "infile" : "/tmp/data/raw/covid_data_{{ ds }}.json" ,
114
- "outfile" : "/tmp/data/raw/covid_data_{{ ds }}.csv" ,
115
- },
116
-
117
- )
118
-
119
- create_covid_test_table = PostgresOperator (
120
- task_id = "create_covid_test_table" ,
121
- postgres_conn_id = "covid_postgres" ,
122
- sql = "sql/create_test.sql" ,
123
- dag = dag
124
- )
125
-
126
-
127
- test_data_load = LoadCsvtoPostgresOperator (
128
- task_id = 'test_data_load' ,
129
- postgres_conn_id = "covid_postgres" ,
130
- table = "covid_test" ,
131
- file_path = "/tmp/data/stg/covid_data_{{ ds }}.csv" ,
132
- dag = dag ,
133
- )
134
-
135
- fetch_data >> test_transform >> normalize_covid_csv >> create_covid_test_table >> test_data_load
100
# Linear pipeline: each task runs only after the one to its left has succeeded.
fetch_data >> transform_to_csv >> create_covid_data_table >> load_csv_to_postgres_dwh
0 commit comments