1
0
Fork 0
mirror of https://github.com/qurator-spk/modstool.git synced 2025-08-16 13:09:53 +02:00

🚧 Restore types before saving as Parquet

This commit is contained in:
Mike Gerber 2025-06-04 21:10:10 +02:00
parent 14172e3b81
commit ebe988cfff
4 changed files with 26 additions and 21 deletions

View file

@ -355,3 +355,24 @@ def insert_into_db(con, table, d: Dict):
def insert_into_db_multiple(con, table, ld: List[Dict]):
    """Insert several rows into ``table``.

    Calls ``insert_into_db(con, table, row)`` once for every dict in ``ld``,
    in iteration order.
    """
    for row in ld:
        insert_into_db(con, table, row)
def convert_db_to_parquet(con, table, index_col, output_file):
    """Write one database table to a Parquet file, restoring column dtypes.

    The whole table is read with ``SELECT *`` into a DataFrame (using
    ``index_col`` as its index). Each DataFrame column is then looked up in
    ``current_columns_types[table]`` and converted according to the recorded
    type name before the frame is written to ``output_file``.

    NOTE(review): ``current_columns_types`` is defined outside this block;
    it is presumably a mapping of table -> column -> Python type name.

    Raises:
        NotImplementedError: if a column's recorded type name is not one of
            "str", "int", "float64" or "set".
    """
    # Recorded type name -> pandas nullable extension dtype to cast to.
    nullable_dtypes = {"int": "Int64", "float64": "Float64"}
    # Recorded type names whose columns are written unchanged. "set" is only
    # skipped for now: its conversion is still TODO / work in progress.
    untouched = ("str", "set")

    frame = pd.read_sql_query(f"SELECT * FROM {table}", con, index_col)

    # Convert Python column type into Pandas type
    for name in frame.columns:
        type_name = current_columns_types[table][name]
        if type_name in untouched:
            continue
        if type_name not in nullable_dtypes:
            raise NotImplementedError(f"Column type {type_name} not implemented yet.")
        frame[name] = frame[name].astype(nullable_dtypes[type_name])

    frame.to_parquet(output_file)