mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-06-09 19:59:57 +02:00
🚧 Write out page_info
This commit is contained in:
parent
e51fa5750f
commit
c5332ae80d
2 changed files with 15 additions and 2 deletions
|
@ -300,7 +300,7 @@ def flatten(d: MutableMapping, parent='', separator='_'):
|
||||||
return dict(items)
|
return dict(items)
|
||||||
|
|
||||||
|
|
||||||
def dicts_to_df(data_list: List[Dict], *, index_column: str) -> pd.DataFrame:
|
def dicts_to_df(data_list: List[Dict], *, index_column) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
Convert the given list of dicts to a Pandas DataFrame.
|
Convert the given list of dicts to a Pandas DataFrame.
|
||||||
|
|
||||||
|
@ -318,7 +318,13 @@ def dicts_to_df(data_list: List[Dict], *, index_column: str) -> pd.DataFrame:
|
||||||
data = [[m.get(c) for c in columns] for m in data_list]
|
data = [[m.get(c) for c in columns] for m in data_list]
|
||||||
|
|
||||||
# Build index
|
# Build index
|
||||||
index = [m[index_column] for m in data_list]
|
if isinstance(index_column, str):
|
||||||
|
index = [m[index_column] for m in data_list]
|
||||||
|
elif isinstance(index_column, tuple):
|
||||||
|
index = [[m[c] for m in data_list] for c in index_column]
|
||||||
|
index = pd.MultiIndex.from_arrays(index, names=index_column)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"index_column must")
|
||||||
|
|
||||||
df = pd.DataFrame(data=data, index=index, columns=columns)
|
df = pd.DataFrame(data=data, index=index, columns=columns)
|
||||||
return df
|
return df
|
||||||
|
|
|
@ -415,6 +415,13 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls
|
||||||
logger.info('Writing Excel .xlsx to {}'.format(output_xlsx))
|
logger.info('Writing Excel .xlsx to {}'.format(output_xlsx))
|
||||||
mods_info_df.to_excel(output_xlsx)
|
mods_info_df.to_excel(output_xlsx)
|
||||||
|
|
||||||
|
# Convert page_info
|
||||||
|
# XXX hardcoded filenames + other formats
|
||||||
|
page_info_df = dicts_to_df(page_info, index_column=("ppn", "ID"))
|
||||||
|
# Pickle the DataFrame
|
||||||
|
logger.info('Writing DataFrame to {}'.format("page_info_df.pkl"))
|
||||||
|
page_info_df.to_pickle("page_info_df.pkl")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue