NOTE(review): this patch was recovered from a copy in which every line break
had been collapsed into spaces. Line breaks and indentation below are
reconstructed: the per-hunk line counts agree with the hunk headers, but the
indentation of context lines is inferred and should be checked against the
target files (or the patch applied with `git apply --ignore-whitespace`).

diff --git a/src/mods4pandas/lib.py b/src/mods4pandas/lib.py
index 627d504..0567c3c 100644
--- a/src/mods4pandas/lib.py
+++ b/src/mods4pandas/lib.py
@@ -452,3 +452,12 @@ def convert_db_to_parquet(con, table, index_col, output_file):
             )
 
     df.to_parquet(output_file)
+
+
+def sqlite3_column_exists(con, table, col):
+    """Check if column col exists in table."""
+    cur = con.execute(
+        "SELECT 1 FROM pragma_table_info(?) WHERE name = ? LIMIT 1",
+        (table, col)
+    )
+    return cur.fetchone() is not None
diff --git a/src/mods4pandas/mods4pandas.py b/src/mods4pandas/mods4pandas.py
index e9c87b9..e357391 100755
--- a/src/mods4pandas/mods4pandas.py
+++ b/src/mods4pandas/mods4pandas.py
@@ -20,6 +20,7 @@ from .lib import (
     insert_into_db_multiple,
     ns,
     sorted_groupby,
+    sqlite3_column_exists,
 )
 
 
@@ -643,17 +644,16 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets
                 logger.exception("Exception in {}".format(mets_file))
 
     logger.info("Writing DataFrame to {}".format(output_file))
+
     considered_indexes = ("recordInfo_recordIdentifier", "recordIdentifier-zdb")
     success = False
     for considered_index in considered_indexes:
-        try:
+        if sqlite3_column_exists(con, "mods_info", considered_index):
             convert_db_to_parquet(con, "mods_info", considered_index, output_file)
             success = True
             break
-        except:
-            pass
     if not success:
-        raise ValueError(f"None of {considered_indexes} found")
+        raise ValueError(f"Can't set index, none of {considered_indexes} found")
 
     if output_page_info:
         logger.info("Writing DataFrame to {}".format(output_page_info))