diff --git a/src/mods4pandas/alto4pandas.py b/src/mods4pandas/alto4pandas.py index 2ab7728..34373c7 100755 --- a/src/mods4pandas/alto4pandas.py +++ b/src/mods4pandas/alto4pandas.py @@ -3,6 +3,7 @@ import contextlib import csv import os import sqlite3 +import sys import warnings from operator import attrgetter from typing import List @@ -19,6 +20,7 @@ from .lib import ( insert_into_db, ns, sorted_groupby, + sqlite3_table_exists, ) @@ -239,6 +241,11 @@ def process(alto_files: List[str], output_file: str): traceback.print_exc() + # Check if table exists + if not sqlite3_table_exists(con, "alto_info"): + logger.error("Table alto_info does not exist, empty input?") + sys.exit(1) + # Convert the alto_info SQL to a pandas DataFrame logger.info("Writing DataFrame to {}".format(output_file)) convert_db_to_parquet(con, "alto_info", "alto_file", output_file) diff --git a/src/mods4pandas/lib.py b/src/mods4pandas/lib.py index 693e15b..1286049 100644 --- a/src/mods4pandas/lib.py +++ b/src/mods4pandas/lib.py @@ -469,6 +469,12 @@ def convert_db_to_parquet(con, table, index_col, output_file): df.to_parquet(output_file) +def sqlite3_table_exists(con, table): + """Check if table exists.""" + cur = con.execute("SELECT 1 FROM pragma_table_info(?) LIMIT 1", (table,)) + return cur.fetchone() is not None + + def sqlite3_column_exists(con, table, col): """Check if column col exists in table.""" cur = con.execute( diff --git a/src/mods4pandas/mods4pandas.py b/src/mods4pandas/mods4pandas.py index 34b58df..3b80438 100755 --- a/src/mods4pandas/mods4pandas.py +++ b/src/mods4pandas/mods4pandas.py @@ -3,6 +3,7 @@ import contextlib import csv import os import sqlite3 +import sys import warnings from operator import attrgetter from typing import Dict, List @@ -21,6 +22,7 @@ from .lib import ( ns, sorted_groupby, sqlite3_column_exists, + sqlite3_table_exists, ) @@ -653,6 +655,11 @@ def process( except Exception: logger.exception("Exception in {}".format(mets_file)) + # Check if table exists + if not sqlite3_table_exists(con, "mods_info"): + logger.error("Table mods_info does not exist, empty input?") + sys.exit(1) + logger.info("Writing DataFrame to {}".format(output_file)) considered_indexes = ("recordInfo_recordIdentifier", "recordIdentifier-zdb")