mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-08-14 03:59:53 +02:00
✨ Add --mets-files-list option to give a list of input files
This commit is contained in:
parent
4178f1e380
commit
0855ccb66b
1 changed files with 21 additions and 5 deletions
|
@ -513,8 +513,14 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def validate_mets_files(ctx, param, mets_files):
    """
    Click callback for the ``mets_files`` argument: require some input source.

    The command accepts METS files either as positional arguments or through
    the ``--mets-files-list`` option, so the positional argument can no longer
    simply be marked as required. This callback rejects the invocation when
    neither source is given.

    :param ctx: the click context; ``ctx.params`` holds the parameters that
        have already been processed
    :param param: the parameter being validated (unused)
    :param mets_files: the positional METS file paths (possibly empty)
    :return: ``mets_files`` unchanged
    :raises click.BadParameter: if no positional files were given and no
        ``--mets-files-list`` value is available
    """
    # Look at the option's *value* rather than the mere presence of its key:
    # a key that is present but None/empty (e.g. the option was processed
    # before this argument without being supplied) must not count as input.
    if not mets_files and not ctx.params.get("mets_files_list"):
        raise click.BadParameter("Neither mets_files nor mets_files_list given")
    return mets_files
|
||||||
|
|
||||||
|
|
||||||
@click.command()
|
@click.command()
|
||||||
@click.argument("mets_files", type=click.Path(exists=True), required=True, nargs=-1)
|
@click.argument("mets_files", type=click.Path(exists=True), nargs=-1, callback=validate_mets_files)
|
||||||
@click.option(
|
@click.option(
|
||||||
"--output",
|
"--output",
|
||||||
"-o",
|
"-o",
|
||||||
|
@ -527,7 +533,10 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]:
|
||||||
@click.option(
|
@click.option(
|
||||||
"--output-page-info", type=click.Path(), help="Output page info Parquet file"
|
"--output-page-info", type=click.Path(), help="Output page info Parquet file"
|
||||||
)
|
)
|
||||||
def process_command(mets_files: list[str], output_file: str, output_page_info: str):
|
@click.option(
|
||||||
|
"--mets-files-list", type=click.Path(), help="Read list of METS files from this file"
|
||||||
|
)
|
||||||
|
def process_command(mets_files: list[str], output_file: str, output_page_info: str, mets_files_list: str):
|
||||||
"""
|
"""
|
||||||
A tool to convert the MODS metadata in INPUT to a pandas DataFrame.
|
A tool to convert the MODS metadata in INPUT to a pandas DataFrame.
|
||||||
|
|
||||||
|
@ -538,12 +547,19 @@ def process_command(mets_files: list[str], output_file: str, output_page_info: s
|
||||||
|
|
||||||
Per-page information (e.g. structure information) can be output to a separate Parquet file.
|
Per-page information (e.g. structure information) can be output to a separate Parquet file.
|
||||||
"""
|
"""
|
||||||
process(mets_files, output_file, output_page_info)
|
process(mets_files, output_file, output_page_info, mets_files_list)
|
||||||
|
|
||||||
|
|
||||||
def process(mets_files: list[str], output_file: str, output_page_info: str):
|
|
||||||
# Extend file list if directories are given
|
|
||||||
|
def process(mets_files: list[str], output_file: str, output_page_info: str, mets_files_list: str):
|
||||||
mets_files_real: list[str] = []
|
mets_files_real: list[str] = []
|
||||||
|
|
||||||
|
if mets_files_list:
|
||||||
|
with open(mets_files_list) as f:
|
||||||
|
mets_files_real = [line.strip() for line in f.readlines()]
|
||||||
|
|
||||||
|
# Extend file list if directories are given
|
||||||
for m in mets_files:
|
for m in mets_files:
|
||||||
if os.path.isdir(m):
|
if os.path.isdir(m):
|
||||||
logger.info("Scanning directory {}".format(m))
|
logger.info("Scanning directory {}".format(m))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue