diff --git a/qurator/tsvtools/cli.py b/qurator/tsvtools/cli.py index 8706706..410e7d7 100644 --- a/qurator/tsvtools/cli.py +++ b/qurator/tsvtools/cli.py @@ -403,6 +403,14 @@ def page2tsv(page_xml_file, tsv_out_file, purpose, image_url, ner_rest_endpoint, tsv = [] line_info = [] + if image_url == 'http://empty': + creator = pcgts.Metadata.get_Creator() + + neat_url_ma = re.match(r"(.*)\|NEAT_URL:(.*?)\|.*", creator) + + if neat_url_ma: + image_url = neat_url_ma.group(2) + _unicode_normalize = unicode_normalize if normalization_file is not None: diff --git a/requirements.txt b/requirements.txt index 4ceb92e..984ccf2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ ocrd >= 2.23.2 pandas -qurator-sbb-utils @ git+https://github.com/qurator-spk/sbb_utils.git +qurator-sbb-utils @ git+https://github.com/qurator-spk/sbb_utils.git \ No newline at end of file