mirror of
https://github.com/qurator-spk/page2tsv.git
synced 2026-03-15 11:42:06 +01:00
Add support for image URL in page metadata
This commit is contained in:
parent
5d9d9f03cf
commit
b90d1fe56a
2 changed files with 9 additions and 1 deletions
|
|
@@ -403,6 +403,14 @@ def page2tsv(page_xml_file, tsv_out_file, purpose, image_url, ner_rest_endpoint,
|
||||||
tsv = []
|
tsv = []
|
||||||
line_info = []
|
line_info = []
|
||||||
|
|
||||||
|
if image_url == 'http://empty':
|
||||||
|
creator = pcgts.Metadata.get_Creator()
|
||||||
|
|
||||||
|
neat_url_ma = re.match(r"(.*)\|NEAT_URL:(.*?)\|.*", creator)
|
||||||
|
|
||||||
|
if neat_url_ma:
|
||||||
|
image_url = neat_url_ma.group(2)
|
||||||
|
|
||||||
_unicode_normalize = unicode_normalize
|
_unicode_normalize = unicode_normalize
|
||||||
|
|
||||||
if normalization_file is not None:
|
if normalization_file is not None:
|
||||||
|
|
|
||||||
|
|
@@ -1,3 +1,3 @@
|
||||||
ocrd >= 2.23.2
|
ocrd >= 2.23.2
|
||||||
pandas
|
pandas
|
||||||
qurator-sbb-utils @ git+https://github.com/qurator-spk/sbb_utils.git
|
qurator-sbb-utils @ git+https://github.com/qurator-spk/sbb_utils.git
|
||||||
Loading…
Add table
Add a link
Reference in a new issue