mirror of
https://github.com/qurator-spk/page2tsv.git
synced 2026-03-15 11:42:06 +01:00
Add support for image URL in page metadata
This commit is contained in:
parent
5d9d9f03cf
commit
b90d1fe56a
2 changed files with 9 additions and 1 deletion
|
|
@ -403,6 +403,14 @@ def page2tsv(page_xml_file, tsv_out_file, purpose, image_url, ner_rest_endpoint,
|
|||
tsv = []
|
||||
line_info = []
|
||||
|
||||
if image_url == 'http://empty':
|
||||
creator = pcgts.Metadata.get_Creator()
|
||||
|
||||
neat_url_ma = re.match(r"(.*)\|NEAT_URL:(.*?)\|.*", creator)
|
||||
|
||||
if neat_url_ma:
|
||||
image_url = neat_url_ma.group(2)
|
||||
|
||||
_unicode_normalize = unicode_normalize
|
||||
|
||||
if normalization_file is not None:
|
||||
|
|
|
|||
|
|
@ -1,3 +1,3 @@
|
|||
ocrd >= 2.23.2
|
||||
pandas
|
||||
qurator-sbb-utils @ git+https://github.com/qurator-spk/sbb_utils.git
|
||||
qurator-sbb-utils @ git+https://github.com/qurator-spk/sbb_utils.git
|
||||
Loading…
Add table
Add a link
Reference in a new issue