tests
parent
60a07c6310
commit
81ba7cff82
@ -1,2 +1,4 @@
|
||||
*.egg-info
|
||||
__pycache__
|
||||
tests/**/*.jpg
|
||||
tests/**/*.png
|
||||
|
@ -0,0 +1,46 @@
|
||||
from pathlib import Path
|
||||
from shutil import copytree
|
||||
from pytest import fixture
|
||||
|
||||
from ocrd_utils import pushd_popd
|
||||
from ocrd_models.ocrd_page import parse
|
||||
from ocrd import Resolver
|
||||
|
||||
from tsvtools.ocrd_processors import OcrdNeatExportProcessor, OcrdNeatImportProcessor
|
||||
|
||||
@fixture
def testws(tmpdir):
    """Return a workspace resolved from a scratch copy of ``tests/testws``.

    The fixture tree is copied to ``<tmpdir>/ws`` and the workspace is
    resolved from the ``mets.xml`` inside that copy.

    Note: the source path ``tests/testws`` is relative, so it is looked up
    against the current working directory of the test run.
    """
    scratch_ws = f'{tmpdir}/ws'
    copytree('tests/testws', scratch_ws)
    resolver = Resolver()
    return resolver.workspace_from_url(f'{tmpdir}/ws/mets.xml')
|
||||
|
||||
def test_imexport(testws):
    """Round-trip page 0005 through the TSV export and import processors.

    Steps:
      1. Export file group ``TESS`` to ``OUT`` and check the generated TSV.
      2. Replace the misrecognised word in the TSV.
      3. Import ``TESS,OUT`` into ``TESS-CORRECTED``.
      4. Check that the corrected PAGE file carries the fix while the
         original PAGE file is left untouched.
    """

    def second_line_text(pcgts):
        # Text of the second line of the first text region of a parsed page.
        first_region = pcgts.get_Page().get_TextRegion()[0]
        return first_region.get_TextLine()[1].get_TextEquiv()[0].Unicode

    workspace_dir = testws.directory

    # 1. Export to TSV.
    OcrdNeatExportProcessor(workspace=testws, input_file_grp='TESS', output_file_grp='OUT').process()
    tsv_path = Path(workspace_dir, 'OUT/FILE_0005_OUT.tsv')
    assert tsv_path.exists()
    tsv_text = tsv_path.read_text()
    assert 'Ein Welt-Stantenbund 0 174 1116 169 280 region0000_line0001' in tsv_text
    assert tsv_text.splitlines()[1] == '# https://content.staatsbibliothek-berlin.de/dc/PPN680203753-0005/left,top,width,height/full/0/default.jpg'

    # 2. Correct the OCR error directly in the exported TSV.
    tsv_path.write_text(tsv_text.replace('Stantenbund', 'Staatenbund'))

    # 3. Import the edited TSV into a new file group.
    OcrdNeatImportProcessor(workspace=testws, input_file_grp='TESS,OUT', output_file_grp='TESS-CORRECTED').process()

    original_xml = Path(workspace_dir, 'TESS/FILE_0005_TESS.xml')
    corrected_xml = Path(workspace_dir, 'TESS-CORRECTED/FILE_0005_TESS-CORRECTED.xml')

    assert original_xml.exists()
    assert corrected_xml.exists()

    original_page = parse(original_xml)
    corrected_page = parse(corrected_xml)

    original_line = second_line_text(original_page)
    corrected_line = second_line_text(corrected_page)

    # 4. Only the corrected PAGE file may contain the fixed spelling.
    assert 'Stantenbund' in original_line
    assert 'Stantenbund' not in corrected_line

    assert 'Staatenbund' not in original_line
    assert 'Staatenbund' in corrected_line
|
@ -0,0 +1,48 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pc:PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="FILE_0001_TESS">
|
||||
<pc:Metadata>
|
||||
<pc:Creator>OCR-D/core 2.34.0</pc:Creator>
|
||||
<pc:Created>2022-05-30T16:41:01.833765</pc:Created>
|
||||
<pc:LastChange>2022-05-30T16:41:01.833765</pc:LastChange>
|
||||
<pc:MetadataItem type="processingStep" name="layout/segmentation/region" value="ocrd-tesserocr-recognize">
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="parameters">
|
||||
<pc:Label value="False" type="find_tables"/>
|
||||
<pc:Label value="line" type="textequiv_level"/>
|
||||
<pc:Label value="region" type="segmentation_level"/>
|
||||
<pc:Label value="deu" type="model"/>
|
||||
<pc:Label value="0" type="dpi"/>
|
||||
<pc:Label value="0" type="padding"/>
|
||||
<pc:Label value="False" type="overwrite_segments"/>
|
||||
<pc:Label value="True" type="overwrite_text"/>
|
||||
<pc:Label value="False" type="shrink_polygons"/>
|
||||
<pc:Label value="False" type="block_polygons"/>
|
||||
<pc:Label value="False" type="find_staves"/>
|
||||
<pc:Label value="False" type="sparse_text"/>
|
||||
<pc:Label value="False" type="raw_lines"/>
|
||||
<pc:Label value="" type="char_whitelist"/>
|
||||
<pc:Label value="" type="char_blacklist"/>
|
||||
<pc:Label value="" type="char_unblacklist"/>
|
||||
<pc:Label value="{}" type="tesseract_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_model"/>
|
||||
<pc:Label value="False" type="auto_model"/>
|
||||
<pc:Label value="DEFAULT" type="oem"/>
|
||||
</pc:Labels>
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="version">
|
||||
<pc:Label value="0.13.6 (tesseract 5.1.0)" type="ocrd-tesserocr-recognize"/>
|
||||
<pc:Label value="2.34.0" type="ocrd/core"/>
|
||||
</pc:Labels>
|
||||
</pc:MetadataItem>
|
||||
</pc:Metadata>
|
||||
<pc:Page imageFilename="DEFAULT/FILE_0001_DEFAULT.jpg" imageWidth="1485" imageHeight="2124">
|
||||
<pc:AlternativeImage filename="TESS/FILE_0001_TESS.IMG-BIN.png" comments=",binarized,clipped"/>
|
||||
<pc:ReadingOrder>
|
||||
<pc:OrderedGroup id="reading-order">
|
||||
<pc:RegionRefIndexed index="0" regionRef="region0000"/>
|
||||
</pc:OrderedGroup>
|
||||
</pc:ReadingOrder>
|
||||
<pc:ImageRegion id="region0000" orientation="0.">
|
||||
<pc:Coords points="0,0 1485,0 1485,2124 0,2124"/>
|
||||
</pc:ImageRegion>
|
||||
</pc:Page>
|
||||
</pc:PcGts>
|
@ -0,0 +1,48 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pc:PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="FILE_0002_TESS">
|
||||
<pc:Metadata>
|
||||
<pc:Creator>OCR-D/core 2.34.0</pc:Creator>
|
||||
<pc:Created>2022-05-30T16:41:03.031763</pc:Created>
|
||||
<pc:LastChange>2022-05-30T16:41:03.031763</pc:LastChange>
|
||||
<pc:MetadataItem type="processingStep" name="layout/segmentation/region" value="ocrd-tesserocr-recognize">
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="parameters">
|
||||
<pc:Label value="False" type="find_tables"/>
|
||||
<pc:Label value="line" type="textequiv_level"/>
|
||||
<pc:Label value="region" type="segmentation_level"/>
|
||||
<pc:Label value="deu" type="model"/>
|
||||
<pc:Label value="0" type="dpi"/>
|
||||
<pc:Label value="0" type="padding"/>
|
||||
<pc:Label value="False" type="overwrite_segments"/>
|
||||
<pc:Label value="True" type="overwrite_text"/>
|
||||
<pc:Label value="False" type="shrink_polygons"/>
|
||||
<pc:Label value="False" type="block_polygons"/>
|
||||
<pc:Label value="False" type="find_staves"/>
|
||||
<pc:Label value="False" type="sparse_text"/>
|
||||
<pc:Label value="False" type="raw_lines"/>
|
||||
<pc:Label value="" type="char_whitelist"/>
|
||||
<pc:Label value="" type="char_blacklist"/>
|
||||
<pc:Label value="" type="char_unblacklist"/>
|
||||
<pc:Label value="{}" type="tesseract_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_model"/>
|
||||
<pc:Label value="False" type="auto_model"/>
|
||||
<pc:Label value="DEFAULT" type="oem"/>
|
||||
</pc:Labels>
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="version">
|
||||
<pc:Label value="0.13.6 (tesseract 5.1.0)" type="ocrd-tesserocr-recognize"/>
|
||||
<pc:Label value="2.34.0" type="ocrd/core"/>
|
||||
</pc:Labels>
|
||||
</pc:MetadataItem>
|
||||
</pc:Metadata>
|
||||
<pc:Page imageFilename="DEFAULT/FILE_0002_DEFAULT.jpg" imageWidth="1462" imageHeight="2080">
|
||||
<pc:AlternativeImage filename="TESS/FILE_0002_TESS.IMG-BIN.png" comments=",binarized,clipped"/>
|
||||
<pc:ReadingOrder>
|
||||
<pc:OrderedGroup id="reading-order">
|
||||
<pc:RegionRefIndexed index="0" regionRef="region0000"/>
|
||||
</pc:OrderedGroup>
|
||||
</pc:ReadingOrder>
|
||||
<pc:ImageRegion id="region0000" orientation="0.">
|
||||
<pc:Coords points="0,0 1462,0 1462,2080 0,2080"/>
|
||||
</pc:ImageRegion>
|
||||
</pc:Page>
|
||||
</pc:PcGts>
|
@ -0,0 +1,48 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pc:PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="FILE_0003_TESS">
|
||||
<pc:Metadata>
|
||||
<pc:Creator>OCR-D/core 2.34.0</pc:Creator>
|
||||
<pc:Created>2022-05-30T16:41:04.040750</pc:Created>
|
||||
<pc:LastChange>2022-05-30T16:41:04.040750</pc:LastChange>
|
||||
<pc:MetadataItem type="processingStep" name="layout/segmentation/region" value="ocrd-tesserocr-recognize">
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="parameters">
|
||||
<pc:Label value="False" type="find_tables"/>
|
||||
<pc:Label value="line" type="textequiv_level"/>
|
||||
<pc:Label value="region" type="segmentation_level"/>
|
||||
<pc:Label value="deu" type="model"/>
|
||||
<pc:Label value="0" type="dpi"/>
|
||||
<pc:Label value="0" type="padding"/>
|
||||
<pc:Label value="False" type="overwrite_segments"/>
|
||||
<pc:Label value="True" type="overwrite_text"/>
|
||||
<pc:Label value="False" type="shrink_polygons"/>
|
||||
<pc:Label value="False" type="block_polygons"/>
|
||||
<pc:Label value="False" type="find_staves"/>
|
||||
<pc:Label value="False" type="sparse_text"/>
|
||||
<pc:Label value="False" type="raw_lines"/>
|
||||
<pc:Label value="" type="char_whitelist"/>
|
||||
<pc:Label value="" type="char_blacklist"/>
|
||||
<pc:Label value="" type="char_unblacklist"/>
|
||||
<pc:Label value="{}" type="tesseract_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_model"/>
|
||||
<pc:Label value="False" type="auto_model"/>
|
||||
<pc:Label value="DEFAULT" type="oem"/>
|
||||
</pc:Labels>
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="version">
|
||||
<pc:Label value="0.13.6 (tesseract 5.1.0)" type="ocrd-tesserocr-recognize"/>
|
||||
<pc:Label value="2.34.0" type="ocrd/core"/>
|
||||
</pc:Labels>
|
||||
</pc:MetadataItem>
|
||||
</pc:Metadata>
|
||||
<pc:Page imageFilename="DEFAULT/FILE_0003_DEFAULT.jpg" imageWidth="1461" imageHeight="2124">
|
||||
<pc:AlternativeImage filename="TESS/FILE_0003_TESS.IMG-BIN.png" comments=",binarized,clipped"/>
|
||||
<pc:ReadingOrder>
|
||||
<pc:OrderedGroup id="reading-order">
|
||||
<pc:RegionRefIndexed index="0" regionRef="region0000"/>
|
||||
</pc:OrderedGroup>
|
||||
</pc:ReadingOrder>
|
||||
<pc:ImageRegion id="region0000" orientation="0.">
|
||||
<pc:Coords points="0,0 1461,0 1461,2124 0,2124"/>
|
||||
</pc:ImageRegion>
|
||||
</pc:Page>
|
||||
</pc:PcGts>
|
@ -0,0 +1,48 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pc:PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="FILE_0004_TESS">
|
||||
<pc:Metadata>
|
||||
<pc:Creator>OCR-D/core 2.34.0</pc:Creator>
|
||||
<pc:Created>2022-05-30T16:41:05.028576</pc:Created>
|
||||
<pc:LastChange>2022-05-30T16:41:05.028576</pc:LastChange>
|
||||
<pc:MetadataItem type="processingStep" name="layout/segmentation/region" value="ocrd-tesserocr-recognize">
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="parameters">
|
||||
<pc:Label value="False" type="find_tables"/>
|
||||
<pc:Label value="line" type="textequiv_level"/>
|
||||
<pc:Label value="region" type="segmentation_level"/>
|
||||
<pc:Label value="deu" type="model"/>
|
||||
<pc:Label value="0" type="dpi"/>
|
||||
<pc:Label value="0" type="padding"/>
|
||||
<pc:Label value="False" type="overwrite_segments"/>
|
||||
<pc:Label value="True" type="overwrite_text"/>
|
||||
<pc:Label value="False" type="shrink_polygons"/>
|
||||
<pc:Label value="False" type="block_polygons"/>
|
||||
<pc:Label value="False" type="find_staves"/>
|
||||
<pc:Label value="False" type="sparse_text"/>
|
||||
<pc:Label value="False" type="raw_lines"/>
|
||||
<pc:Label value="" type="char_whitelist"/>
|
||||
<pc:Label value="" type="char_blacklist"/>
|
||||
<pc:Label value="" type="char_unblacklist"/>
|
||||
<pc:Label value="{}" type="tesseract_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_model"/>
|
||||
<pc:Label value="False" type="auto_model"/>
|
||||
<pc:Label value="DEFAULT" type="oem"/>
|
||||
</pc:Labels>
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="version">
|
||||
<pc:Label value="0.13.6 (tesseract 5.1.0)" type="ocrd-tesserocr-recognize"/>
|
||||
<pc:Label value="2.34.0" type="ocrd/core"/>
|
||||
</pc:Labels>
|
||||
</pc:MetadataItem>
|
||||
</pc:Metadata>
|
||||
<pc:Page imageFilename="DEFAULT/FILE_0004_DEFAULT.jpg" imageWidth="1323" imageHeight="1959">
|
||||
<pc:AlternativeImage filename="TESS/FILE_0004_TESS.IMG-BIN.png" comments=",binarized,clipped"/>
|
||||
<pc:ReadingOrder>
|
||||
<pc:OrderedGroup id="reading-order">
|
||||
<pc:RegionRefIndexed index="0" regionRef="region0000"/>
|
||||
</pc:OrderedGroup>
|
||||
</pc:ReadingOrder>
|
||||
<pc:ImageRegion id="region0000" orientation="0.">
|
||||
<pc:Coords points="0,0 1320,0 1320,1959 0,1959"/>
|
||||
</pc:ImageRegion>
|
||||
</pc:Page>
|
||||
</pc:PcGts>
|
@ -0,0 +1,131 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pc:PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="FILE_0005_TESS">
|
||||
<pc:Metadata>
|
||||
<pc:Creator>OCR-D/core 2.34.0</pc:Creator>
|
||||
<pc:Created>2022-05-30T16:41:06.023712</pc:Created>
|
||||
<pc:LastChange>2022-05-30T16:41:06.023712</pc:LastChange>
|
||||
<pc:MetadataItem type="processingStep" name="layout/segmentation/region" value="ocrd-tesserocr-recognize">
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="parameters">
|
||||
<pc:Label value="False" type="find_tables"/>
|
||||
<pc:Label value="line" type="textequiv_level"/>
|
||||
<pc:Label value="region" type="segmentation_level"/>
|
||||
<pc:Label value="deu" type="model"/>
|
||||
<pc:Label value="0" type="dpi"/>
|
||||
<pc:Label value="0" type="padding"/>
|
||||
<pc:Label value="False" type="overwrite_segments"/>
|
||||
<pc:Label value="True" type="overwrite_text"/>
|
||||
<pc:Label value="False" type="shrink_polygons"/>
|
||||
<pc:Label value="False" type="block_polygons"/>
|
||||
<pc:Label value="False" type="find_staves"/>
|
||||
<pc:Label value="False" type="sparse_text"/>
|
||||
<pc:Label value="False" type="raw_lines"/>
|
||||
<pc:Label value="" type="char_whitelist"/>
|
||||
<pc:Label value="" type="char_blacklist"/>
|
||||
<pc:Label value="" type="char_unblacklist"/>
|
||||
<pc:Label value="{}" type="tesseract_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_model"/>
|
||||
<pc:Label value="False" type="auto_model"/>
|
||||
<pc:Label value="DEFAULT" type="oem"/>
|
||||
</pc:Labels>
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="version">
|
||||
<pc:Label value="0.13.6 (tesseract 5.1.0)" type="ocrd-tesserocr-recognize"/>
|
||||
<pc:Label value="2.34.0" type="ocrd/core"/>
|
||||
</pc:Labels>
|
||||
</pc:MetadataItem>
|
||||
</pc:Metadata>
|
||||
<pc:Page imageFilename="DEFAULT/FILE_0005_DEFAULT.jpg" imageWidth="1323" imageHeight="1959">
|
||||
<pc:AlternativeImage filename="TESS/FILE_0005_TESS.IMG-BIN.png" comments=",binarized,clipped"/>
|
||||
<pc:ReadingOrder>
|
||||
<pc:OrderedGroup id="reading-order">
|
||||
<pc:RegionRefIndexed index="0" regionRef="region0000"/>
|
||||
<pc:RegionRefIndexed index="1" regionRef="region0001"/>
|
||||
<pc:RegionRefIndexed index="2" regionRef="region0002"/>
|
||||
<pc:RegionRefIndexed index="3" regionRef="region0003"/>
|
||||
</pc:OrderedGroup>
|
||||
</pc:ReadingOrder>
|
||||
<pc:TextRegion id="region0000" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="174,117 1116,117 1116,553 174,553"/>
|
||||
<pc:TextLine id="region0000_line0000">
|
||||
<pc:Coords points="254,117 788,117 788,192 254,192"/>
|
||||
<pc:TextEquiv conf="0.">
|
||||
<pc:Unicode>- Paraf faer</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextLine id="region0000_line0001">
|
||||
<pc:Coords points="174,169 1116,169 1116,280 174,280"/>
|
||||
<pc:TextEquiv conf="0.858832244873047">
|
||||
<pc:Unicode>Ein Welt-Stantenbund</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextLine id="region0000_line0002">
|
||||
<pc:Coords points="219,352 1068,352 1068,432 219,432"/>
|
||||
<pc:TextEquiv conf="0.550223922729492">
|
||||
<pc:Unicode>als fiherhes Mittel zur Befeitigung</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextLine id="region0000_line0003">
|
||||
<pc:Coords points="500,481 790,481 790,553 500,553"/>
|
||||
<pc:TextEquiv conf="0.919975433349609">
|
||||
<pc:Unicode>des Krieges.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.832257900238037">
|
||||
<pc:Unicode>- Paraf faer
|
||||
Ein Welt-Stantenbund
|
||||
als fiherhes Mittel zur Befeitigung
|
||||
des Krieges.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0001" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="614,871 675,871 675,899 614,899"/>
|
||||
<pc:TextLine id="region0001_line0000">
|
||||
<pc:Coords points="614,871 675,871 675,899 614,899"/>
|
||||
<pc:TextEquiv conf="0.813062591552734">
|
||||
<pc:Unicode>Von</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.813062591552734">
|
||||
<pc:Unicode>Von</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0002" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="294,924 988,924 988,988 294,988"/>
|
||||
<pc:TextLine id="region0002_line0000">
|
||||
<pc:Coords points="294,924 988,924 988,988 294,988"/>
|
||||
<pc:TextEquiv conf="0.709815826416016">
|
||||
<pc:Unicode>Eduard Zoewenthal, Dr. phil.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.709815826416016">
|
||||
<pc:Unicode>Eduard Zoewenthal, Dr. phil.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0003" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="422,1545 852,1545 852,1727 422,1727"/>
|
||||
<pc:TextLine id="region0003_line0000">
|
||||
<pc:Coords points="544,1545 743,1545 743,1594 544,1594"/>
|
||||
<pc:TextEquiv conf="0.320938034057617">
|
||||
<pc:Unicode>Berlin,</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextLine id="region0003_line0001">
|
||||
<pc:Coords points="422,1614 852,1614 852,1666 422,1666"/>
|
||||
<pc:TextEquiv conf="0.285389556884766">
|
||||
<pc:Unicode>Reform-Berlag,.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextLine id="region0003_line0002">
|
||||
<pc:Coords points="570,1689 704,1689 704,1727 570,1727"/>
|
||||
<pc:TextEquiv conf="0.730131225585937">
|
||||
<pc:Unicode>1896,</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.445486272176107">
|
||||
<pc:Unicode>Berlin,
|
||||
Reform-Berlag,.
|
||||
1896,</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
</pc:Page>
|
||||
</pc:PcGts>
|
@ -0,0 +1,48 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pc:PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="FILE_0006_TESS">
|
||||
<pc:Metadata>
|
||||
<pc:Creator>OCR-D/core 2.34.0</pc:Creator>
|
||||
<pc:Created>2022-05-30T16:41:07.086662</pc:Created>
|
||||
<pc:LastChange>2022-05-30T16:41:07.086662</pc:LastChange>
|
||||
<pc:MetadataItem type="processingStep" name="layout/segmentation/region" value="ocrd-tesserocr-recognize">
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="parameters">
|
||||
<pc:Label value="False" type="find_tables"/>
|
||||
<pc:Label value="line" type="textequiv_level"/>
|
||||
<pc:Label value="region" type="segmentation_level"/>
|
||||
<pc:Label value="deu" type="model"/>
|
||||
<pc:Label value="0" type="dpi"/>
|
||||
<pc:Label value="0" type="padding"/>
|
||||
<pc:Label value="False" type="overwrite_segments"/>
|
||||
<pc:Label value="True" type="overwrite_text"/>
|
||||
<pc:Label value="False" type="shrink_polygons"/>
|
||||
<pc:Label value="False" type="block_polygons"/>
|
||||
<pc:Label value="False" type="find_staves"/>
|
||||
<pc:Label value="False" type="sparse_text"/>
|
||||
<pc:Label value="False" type="raw_lines"/>
|
||||
<pc:Label value="" type="char_whitelist"/>
|
||||
<pc:Label value="" type="char_blacklist"/>
|
||||
<pc:Label value="" type="char_unblacklist"/>
|
||||
<pc:Label value="{}" type="tesseract_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_model"/>
|
||||
<pc:Label value="False" type="auto_model"/>
|
||||
<pc:Label value="DEFAULT" type="oem"/>
|
||||
</pc:Labels>
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="version">
|
||||
<pc:Label value="0.13.6 (tesseract 5.1.0)" type="ocrd-tesserocr-recognize"/>
|
||||
<pc:Label value="2.34.0" type="ocrd/core"/>
|
||||
</pc:Labels>
|
||||
</pc:MetadataItem>
|
||||
</pc:Metadata>
|
||||
<pc:Page imageFilename="DEFAULT/FILE_0006_DEFAULT.jpg" imageWidth="1323" imageHeight="1959">
|
||||
<pc:AlternativeImage filename="TESS/FILE_0006_TESS.IMG-BIN.png" comments=",binarized,clipped"/>
|
||||
<pc:ReadingOrder>
|
||||
<pc:OrderedGroup id="reading-order">
|
||||
<pc:RegionRefIndexed index="0" regionRef="region0000"/>
|
||||
</pc:OrderedGroup>
|
||||
</pc:ReadingOrder>
|
||||
<pc:ImageRegion id="region0000" orientation="0.">
|
||||
<pc:Coords points="0,0 1316,0 1316,1959 0,1959"/>
|
||||
</pc:ImageRegion>
|
||||
</pc:Page>
|
||||
</pc:PcGts>
|
@ -0,0 +1,143 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pc:PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="FILE_0007_TESS">
|
||||
<pc:Metadata>
|
||||
<pc:Creator>OCR-D/core 2.34.0</pc:Creator>
|
||||
<pc:Created>2022-05-30T16:41:08.259374</pc:Created>
|
||||
<pc:LastChange>2022-05-30T16:41:08.259374</pc:LastChange>
|
||||
<pc:MetadataItem type="processingStep" name="layout/segmentation/region" value="ocrd-tesserocr-recognize">
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="parameters">
|
||||
<pc:Label value="False" type="find_tables"/>
|
||||
<pc:Label value="line" type="textequiv_level"/>
|
||||
<pc:Label value="region" type="segmentation_level"/>
|
||||
<pc:Label value="deu" type="model"/>
|
||||
<pc:Label value="0" type="dpi"/>
|
||||
<pc:Label value="0" type="padding"/>
|
||||
<pc:Label value="False" type="overwrite_segments"/>
|
||||
<pc:Label value="True" type="overwrite_text"/>
|
||||
<pc:Label value="False" type="shrink_polygons"/>
|
||||
<pc:Label value="False" type="block_polygons"/>
|
||||
<pc:Label value="False" type="find_staves"/>
|
||||
<pc:Label value="False" type="sparse_text"/>
|
||||
<pc:Label value="False" type="raw_lines"/>
|
||||
<pc:Label value="" type="char_whitelist"/>
|
||||
<pc:Label value="" type="char_blacklist"/>
|
||||
<pc:Label value="" type="char_unblacklist"/>
|
||||
<pc:Label value="{}" type="tesseract_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_model"/>
|
||||
<pc:Label value="False" type="auto_model"/>
|
||||
<pc:Label value="DEFAULT" type="oem"/>
|
||||
</pc:Labels>
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="version">
|
||||
<pc:Label value="0.13.6 (tesseract 5.1.0)" type="ocrd-tesserocr-recognize"/>
|
||||
<pc:Label value="2.34.0" type="ocrd/core"/>
|
||||
</pc:Labels>
|
||||
</pc:MetadataItem>
|
||||
</pc:Metadata>
|
||||
<pc:Page imageFilename="DEFAULT/FILE_0007_DEFAULT.jpg" imageWidth="1323" imageHeight="1959">
|
||||
<pc:AlternativeImage filename="TESS/FILE_0007_TESS.IMG-BIN.png" comments=",binarized,clipped"/>
|
||||
<pc:ReadingOrder>
|
||||
<pc:OrderedGroup id="reading-order">
|
||||
<pc:RegionRefIndexed index="0" regionRef="region0000"/>
|
||||
<pc:RegionRefIndexed index="1" regionRef="region0001"/>
|
||||
<pc:RegionRefIndexed index="2" regionRef="region0002"/>
|
||||
<pc:RegionRefIndexed index="3" regionRef="region0003"/>
|
||||
<pc:RegionRefIndexed index="4" regionRef="region0004"/>
|
||||
<pc:RegionRefIndexed index="5" regionRef="region0005"/>
|
||||
</pc:OrderedGroup>
|
||||
</pc:ReadingOrder>
|
||||
<pc:TextRegion id="region0000" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="199,173 1142,173 1142,277 199,277"/>
|
||||
<pc:TextLine id="region0000_line0000">
|
||||
<pc:Coords points="199,173 1142,173 1142,277 199,277"/>
|
||||
<pc:TextEquiv conf="0.764367523193359">
|
||||
<pc:Unicode>Fin Welt-Stantenbund</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.764367523193359">
|
||||
<pc:Unicode>Fin Welt-Stantenbund</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0001" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="247,351 1094,351 1094,550 247,550"/>
|
||||
<pc:TextLine id="region0001_line0000">
|
||||
<pc:Coords points="247,351 1094,351 1094,424 247,424"/>
|
||||
<pc:TextEquiv conf="0.639941787719727">
|
||||
<pc:Unicode>als fiherftes Mittel zur Befeitinung</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextLine id="region0001_line0001">
|
||||
<pc:Coords points="529,480 818,480 818,550 529,550"/>
|
||||
<pc:TextEquiv conf="0.889262237548828">
|
||||
<pc:Unicode>des Krieges.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.764602012634277">
|
||||
<pc:Unicode>als fiherftes Mittel zur Befeitinung
|
||||
des Krieges.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0002" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="643,869 704,869 704,897 643,897"/>
|
||||
<pc:TextLine id="region0002_line0000">
|
||||
<pc:Coords points="643,869 704,869 704,897 643,897"/>
|
||||
<pc:TextEquiv conf="0.671919708251953">
|
||||
<pc:Unicode>Von</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.671919708251953">
|
||||
<pc:Unicode>Von</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0003" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="324,922 1015,922 1015,981 324,981"/>
|
||||
<pc:TextLine id="region0003_line0000">
|
||||
<pc:Coords points="324,922 1015,922 1015,981 324,981"/>
|
||||
<pc:TextEquiv conf="0.615652885437012">
|
||||
<pc:Unicode>@dunrd Loewenthal, Dr. phil.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.615652885437012">
|
||||
<pc:Unicode>@dunrd Loewenthal, Dr. phil.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0004" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="365,1247 978,1247 978,1274 365,1274"/>
|
||||
<pc:TextLine id="region0004_line0000">
|
||||
<pc:Coords points="365,1247 978,1247 978,1274 365,1274"/>
|
||||
<pc:TextEquiv conf="0.481728401184082">
|
||||
<pc:Unicode>ET —</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.481728401184082">
|
||||
<pc:Unicode>ET —</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0005" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="458,1542 884,1542 884,1721 458,1721"/>
|
||||
<pc:TextLine id="region0005_line0000">
|
||||
<pc:Coords points="577,1542 776,1542 776,1592 577,1592"/>
|
||||
<pc:TextEquiv conf="0.792164154052734">
|
||||
<pc:Unicode>Berlin.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextLine id="region0005_line0001">
|
||||
<pc:Coords points="458,1610 884,1610 884,1660 458,1660"/>
|
||||
<pc:TextEquiv conf="0.449111976623535">
|
||||
<pc:Unicode>NReform-Nerlag.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextLine id="region0005_line0002">
|
||||
<pc:Coords points="604,1686 737,1686 737,1721 604,1721"/>
|
||||
<pc:TextEquiv conf="0.822743530273437">
|
||||
<pc:Unicode>1896,</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.688006553649902">
|
||||
<pc:Unicode>Berlin.
|
||||
NReform-Nerlag.
|
||||
1896,</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
</pc:Page>
|
||||
</pc:PcGts>
|
@ -0,0 +1,48 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pc:PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="FILE_0008_TESS">
|
||||
<pc:Metadata>
|
||||
<pc:Creator>OCR-D/core 2.34.0</pc:Creator>
|
||||
<pc:Created>2022-05-30T16:41:09.407368</pc:Created>
|
||||
<pc:LastChange>2022-05-30T16:41:09.407368</pc:LastChange>
|
||||
<pc:MetadataItem type="processingStep" name="layout/segmentation/region" value="ocrd-tesserocr-recognize">
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="parameters">
|
||||
<pc:Label value="False" type="find_tables"/>
|
||||
<pc:Label value="line" type="textequiv_level"/>
|
||||
<pc:Label value="region" type="segmentation_level"/>
|
||||
<pc:Label value="deu" type="model"/>
|
||||
<pc:Label value="0" type="dpi"/>
|
||||
<pc:Label value="0" type="padding"/>
|
||||
<pc:Label value="False" type="overwrite_segments"/>
|
||||
<pc:Label value="True" type="overwrite_text"/>
|
||||
<pc:Label value="False" type="shrink_polygons"/>
|
||||
<pc:Label value="False" type="block_polygons"/>
|
||||
<pc:Label value="False" type="find_staves"/>
|
||||
<pc:Label value="False" type="sparse_text"/>
|
||||
<pc:Label value="False" type="raw_lines"/>
|
||||
<pc:Label value="" type="char_whitelist"/>
|
||||
<pc:Label value="" type="char_blacklist"/>
|
||||
<pc:Label value="" type="char_unblacklist"/>
|
||||
<pc:Label value="{}" type="tesseract_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_model"/>
|
||||
<pc:Label value="False" type="auto_model"/>
|
||||
<pc:Label value="DEFAULT" type="oem"/>
|
||||
</pc:Labels>
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="version">
|
||||
<pc:Label value="0.13.6 (tesseract 5.1.0)" type="ocrd-tesserocr-recognize"/>
|
||||
<pc:Label value="2.34.0" type="ocrd/core"/>
|
||||
</pc:Labels>
|
||||
</pc:MetadataItem>
|
||||
</pc:Metadata>
|
||||
<pc:Page imageFilename="DEFAULT/FILE_0008_DEFAULT.jpg" imageWidth="1323" imageHeight="1959">
|
||||
<pc:AlternativeImage filename="TESS/FILE_0008_TESS.IMG-BIN.png" comments=",binarized,clipped"/>
|
||||
<pc:ReadingOrder>
|
||||
<pc:OrderedGroup id="reading-order">
|
||||
<pc:RegionRefIndexed index="0" regionRef="region0000"/>
|
||||
</pc:OrderedGroup>
|
||||
</pc:ReadingOrder>
|
||||
<pc:ImageRegion id="region0000" orientation="0.">
|
||||
<pc:Coords points="0,0 1323,0 1323,1959 0,1959"/>
|
||||
</pc:ImageRegion>
|
||||
</pc:Page>
|
||||
</pc:PcGts>
|
@ -0,0 +1,48 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pc:PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="FILE_0017_TESS">
|
||||
<pc:Metadata>
|
||||
<pc:Creator>OCR-D/core 2.34.0</pc:Creator>
|
||||
<pc:Created>2022-05-30T16:41:32.782638</pc:Created>
|
||||
<pc:LastChange>2022-05-30T16:41:32.782638</pc:LastChange>
|
||||
<pc:MetadataItem type="processingStep" name="layout/segmentation/region" value="ocrd-tesserocr-recognize">
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="parameters">
|
||||
<pc:Label value="False" type="find_tables"/>
|
||||
<pc:Label value="line" type="textequiv_level"/>
|
||||
<pc:Label value="region" type="segmentation_level"/>
|
||||
<pc:Label value="deu" type="model"/>
|
||||
<pc:Label value="0" type="dpi"/>
|
||||
<pc:Label value="0" type="padding"/>
|
||||
<pc:Label value="False" type="overwrite_segments"/>
|
||||
<pc:Label value="True" type="overwrite_text"/>
|
||||
<pc:Label value="False" type="shrink_polygons"/>
|
||||
<pc:Label value="False" type="block_polygons"/>
|
||||
<pc:Label value="False" type="find_staves"/>
|
||||
<pc:Label value="False" type="sparse_text"/>
|
||||
<pc:Label value="False" type="raw_lines"/>
|
||||
<pc:Label value="" type="char_whitelist"/>
|
||||
<pc:Label value="" type="char_blacklist"/>
|
||||
<pc:Label value="" type="char_unblacklist"/>
|
||||
<pc:Label value="{}" type="tesseract_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_model"/>
|
||||
<pc:Label value="False" type="auto_model"/>
|
||||
<pc:Label value="DEFAULT" type="oem"/>
|
||||
</pc:Labels>
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="version">
|
||||
<pc:Label value="0.13.6 (tesseract 5.1.0)" type="ocrd-tesserocr-recognize"/>
|
||||
<pc:Label value="2.34.0" type="ocrd/core"/>
|
||||
</pc:Labels>
|
||||
</pc:MetadataItem>
|
||||
</pc:Metadata>
|
||||
<pc:Page imageFilename="DEFAULT/FILE_0017_DEFAULT.jpg" imageWidth="1294" imageHeight="1959">
|
||||
<pc:AlternativeImage filename="TESS/FILE_0017_TESS.IMG-BIN.png" comments=",binarized,clipped"/>
|
||||
<pc:ReadingOrder>
|
||||
<pc:OrderedGroup id="reading-order">
|
||||
<pc:RegionRefIndexed index="0" regionRef="region0000"/>
|
||||
</pc:OrderedGroup>
|
||||
</pc:ReadingOrder>
|
||||
<pc:ImageRegion id="region0000" orientation="0.">
|
||||
<pc:Coords points="0,0 1284,0 1284,1959 0,1959"/>
|
||||
</pc:ImageRegion>
|
||||
</pc:Page>
|
||||
</pc:PcGts>
|
@ -0,0 +1,48 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pc:PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="FILE_0018_TESS">
|
||||
<pc:Metadata>
|
||||
<pc:Creator>OCR-D/core 2.34.0</pc:Creator>
|
||||
<pc:Created>2022-05-30T16:41:33.894354</pc:Created>
|
||||
<pc:LastChange>2022-05-30T16:41:33.894354</pc:LastChange>
|
||||
<pc:MetadataItem type="processingStep" name="layout/segmentation/region" value="ocrd-tesserocr-recognize">
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="parameters">
|
||||
<pc:Label value="False" type="find_tables"/>
|
||||
<pc:Label value="line" type="textequiv_level"/>
|
||||
<pc:Label value="region" type="segmentation_level"/>
|
||||
<pc:Label value="deu" type="model"/>
|
||||
<pc:Label value="0" type="dpi"/>
|
||||
<pc:Label value="0" type="padding"/>
|
||||
<pc:Label value="False" type="overwrite_segments"/>
|
||||
<pc:Label value="True" type="overwrite_text"/>
|
||||
<pc:Label value="False" type="shrink_polygons"/>
|
||||
<pc:Label value="False" type="block_polygons"/>
|
||||
<pc:Label value="False" type="find_staves"/>
|
||||
<pc:Label value="False" type="sparse_text"/>
|
||||
<pc:Label value="False" type="raw_lines"/>
|
||||
<pc:Label value="" type="char_whitelist"/>
|
||||
<pc:Label value="" type="char_blacklist"/>
|
||||
<pc:Label value="" type="char_unblacklist"/>
|
||||
<pc:Label value="{}" type="tesseract_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_model"/>
|
||||
<pc:Label value="False" type="auto_model"/>
|
||||
<pc:Label value="DEFAULT" type="oem"/>
|
||||
</pc:Labels>
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="version">
|
||||
<pc:Label value="0.13.6 (tesseract 5.1.0)" type="ocrd-tesserocr-recognize"/>
|
||||
<pc:Label value="2.34.0" type="ocrd/core"/>
|
||||
</pc:Labels>
|
||||
</pc:MetadataItem>
|
||||
</pc:Metadata>
|
||||
<pc:Page imageFilename="DEFAULT/FILE_0018_DEFAULT.jpg" imageWidth="1474" imageHeight="2094">
|
||||
<pc:AlternativeImage filename="TESS/FILE_0018_TESS.IMG-BIN.png" comments=",binarized,clipped"/>
|
||||
<pc:ReadingOrder>
|
||||
<pc:OrderedGroup id="reading-order">
|
||||
<pc:RegionRefIndexed index="0" regionRef="region0000"/>
|
||||
</pc:OrderedGroup>
|
||||
</pc:ReadingOrder>
|
||||
<pc:ImageRegion id="region0000" orientation="0.">
|
||||
<pc:Coords points="0,0 1474,0 1474,2094 0,2094"/>
|
||||
</pc:ImageRegion>
|
||||
</pc:Page>
|
||||
</pc:PcGts>
|
@ -0,0 +1,120 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pc:PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="FILE_0019_TESS">
|
||||
<pc:Metadata>
|
||||
<pc:Creator>OCR-D/core 2.34.0</pc:Creator>
|
||||
<pc:Created>2022-05-30T16:41:34.918467</pc:Created>
|
||||
<pc:LastChange>2022-05-30T16:41:34.918467</pc:LastChange>
|
||||
<pc:MetadataItem type="processingStep" name="layout/segmentation/region" value="ocrd-tesserocr-recognize">
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="parameters">
|
||||
<pc:Label value="False" type="find_tables"/>
|
||||
<pc:Label value="line" type="textequiv_level"/>
|
||||
<pc:Label value="region" type="segmentation_level"/>
|
||||
<pc:Label value="deu" type="model"/>
|
||||
<pc:Label value="0" type="dpi"/>
|
||||
<pc:Label value="0" type="padding"/>
|
||||
<pc:Label value="False" type="overwrite_segments"/>
|
||||
<pc:Label value="True" type="overwrite_text"/>
|
||||
<pc:Label value="False" type="shrink_polygons"/>
|
||||
<pc:Label value="False" type="block_polygons"/>
|
||||
<pc:Label value="False" type="find_staves"/>
|
||||
<pc:Label value="False" type="sparse_text"/>
|
||||
<pc:Label value="False" type="raw_lines"/>
|
||||
<pc:Label value="" type="char_whitelist"/>
|
||||
<pc:Label value="" type="char_blacklist"/>
|
||||
<pc:Label value="" type="char_unblacklist"/>
|
||||
<pc:Label value="{}" type="tesseract_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_model"/>
|
||||
<pc:Label value="False" type="auto_model"/>
|
||||
<pc:Label value="DEFAULT" type="oem"/>
|
||||
</pc:Labels>
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="version">
|
||||
<pc:Label value="0.13.6 (tesseract 5.1.0)" type="ocrd-tesserocr-recognize"/>
|
||||
<pc:Label value="2.34.0" type="ocrd/core"/>
|
||||
</pc:Labels>
|
||||
</pc:MetadataItem>
|
||||
</pc:Metadata>
|
||||
<pc:Page imageFilename="DEFAULT/FILE_0019_DEFAULT.jpg" imageWidth="1461" imageHeight="2112">
|
||||
<pc:AlternativeImage filename="TESS/FILE_0019_TESS.IMG-BIN.png" comments=",binarized,clipped"/>
|
||||
<pc:ReadingOrder>
|
||||
<pc:OrderedGroup id="reading-order">
|
||||
<pc:RegionRefIndexed index="0" regionRef="region0000"/>
|
||||
<pc:RegionRefIndexed index="1" regionRef="region0001"/>
|
||||
<pc:RegionRefIndexed index="2" regionRef="region0002"/>
|
||||
<pc:RegionRefIndexed index="3" regionRef="region0003"/>
|
||||
<pc:RegionRefIndexed index="4" regionRef="region0004"/>
|
||||
<pc:RegionRefIndexed index="5" regionRef="region0005"/>
|
||||
</pc:OrderedGroup>
|
||||
</pc:ReadingOrder>
|
||||
<pc:TextRegion id="region0000" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="740,1662 1180,1662 1180,1688 740,1688"/>
|
||||
<pc:TextLine id="region0000_line0000">
|
||||
<pc:Coords points="740,1662 1180,1662 1180,1688 740,1688"/>
|
||||
<pc:TextEquiv conf="0.153651885986328">
|
||||
<pc:Unicode>— nn ——</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.153651885986328">
|
||||
<pc:Unicode>— nn ——</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0001" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="815,1699 1149,1699 1149,1734 815,1734"/>
|
||||
<pc:TextLine id="region0001_line0000">
|
||||
<pc:Coords points="815,1699 1149,1699 1149,1734 815,1734"/>
|
||||
<pc:TextEquiv conf="0.568986701965332">
|
||||
<pc:Unicode>ZEIB Entsäuerung</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.568986701965332">
|
||||
<pc:Unicode>ZEIB Entsäuerung</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0002" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="937,1797 1114,1797 1114,1833 937,1833"/>
|
||||
<pc:TextLine id="region0002_line0000">
|
||||
<pc:Coords points="996,1797 1112,1797 1112,1816 996,1816"/>
|
||||
<pc:TextEquiv conf="0.353407897949219">
|
||||
<pc:Unicode>N ann</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextLine id="region0002_line0001">
|
||||
<pc:Coords points="937,1797 1114,1797 1114,1833 937,1833"/>
|
||||
<pc:TextEquiv conf="0.897406616210938">
|
||||
<pc:Unicode>Juni 2001</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.625407257080078">
|
||||
<pc:Unicode>N ann
|
||||
Juni 2001</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0003" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="358,1817 1238,1817 1238,1966 358,1966"/>
|
||||
<pc:TextLine id="region0003_line0000">
|
||||
<pc:Coords points="358,1817 1238,1817 1238,1966 358,1966"/>
|
||||
<pc:TextEquiv conf="0.397178726196289">
|
||||
<pc:Unicode>N</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.397178726196289">
|
||||
<pc:Unicode>N</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0004" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="363,1966 734,1966 734,2012 363,2012"/>
|
||||
<pc:TextLine id="region0004_line0000">
|
||||
<pc:Coords points="363,1966 734,1966 734,2012 363,2012"/>
|
||||
<pc:TextEquiv conf="0.905004119873047">
|
||||
<pc:Unicode>N12<140996825010</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.905004119873047">
|
||||
<pc:Unicode>N12<140996825010</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:ImageRegion id="region0005" orientation="0.">
|
||||
<pc:Coords points="1332,0 1461,0 1461,2112 1332,2112"/>
|
||||
</pc:ImageRegion>
|
||||
</pc:Page>
|
||||
</pc:PcGts>
|
@ -0,0 +1,54 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pc:PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="FILE_0020_TESS">
|
||||
<pc:Metadata>
|
||||
<pc:Creator>OCR-D/core 2.34.0</pc:Creator>
|
||||
<pc:Created>2022-05-30T16:41:36.106681</pc:Created>
|
||||
<pc:LastChange>2022-05-30T16:41:36.106681</pc:LastChange>
|
||||
<pc:MetadataItem type="processingStep" name="layout/segmentation/region" value="ocrd-tesserocr-recognize">
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="parameters">
|
||||
<pc:Label value="False" type="find_tables"/>
|
||||
<pc:Label value="line" type="textequiv_level"/>
|
||||
<pc:Label value="region" type="segmentation_level"/>
|
||||
<pc:Label value="deu" type="model"/>
|
||||
<pc:Label value="0" type="dpi"/>
|
||||
<pc:Label value="0" type="padding"/>
|
||||
<pc:Label value="False" type="overwrite_segments"/>
|
||||
<pc:Label value="True" type="overwrite_text"/>
|
||||
<pc:Label value="False" type="shrink_polygons"/>
|
||||
<pc:Label value="False" type="block_polygons"/>
|
||||
<pc:Label value="False" type="find_staves"/>
|
||||
<pc:Label value="False" type="sparse_text"/>
|
||||
<pc:Label value="False" type="raw_lines"/>
|
||||
<pc:Label value="" type="char_whitelist"/>
|
||||
<pc:Label value="" type="char_blacklist"/>
|
||||
<pc:Label value="" type="char_unblacklist"/>
|
||||
<pc:Label value="{}" type="tesseract_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_model"/>
|
||||
<pc:Label value="False" type="auto_model"/>
|
||||
<pc:Label value="DEFAULT" type="oem"/>
|
||||
</pc:Labels>
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="version">
|
||||
<pc:Label value="0.13.6 (tesseract 5.1.0)" type="ocrd-tesserocr-recognize"/>
|
||||
<pc:Label value="2.34.0" type="ocrd/core"/>
|
||||
</pc:Labels>
|
||||
</pc:MetadataItem>
|
||||
</pc:Metadata>
|
||||
<pc:Page imageFilename="DEFAULT/FILE_0020_DEFAULT.jpg" imageWidth="1486" imageHeight="2124">
|
||||
<pc:AlternativeImage filename="TESS/FILE_0020_TESS.IMG-BIN.png" comments=",binarized,clipped"/>
|
||||
<pc:ReadingOrder>
|
||||
<pc:OrderedGroup id="reading-order">
|
||||
<pc:RegionRefIndexed index="2" regionRef="region0002"/>
|
||||
</pc:OrderedGroup>
|
||||
</pc:ReadingOrder>
|
||||
<pc:ImageRegion id="region0002" orientation="0.">
|
||||
<pc:Coords points="0,0 1486,0 1486,2124 0,2124"/>
|
||||
</pc:ImageRegion>
|
||||
<pc:SeparatorRegion id="region0000">
|
||||
<pc:Coords points="643,57 1351,57 1351,62 643,62"/>
|
||||
</pc:SeparatorRegion>
|
||||
<pc:SeparatorRegion id="region0001">
|
||||
<pc:Coords points="121,62 502,62 502,65 121,65"/>
|
||||
</pc:SeparatorRegion>
|
||||
</pc:Page>
|
||||
</pc:PcGts>
|
@ -0,0 +1,141 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pc:PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="FILE_0021_TESS">
|
||||
<pc:Metadata>
|
||||
<pc:Creator>OCR-D/core 2.34.0</pc:Creator>
|
||||
<pc:Created>2022-05-30T16:41:37.316914</pc:Created>
|
||||
<pc:LastChange>2022-05-30T16:41:37.316914</pc:LastChange>
|
||||
<pc:MetadataItem type="processingStep" name="layout/segmentation/region" value="ocrd-tesserocr-recognize">
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="parameters">
|
||||
<pc:Label value="False" type="find_tables"/>
|
||||
<pc:Label value="line" type="textequiv_level"/>
|
||||
<pc:Label value="region" type="segmentation_level"/>
|
||||
<pc:Label value="deu" type="model"/>
|
||||
<pc:Label value="0" type="dpi"/>
|
||||
<pc:Label value="0" type="padding"/>
|
||||
<pc:Label value="False" type="overwrite_segments"/>
|
||||
<pc:Label value="True" type="overwrite_text"/>
|
||||
<pc:Label value="False" type="shrink_polygons"/>
|
||||
<pc:Label value="False" type="block_polygons"/>
|
||||
<pc:Label value="False" type="find_staves"/>
|
||||
<pc:Label value="False" type="sparse_text"/>
|
||||
<pc:Label value="False" type="raw_lines"/>
|
||||
<pc:Label value="" type="char_whitelist"/>
|
||||
<pc:Label value="" type="char_blacklist"/>
|
||||
<pc:Label value="" type="char_unblacklist"/>
|
||||
<pc:Label value="{}" type="tesseract_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_parameters"/>
|
||||
<pc:Label value="{}" type="xpath_model"/>
|
||||
<pc:Label value="False" type="auto_model"/>
|
||||
<pc:Label value="DEFAULT" type="oem"/>
|
||||
</pc:Labels>
|
||||
<pc:Labels externalModel="ocrd-tool" externalId="version">
|
||||
<pc:Label value="0.13.6 (tesseract 5.1.0)" type="ocrd-tesserocr-recognize"/>
|
||||
<pc:Label value="2.34.0" type="ocrd/core"/>
|
||||
</pc:Labels>
|
||||
</pc:MetadataItem>
|
||||
</pc:Metadata>
|
||||
<pc:Page imageFilename="DEFAULT/FILE_0021_DEFAULT.jpg" imageWidth="1506" imageHeight="2234">
|
||||
<pc:AlternativeImage filename="TESS/FILE_0021_TESS.IMG-BIN.png" comments=",binarized,clipped"/>
|
||||
<pc:ReadingOrder>
|
||||
<pc:OrderedGroup id="reading-order">
|
||||
<pc:RegionRefIndexed index="0" regionRef="region0000"/>
|
||||
<pc:RegionRefIndexed index="1" regionRef="region0001"/>
|
||||
<pc:RegionRefIndexed index="2" regionRef="region0002"/>
|
||||
<pc:RegionRefIndexed index="3" regionRef="region0003"/>
|
||||
<pc:RegionRefIndexed index="4" regionRef="region0004"/>
|
||||
<pc:RegionRefIndexed index="5" regionRef="region0005"/>
|
||||
</pc:OrderedGroup>
|
||||
</pc:ReadingOrder>
|
||||
<pc:TextRegion id="region0000" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="148,118 1323,118 1323,249 148,249"/>
|
||||
<pc:TextLine id="region0000_line0000">
|
||||
<pc:Coords points="148,118 1323,118 1323,249 148,249"/>
|
||||
<pc:TextEquiv conf="0.451914672851563">
|
||||
<pc:Unicode>Ein Welt-Stantenbund</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.451914672851563">
|
||||
<pc:Unicode>Ein Welt-Stantenbund</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0001" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="423,299 1272,299 1272,500 423,500"/>
|
||||
<pc:TextLine id="region0001_line0000">
|
||||
<pc:Coords points="423,299 1272,299 1272,379 423,379"/>
|
||||
<pc:TextEquiv conf="0.61743221282959">
|
||||
<pc:Unicode>als Fiherties Mittel zur Befeitigung</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextLine id="region0001_line0001">
|
||||
<pc:Coords points="704,428 992,428 992,500 704,500"/>
|
||||
<pc:TextEquiv conf="0.826985321044922">
|
||||
<pc:Unicode>des Krieges.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.722208766937256">
|
||||
<pc:Unicode>als Fiherties Mittel zur Befeitigung
|
||||
des Krieges.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0002" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="490,816 1184,816 1184,935 490,935"/>
|
||||
<pc:TextLine id="region0002_line0000">
|
||||
<pc:Coords points="811,816 872,816 872,844 811,844"/>
|
||||
<pc:TextEquiv conf="0.545818023681641">
|
||||
<pc:Unicode>Bon</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextLine id="region0002_line0001">
|
||||
<pc:Coords points="490,867 1184,867 1184,935 490,935"/>
|
||||
<pc:TextEquiv conf="0.889519424438477">
|
||||
<pc:Unicode>Eduard Loewenthal, Dr. phil.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.717668724060059">
|
||||
<pc:Unicode>Bon
|
||||
Eduard Loewenthal, Dr. phil.</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0003" orientation="0." type="caption" readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="792,1047 1390,1047 1390,1166 792,1166"/>
|
||||
<pc:TextLine id="region0003_line0000">
|
||||
<pc:Coords points="792,1047 1390,1047 1390,1166 792,1166"/>
|
||||
<pc:TextEquiv conf="0.534808006286621">
|
||||
<pc:Unicode>NN ul Il un</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.534808006286621">
|
||||
<pc:Unicode>NN ul Il un</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:TextRegion id="region0004" orientation="0." readingDirection="left-to-right" textLineOrder="top-to-bottom">
|
||||
<pc:Coords points="309,2008 797,2008 797,2159 309,2159"/>
|
||||
<pc:TextLine id="region0004_line0000">
|
||||
<pc:Coords points="309,2008 648,2008 648,2043 309,2043"/>
|
||||
<pc:TextEquiv conf="0.961395034790039">
|
||||
<pc:Unicode>Staatsbibliothek</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextLine id="region0004_line0001">
|
||||
<pc:Coords points="310,2061 492,2061 492,2094 310,2094"/>
|
||||
<pc:TextEquiv conf="0.963343048095703">
|
||||
<pc:Unicode>zu Berlin</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextLine id="region0004_line0002">
|
||||
<pc:Coords points="310,2122 797,2122 797,2159 310,2159"/>
|
||||
<pc:TextEquiv conf="0.966184768676758">
|
||||
<pc:Unicode>Preußischer Kulturbesitz</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextLine>
|
||||
<pc:TextEquiv conf="0.963640950520833">
|
||||
<pc:Unicode>Staatsbibliothek
|
||||
zu Berlin
|
||||
Preußischer Kulturbesitz</pc:Unicode>
|
||||
</pc:TextEquiv>
|
||||
</pc:TextRegion>
|
||||
<pc:ImageRegion id="region0005" orientation="0.">
|
||||
<pc:Coords points="0,940 1506,940 1506,2234 0,2234"/>
|
||||
</pc:ImageRegion>
|
||||
</pc:Page>
|
||||
</pc:PcGts>
|
Loading…
Reference in New Issue