mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-09 11:50:00 +02:00
➡ Move dinglehopper into its own directory
This commit is contained in:
commit
89048bf55d
54 changed files with 43618 additions and 0 deletions
0
qurator/dinglehopper/tests/__init__.py
Normal file
0
qurator/dinglehopper/tests/__init__.py
Normal file
BIN
qurator/dinglehopper/tests/data/00000119.tif
Normal file
BIN
qurator/dinglehopper/tests/data/00000119.tif
Normal file
Binary file not shown.
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,289 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15">
|
||||
<Metadata>
|
||||
<Creator>OCR-D/core 1.0.0b11</Creator>
|
||||
<Created>2019-08-01T15:03:17.741679</Created>
|
||||
<LastChange>2019-08-01T15:03:17.741679</LastChange>
|
||||
<MetadataItem type="processingStep" name="recognition/text-recognition" value="ocrd-tesserocr-recognize">
|
||||
<Labels>
|
||||
<Label value="frk" type="model"/>
|
||||
<Label value="line" type="textequiv_level"/>
|
||||
<Label value="False" type="overwrite_words"/>
|
||||
</Labels>
|
||||
</MetadataItem>
|
||||
</Metadata>
|
||||
<Page imageFilename="../OCR-D-IMG-BIN/OCR-D-IMG-BIN_0002" imageWidth="1386" imageHeight="2372">
|
||||
<ReadingOrder>
|
||||
<OrderedGroup id="reading-order">
|
||||
<RegionRefIndexed index="0" regionRef="region0000"/>
|
||||
<RegionRefIndexed index="1" regionRef="region0001"/>
|
||||
<RegionRefIndexed index="2" regionRef="region0002"/>
|
||||
<RegionRefIndexed index="3" regionRef="region0003"/>
|
||||
<RegionRefIndexed index="4" regionRef="region0004"/>
|
||||
<RegionRefIndexed index="5" regionRef="region0005"/>
|
||||
<RegionRefIndexed index="6" regionRef="region0006"/>
|
||||
</OrderedGroup>
|
||||
</ReadingOrder>
|
||||
<TextRegion id="region0000">
|
||||
<Coords points="488,133 1197,133 1197,193 488,193"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode>
|
||||
</TextEquiv>
|
||||
</TextRegion>
|
||||
<TextRegion id="region0001">
|
||||
<Coords points="40,221 1198,221 1198,626 40,626"/>
|
||||
<TextLine id="region0001_line0000">
|
||||
<Coords points="40,221 1198,221 1198,281 40,281"/>
|
||||
<TextEquiv conf="0.86">
|
||||
<Unicode>Die ſcheinen uns bald kleine Hügel - bald Hütten x Zelten und bald</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0001_line0001">
|
||||
<Coords points="768,290 879,290 879,325 768,325"/>
|
||||
<TextEquiv conf="0.62">
|
||||
<Unicode>„Bellen</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0001_line0002">
|
||||
<Coords points="86,337 1174,337 1174,396 86,396"/>
|
||||
<TextEquiv conf="0.8">
|
||||
<Unicode>Den Blicken , welche ſie durchlaufen , von weiten öfters vorzuſtellen,</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0001_line0003">
|
||||
<Coords points="88,397 841,397 841,455 88,455"/>
|
||||
<TextEquiv conf="0.84">
|
||||
<Unicode>Sieht man ein ſolch gemähtes Feld - von oben,</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0001_line0004">
|
||||
<Coords points="87,455 1142,455 1142,510 87,510"/>
|
||||
<TextEquiv conf="0.92">
|
||||
<Unicode>Sy gleicht es einem weiten Meer - worauf erhabne Wellen kobeny</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0001_line0005">
|
||||
<Coords points="87,510 1153,510 1153,570 87,570"/>
|
||||
<TextEquiv conf="0.85">
|
||||
<Unicode>Jedoch mit dieſem Unterſcheid - daß, da ſich die beſtändig rühren:</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0001_line0006">
|
||||
<Coords points="88,569 1161,569 1161,626 88,626"/>
|
||||
<TextEquiv conf="0.84">
|
||||
<Unicode>Von einiger Bewegung hier - in dieſen Wellen ; nichts zu ſpähren,</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextEquiv>
|
||||
<Unicode>Die ſcheinen uns bald kleine Hügel - bald Hütten x Zelten und bald
|
||||
„Bellen
|
||||
Den Blicken , welche ſie durchlaufen , von weiten öfters vorzuſtellen,
|
||||
Sieht man ein ſolch gemähtes Feld - von oben,
|
||||
Sy gleicht es einem weiten Meer - worauf erhabne Wellen kobeny
|
||||
Jedoch mit dieſem Unterſcheid - daß, da ſich die beſtändig rühren:
|
||||
Von einiger Bewegung hier - in dieſen Wellen ; nichts zu ſpähren,</Unicode>
|
||||
</TextEquiv>
|
||||
</TextRegion>
|
||||
<TextRegion id="region0002">
|
||||
<Coords points="517,670 745,670 745,716 517,716"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode>
|
||||
</TextEquiv>
|
||||
</TextRegion>
|
||||
<TextRegion id="region0003">
|
||||
<Coords points="243,739 1124,739 1124,1094 243,1094"/>
|
||||
<TextLine id="region0003_line0000">
|
||||
<Coords points="243,739 884,739 884,795 243,795"/>
|
||||
<TextEquiv conf="0.83">
|
||||
<Unicode>Was erhebt des Schöpfers Güte</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0003_line0001">
|
||||
<Coords points="244,792 972,792 972,859 244,859"/>
|
||||
<TextEquiv conf="0.8">
|
||||
<Unicode>Mehr , als dieſes Seegens Meer?</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0003_line0002">
|
||||
<Coords points="243,855 931,855 931,913 243,913"/>
|
||||
<TextEquiv conf="0.83">
|
||||
<Unicode>Kommt dies wohl von ungefehv?</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0003_line0003">
|
||||
<Coords points="244,914 918,914 918,971 244,971"/>
|
||||
<TextEquiv conf="0.84">
|
||||
<Unicode>Nein , rüſt mein erfreut Gemühte</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0003_line0004">
|
||||
<Coords points="245,972 1059,972 1059,1034 245,1034"/>
|
||||
<TextEquiv conf="0.86">
|
||||
<Unicode>Nur von GOTT komint alles hers</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0003_line0005">
|
||||
<Coords points="247,1029 1124,1029 1124,1094 247,1094"/>
|
||||
<TextEquiv conf="0.74">
|
||||
<Unicode>Ihm ſey Preiß und Dan und Ehr!</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextEquiv>
|
||||
<Unicode>Was erhebt des Schöpfers Güte
|
||||
Mehr , als dieſes Seegens Meer?
|
||||
Kommt dies wohl von ungefehv?
|
||||
Nein , rüſt mein erfreut Gemühte
|
||||
Nur von GOTT komint alles hers
|
||||
Ihm ſey Preiß und Dan und Ehr!</Unicode>
|
||||
</TextEquiv>
|
||||
</TextRegion>
|
||||
<TextRegion id="region0004">
|
||||
<Coords points="1043,1096 1204,1096 1204,1136 1043,1136"/>
|
||||
<TextLine id="region0004_line0000">
|
||||
<Coords points="1043,1096 1204,1096 1204,1136 1043,1136"/>
|
||||
<TextEquiv conf="0.8">
|
||||
<Unicode>Da Capo,</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextEquiv>
|
||||
<Unicode>Da Capo,</Unicode>
|
||||
</TextEquiv>
|
||||
</TextRegion>
|
||||
<TextRegion id="region0005">
|
||||
<Coords points="68,1183 1236,1183 1236,2056 68,2056"/>
|
||||
<TextLine id="region0005_line0000">
|
||||
<Coords points="91,1183 1170,1183 1170,1235 91,1235"/>
|
||||
<TextEquiv conf="0.65">
|
||||
<Unicode>Geht man auf einen ſolhen Felde, ſo eben erſi gemäht - ſpaßtiereny</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0001">
|
||||
<Coords points="89,1236 1182,1236 1182,1289 89,1289"/>
|
||||
<TextEquiv conf="0.73">
|
||||
<Unicode>Das man gewohnt voll Korn zu ſehn; ſo kommen wir uns gröſſer für,</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0002">
|
||||
<Coords points="89,1294 1208,1294 1208,1346 89,1346"/>
|
||||
<TextEquiv conf="0.85">
|
||||
<Unicode>Das Feld hingegen niedriger. Auch nimmt ſodean ein neuer Scheinz</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0003">
|
||||
<Coords points="90,1351 519,1351 519,1399 90,1399"/>
|
||||
<TextEquiv conf="0.92">
|
||||
<Unicode>Und eine neue Farben Zier</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0004">
|
||||
<Coords points="91,1405 561,1405 561,1457 91,1457"/>
|
||||
<TextEquiv conf="0.91">
|
||||
<Unicode>Den erſt gemähten Aker ein,</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0005">
|
||||
<Coords points="92,1459 1208,1459 1208,1510 92,1510"/>
|
||||
<TextEquiv conf="0.88">
|
||||
<Unicode>Der Grund iſt grün - die Stoppeln gelb und wenn fich unjrer Son-</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0006">
|
||||
<Coords points="782,1514 1007,1514 1007,1555 782,1555"/>
|
||||
<TextEquiv conf="0.46">
|
||||
<Unicode>nen B;Of</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0007">
|
||||
<Coords points="68,1562 1177,1562 1177,1617 68,1617"/>
|
||||
<TextEquiv conf="0.82">
|
||||
<Unicode>Un ihre runde glatte Röhren , zumahlen früh und Abends bricht;</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0008">
|
||||
<Coords points="90,1618 1236,1618 1236,1670 90,1670"/>
|
||||
<TextEquiv conf="0.79">
|
||||
<Unicode>So kann ein Gold kaum ſtärcker glänßen.- Dies macht ein liebliches</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0009">
|
||||
<Coords points="777,1671 1159,1671 1159,1716 777,1716"/>
|
||||
<TextEquiv conf="0.76">
|
||||
<Unicode>Gemiſche, |</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0010">
|
||||
<Coords points="92,1722 1211,1722 1211,1783 92,1783"/>
|
||||
<TextEquiv conf="0.7">
|
||||
<Unicode>Zutnahl wenn , in der Nachbarſchaft - ein dumfel-grünendes Gebüſche</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0011">
|
||||
<Coords points="91,1779 1210,1779 1210,1837 91,1837"/>
|
||||
<TextEquiv conf="0.84">
|
||||
<Unicode>Den gelben Schimmer noch erhöht. Wir ich nun jüngſt, zur Abend Zeif,</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0012">
|
||||
<Coords points="93,1837 1210,1837 1210,1895 93,1895"/>
|
||||
<TextEquiv conf="0.84">
|
||||
<Unicode>Durch ſo viel ſhwere Scegens-Berge, mit ſanften Schritten, hin und</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0013">
|
||||
<Coords points="800,1896 914,1896 914,1936 800,1936"/>
|
||||
<TextEquiv conf="0.52">
|
||||
<Unicode>Wieder;</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0014">
|
||||
<Coords points="92,1943 1212,1943 1212,2001 92,2001"/>
|
||||
<TextEquiv conf="0.74">
|
||||
<Unicode>Gepühret durch des Feldes Schmu, gerühret durc< die Fruchtbarkeitz</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0005_line0015">
|
||||
<Coords points="90,1998 1125,1998 1125,2056 90,2056"/>
|
||||
<TextEquiv conf="0.76">
|
||||
<Unicode>Vergmigt auf meinem Acker gieng - ertönten dieſe meine Lieder:</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextEquiv>
|
||||
<Unicode>Geht man auf einen ſolhen Felde, ſo eben erſi gemäht - ſpaßtiereny
|
||||
Das man gewohnt voll Korn zu ſehn; ſo kommen wir uns gröſſer für,
|
||||
Das Feld hingegen niedriger. Auch nimmt ſodean ein neuer Scheinz
|
||||
Und eine neue Farben Zier
|
||||
Den erſt gemähten Aker ein,
|
||||
Der Grund iſt grün - die Stoppeln gelb und wenn fich unjrer Son-
|
||||
nen B;Of
|
||||
Un ihre runde glatte Röhren , zumahlen früh und Abends bricht;
|
||||
So kann ein Gold kaum ſtärcker glänßen.- Dies macht ein liebliches
|
||||
Gemiſche, |
|
||||
Zutnahl wenn , in der Nachbarſchaft - ein dumfel-grünendes Gebüſche
|
||||
Den gelben Schimmer noch erhöht. Wir ich nun jüngſt, zur Abend Zeif,
|
||||
Durch ſo viel ſhwere Scegens-Berge, mit ſanften Schritten, hin und
|
||||
Wieder;
|
||||
Gepühret durch des Feldes Schmu, gerühret durc< die Fruchtbarkeitz
|
||||
Vergmigt auf meinem Acker gieng - ertönten dieſe meine Lieder:</Unicode>
|
||||
</TextEquiv>
|
||||
</TextRegion>
|
||||
<TextRegion id="region0006">
|
||||
<Coords points="688,2060 1216,2060 1216,2120 688,2120"/>
|
||||
<TextLine id="region0006_line0000">
|
||||
<Coords points="688,2069 787,2069 787,2120 688,2120"/>
|
||||
<TextEquiv conf="0.74">
|
||||
<Unicode>5) 2</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextLine id="region0006_line0001">
|
||||
<Coords points="1044,2060 1216,2060 1216,2105 1044,2105"/>
|
||||
<TextEquiv conf="0.89">
|
||||
<Unicode>ARIA.</Unicode>
|
||||
</TextEquiv>
|
||||
</TextLine>
|
||||
<TextEquiv>
|
||||
<Unicode>5) 2
|
||||
ARIA.</Unicode>
|
||||
</TextEquiv>
|
||||
</TextRegion>
|
||||
</Page>
|
||||
</PcGts>
|
|
@ -0,0 +1,47 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15/pagecontent.xsd">
|
||||
<Metadata>
|
||||
<Creator></Creator>
|
||||
<Created>2019-07-26T13:59:00</Created>
|
||||
<LastChange>2019-07-26T14:00:29</LastChange></Metadata>
|
||||
<Page imageFilename="lorem-ipsum-scan.tif" imageXResolution="300.00000" imageYResolution="300.00000" imageWidth="2481" imageHeight="3508">
|
||||
<TextRegion id="tempReg357564684568544579089">
|
||||
<Coords points="0,0 1,0 1,1 0,1"/>
|
||||
<TextLine id="l0">
|
||||
<Coords points="228,237 228,295 2216,295 2216,237"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine>
|
||||
<TextLine id="l1">
|
||||
<Coords points="228,298 228,348 2160,348 2160,298"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine>
|
||||
<TextLine id="l2">
|
||||
<Coords points="225,348 225,410 2178,410 2178,348"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine>
|
||||
<TextLine id="l3">
|
||||
<Coords points="218,413 218,463 2153,463 2153,413"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine>
|
||||
<TextLine id="l4">
|
||||
<Coords points="225,466 225,522 2153,522 2153,466"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine>
|
||||
<TextLine id="l5">
|
||||
<Coords points="216,524 216,581 2187,581 2187,524"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine>
|
||||
<TextLine id="l6">
|
||||
<Coords points="219,584 219,640 542,640 542,584"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine></TextRegion>
|
||||
<TextRegion id="r7" type="paragraph">
|
||||
<Coords points="204,212 204,651 2227,651 2227,212"/>
|
||||
<TextEquiv>
|
||||
<Unicode>Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt
|
||||
ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo
|
||||
dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit
|
||||
amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor
|
||||
invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et
|
||||
justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum
|
||||
dolor sit amet.</Unicode></TextEquiv></TextRegion></Page></PcGts>
|
|
@ -0,0 +1,139 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<alto xmlns="http://www.loc.gov/standards/alto/ns-v3#" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v3# http://www.loc.gov/alto/v3/alto-3-0.xsd">
|
||||
<Description>
|
||||
<MeasurementUnit>pixel</MeasurementUnit>
|
||||
<sourceImageInformation>
|
||||
<fileName> </fileName>
|
||||
</sourceImageInformation>
|
||||
<OCRProcessing ID="OCR_0">
|
||||
<ocrProcessingStep>
|
||||
<processingSoftware>
|
||||
<softwareName>tesseract 4.1.0-rc4</softwareName>
|
||||
</processingSoftware>
|
||||
</ocrProcessingStep>
|
||||
</OCRProcessing>
|
||||
</Description>
|
||||
<Layout>
|
||||
<Page WIDTH="2481" HEIGHT="3508" PHYSICAL_IMG_NR="0" ID="page_0">
|
||||
<PrintSpace HPOS="0" VPOS="0" WIDTH="2481" HEIGHT="3508">
|
||||
<TextBlock ID="block_0" HPOS="209" VPOS="258" WIDTH="1954" HEIGHT="437">
|
||||
<TextLine ID="line_0" HPOS="209" VPOS="258" WIDTH="1954" HEIGHT="103">
|
||||
<String ID="string_0" HPOS="209" VPOS="319" WIDTH="134" HEIGHT="34" WC="0.96" CONTENT="Lorem"/><SP WIDTH="13" VPOS="319" HPOS="343"/>
|
||||
<String ID="string_1" HPOS="356" VPOS="316" WIDTH="121" HEIGHT="45" WC="0.96" CONTENT="ipsum"/><SP WIDTH="14" VPOS="316" HPOS="477"/>
|
||||
<String ID="string_2" HPOS="491" VPOS="312" WIDTH="102" HEIGHT="36" WC="0.96" CONTENT="dolor"/><SP WIDTH="15" VPOS="312" HPOS="593"/>
|
||||
<String ID="string_3" HPOS="608" VPOS="309" WIDTH="46" HEIGHT="35" WC="0.96" CONTENT="sit"/><SP WIDTH="14" VPOS="309" HPOS="654"/>
|
||||
<String ID="string_4" HPOS="668" VPOS="311" WIDTH="106" HEIGHT="37" WC="0.96" CONTENT="amet,"/><SP WIDTH="16" VPOS="311" HPOS="774"/>
|
||||
<String ID="string_5" HPOS="790" VPOS="307" WIDTH="201" HEIGHT="32" WC="0.88" CONTENT="consetetur"/><SP WIDTH="14" VPOS="307" HPOS="991"/>
|
||||
<String ID="string_6" HPOS="1005" VPOS="297" WIDTH="205" HEIGHT="46" WC="0.96" CONTENT="sadipscing"/><SP WIDTH="15" VPOS="297" HPOS="1210"/>
|
||||
<String ID="string_7" HPOS="1225" VPOS="293" WIDTH="84" HEIGHT="42" WC="0.91" CONTENT="elitr,"/><SP WIDTH="16" VPOS="293" HPOS="1309"/>
|
||||
<String ID="string_8" HPOS="1325" VPOS="289" WIDTH="65" HEIGHT="38" WC="0.96" CONTENT="sed"/><SP WIDTH="14" VPOS="289" HPOS="1390"/>
|
||||
<String ID="string_9" HPOS="1404" VPOS="286" WIDTH="97" HEIGHT="36" WC="0.93" CONTENT="diam"/><SP WIDTH="14" VPOS="286" HPOS="1501"/>
|
||||
<String ID="string_10" HPOS="1515" VPOS="291" WIDTH="100" HEIGHT="24" WC="0.69" CONTENT="nonu"/><SP WIDTH="32" VPOS="291" HPOS="1615"/>
|
||||
<String ID="string_11" HPOS="1647" VPOS="285" WIDTH="30" HEIGHT="36" WC="0.37" CONTENT="yy"/><SP WIDTH="17" VPOS="285" HPOS="1677"/>
|
||||
<String ID="string_12" HPOS="1694" VPOS="268" WIDTH="140" HEIGHT="42" WC="0.93" CONTENT="eirmod"/><SP WIDTH="11" VPOS="268" HPOS="1834"/>
|
||||
<String ID="string_13" HPOS="1845" VPOS="273" WIDTH="139" HEIGHT="37" WC="0.96" CONTENT="tempor"/><SP WIDTH="15" VPOS="273" HPOS="1984"/>
|
||||
<String ID="string_14" HPOS="1999" VPOS="258" WIDTH="164" HEIGHT="38" WC="0.95" CONTENT="invidunt"/>
|
||||
</TextLine>
|
||||
<TextLine ID="line_1" HPOS="211" VPOS="315" WIDTH="1904" HEIGHT="102">
|
||||
<String ID="string_15" HPOS="211" VPOS="380" WIDTH="39" HEIGHT="31" WC="0.96" CONTENT="ut"/><SP WIDTH="13" VPOS="380" HPOS="250"/>
|
||||
<String ID="string_16" HPOS="263" VPOS="373" WIDTH="123" HEIGHT="44" WC="0.96" CONTENT="labore"/><SP WIDTH="16" VPOS="373" HPOS="386"/>
|
||||
<String ID="string_17" HPOS="402" VPOS="379" WIDTH="33" HEIGHT="27" WC="0.95" CONTENT="et"/><SP WIDTH="14" VPOS="379" HPOS="435"/>
|
||||
<String ID="string_18" HPOS="449" VPOS="370" WIDTH="123" HEIGHT="36" WC="0.95" CONTENT="dolore"/><SP WIDTH="15" VPOS="370" HPOS="572"/>
|
||||
<String ID="string_19" HPOS="587" VPOS="374" WIDTH="133" HEIGHT="37" WC="0.96" CONTENT="magna"/><SP WIDTH="14" VPOS="374" HPOS="720"/>
|
||||
<String ID="string_20" HPOS="734" VPOS="363" WIDTH="183" HEIGHT="43" WC="0.96" CONTENT="aliquyam"/><SP WIDTH="14" VPOS="363" HPOS="917"/>
|
||||
<String ID="string_21" HPOS="931" VPOS="360" WIDTH="82" HEIGHT="36" WC="0.95" CONTENT="erat,"/><SP WIDTH="17" VPOS="360" HPOS="1013"/>
|
||||
<String ID="string_22" HPOS="1030" VPOS="354" WIDTH="65" HEIGHT="35" WC="0.96" CONTENT="sed"/><SP WIDTH="13" VPOS="354" HPOS="1095"/>
|
||||
<String ID="string_23" HPOS="1108" VPOS="352" WIDTH="96" HEIGHT="36" WC="0.96" CONTENT="diam"/><SP WIDTH="13" VPOS="352" HPOS="1204"/>
|
||||
<String ID="string_24" HPOS="1217" VPOS="350" WIDTH="181" HEIGHT="44" WC="0.95" CONTENT="voluptua."/><SP WIDTH="13" VPOS="350" HPOS="1398"/>
|
||||
<String ID="string_25" HPOS="1411" VPOS="345" WIDTH="49" HEIGHT="34" WC="0.95" CONTENT="At"/><SP WIDTH="11" VPOS="345" HPOS="1460"/>
|
||||
<String ID="string_26" HPOS="1471" VPOS="348" WIDTH="88" HEIGHT="26" WC="0.93" CONTENT="Vero"/><SP WIDTH="16" VPOS="348" HPOS="1559"/>
|
||||
<String ID="string_27" HPOS="1575" VPOS="345" WIDTH="65" HEIGHT="26" WC="0.96" CONTENT="eos"/><SP WIDTH="15" VPOS="345" HPOS="1640"/>
|
||||
<String ID="string_28" HPOS="1655" VPOS="339" WIDTH="36" HEIGHT="29" WC="0.96" CONTENT="et"/><SP WIDTH="14" VPOS="339" HPOS="1691"/>
|
||||
<String ID="string_29" HPOS="1705" VPOS="336" WIDTH="168" HEIGHT="31" WC="0.87" CONTENT="accusam"/><SP WIDTH="15" VPOS="336" HPOS="1873"/>
|
||||
<String ID="string_30" HPOS="1888" VPOS="329" WIDTH="34" HEIGHT="28" WC="0.96" CONTENT="et"/><SP WIDTH="11" VPOS="329" HPOS="1922"/>
|
||||
<String ID="string_31" HPOS="1933" VPOS="322" WIDTH="96" HEIGHT="44" WC="0.96" CONTENT="justo"/><SP WIDTH="15" VPOS="322" HPOS="2029"/>
|
||||
<String ID="string_32" HPOS="2044" VPOS="315" WIDTH="71" HEIGHT="63" WC="0.96" CONTENT="duo"/>
|
||||
</TextLine>
|
||||
<TextLine ID="line_2" HPOS="214" VPOS="375" WIDTH="1919" HEIGHT="93">
|
||||
<String ID="string_33" HPOS="214" VPOS="431" WIDTH="144" HEIGHT="37" WC="0.96" CONTENT="dolores"/><SP WIDTH="16" VPOS="431" HPOS="358"/>
|
||||
<String ID="string_34" HPOS="374" VPOS="433" WIDTH="34" HEIGHT="31" WC="0.96" CONTENT="et"/><SP WIDTH="14" VPOS="433" HPOS="408"/>
|
||||
<String ID="string_35" HPOS="422" VPOS="437" WIDTH="42" HEIGHT="25" WC="0.96" CONTENT="ea"/><SP WIDTH="13" VPOS="437" HPOS="464"/>
|
||||
<String ID="string_36" HPOS="477" VPOS="426" WIDTH="136" HEIGHT="35" WC="0.96" CONTENT="rebum."/><SP WIDTH="18" VPOS="426" HPOS="613"/>
|
||||
<String ID="string_37" HPOS="631" VPOS="424" WIDTH="75" HEIGHT="34" WC="0.96" CONTENT="Stet"/><SP WIDTH="14" VPOS="424" HPOS="706"/>
|
||||
<String ID="string_38" HPOS="720" VPOS="419" WIDTH="85" HEIGHT="36" WC="0.96" CONTENT="clita"/><SP WIDTH="13" VPOS="419" HPOS="805"/>
|
||||
<String ID="string_39" HPOS="818" VPOS="415" WIDTH="90" HEIGHT="35" WC="0.97" CONTENT="kasd"/><SP WIDTH="14" VPOS="415" HPOS="908"/>
|
||||
<String ID="string_40" HPOS="922" VPOS="412" WIDTH="206" HEIGHT="48" WC="0.96" CONTENT="gubergren,"/><SP WIDTH="16" VPOS="412" HPOS="1128"/>
|
||||
<String ID="string_41" HPOS="1144" VPOS="417" WIDTH="47" HEIGHT="26" WC="0.97" CONTENT="no"/><SP WIDTH="16" VPOS="417" HPOS="1191"/>
|
||||
<String ID="string_42" HPOS="1207" VPOS="415" WIDTH="61" HEIGHT="25" WC="0.96" CONTENT="sea"/><SP WIDTH="13" VPOS="415" HPOS="1268"/>
|
||||
<String ID="string_43" HPOS="1281" VPOS="405" WIDTH="169" HEIGHT="36" WC="0.91" CONTENT="iakimata"/><SP WIDTH="14" VPOS="405" HPOS="1450"/>
|
||||
<String ID="string_44" HPOS="1464" VPOS="400" WIDTH="144" HEIGHT="33" WC="0.96" CONTENT="sanctus"/><SP WIDTH="16" VPOS="400" HPOS="1608"/>
|
||||
<String ID="string_45" HPOS="1624" VPOS="397" WIDTH="54" HEIGHT="29" WC="0.97" CONTENT="est"/><SP WIDTH="13" VPOS="397" HPOS="1678"/>
|
||||
<String ID="string_46" HPOS="1691" VPOS="390" WIDTH="132" HEIGHT="34" WC="0.96" CONTENT="Lorem"/><SP WIDTH="14" VPOS="390" HPOS="1823"/>
|
||||
<String ID="string_47" HPOS="1837" VPOS="383" WIDTH="120" HEIGHT="44" WC="0.96" CONTENT="ipsum"/><SP WIDTH="14" VPOS="383" HPOS="1957"/>
|
||||
<String ID="string_48" HPOS="1971" VPOS="375" WIDTH="102" HEIGHT="37" WC="0.96" CONTENT="dolor"/><SP WIDTH="15" VPOS="375" HPOS="2073"/>
|
||||
<String ID="string_49" HPOS="2088" VPOS="377" WIDTH="45" HEIGHT="31" WC="0.96" CONTENT="sit"/>
|
||||
</TextLine>
|
||||
<TextLine ID="line_3" HPOS="215" VPOS="435" WIDTH="1896" HEIGHT="93">
|
||||
<String ID="string_50" HPOS="215" VPOS="494" WIDTH="106" HEIGHT="32" WC="0.96" CONTENT="amet."/><SP WIDTH="16" VPOS="494" HPOS="321"/>
|
||||
<String ID="string_51" HPOS="337" VPOS="488" WIDTH="130" HEIGHT="33" WC="0.96" CONTENT="Lorem"/><SP WIDTH="14" VPOS="488" HPOS="467"/>
|
||||
<String ID="string_52" HPOS="481" VPOS="484" WIDTH="121" HEIGHT="44" WC="0.96" CONTENT="ipsum"/><SP WIDTH="14" VPOS="484" HPOS="602"/>
|
||||
<String ID="string_53" HPOS="616" VPOS="479" WIDTH="104" HEIGHT="37" WC="0.96" CONTENT="dolor"/><SP WIDTH="14" VPOS="479" HPOS="720"/>
|
||||
<String ID="string_54" HPOS="734" VPOS="476" WIDTH="46" HEIGHT="36" WC="0.93" CONTENT="sit"/><SP WIDTH="14" VPOS="476" HPOS="780"/>
|
||||
<String ID="string_55" HPOS="794" VPOS="477" WIDTH="104" HEIGHT="36" WC="0.75" CONTENT="armet,"/><SP WIDTH="17" VPOS="477" HPOS="898"/>
|
||||
<String ID="string_56" HPOS="915" VPOS="474" WIDTH="200" HEIGHT="30" WC="0.97" CONTENT="consetetur"/><SP WIDTH="14" VPOS="474" HPOS="1115"/>
|
||||
<String ID="string_57" HPOS="1129" VPOS="463" WIDTH="205" HEIGHT="45" WC="0.96" CONTENT="sadipscing"/><SP WIDTH="15" VPOS="463" HPOS="1334"/>
|
||||
<String ID="string_58" HPOS="1349" VPOS="457" WIDTH="86" HEIGHT="41" WC="0.96" CONTENT="elitr,"/><SP WIDTH="16" VPOS="457" HPOS="1435"/>
|
||||
<String ID="string_59" HPOS="1451" VPOS="452" WIDTH="65" HEIGHT="39" WC="0.96" CONTENT="sed"/><SP WIDTH="14" VPOS="452" HPOS="1516"/>
|
||||
<String ID="string_60" HPOS="1530" VPOS="449" WIDTH="99" HEIGHT="36" WC="0.93" CONTENT="diam"/><SP WIDTH="14" VPOS="449" HPOS="1629"/>
|
||||
<String ID="string_61" HPOS="1643" VPOS="451" WIDTH="162" HEIGHT="36" WC="0.59" CONTENT="nonurny"/><SP WIDTH="16" VPOS="451" HPOS="1805"/>
|
||||
<String ID="string_62" HPOS="1821" VPOS="435" WIDTH="138" HEIGHT="39" WC="0.96" CONTENT="eirmod"/><SP WIDTH="12" VPOS="435" HPOS="1959"/>
|
||||
<String ID="string_63" HPOS="1971" VPOS="440" WIDTH="140" HEIGHT="37" WC="0.96" CONTENT="tempor"/>
|
||||
</TextLine>
|
||||
<TextLine ID="line_4" HPOS="216" VPOS="483" WIDTH="1888" HEIGHT="97">
|
||||
<String ID="string_64" HPOS="216" VPOS="543" WIDTH="165" HEIGHT="37" WC="0.97" CONTENT="invidunt"/><SP WIDTH="13" VPOS="543" HPOS="381"/>
|
||||
<String ID="string_65" HPOS="394" VPOS="546" WIDTH="39" HEIGHT="30" WC="0.97" CONTENT="ut"/><SP WIDTH="12" VPOS="546" HPOS="433"/>
|
||||
<String ID="string_66" HPOS="445" VPOS="539" WIDTH="122" HEIGHT="36" WC="0.96" CONTENT="labore"/><SP WIDTH="16" VPOS="539" HPOS="567"/>
|
||||
<String ID="string_67" HPOS="583" VPOS="543" WIDTH="35" HEIGHT="29" WC="0.96" CONTENT="et"/><SP WIDTH="14" VPOS="543" HPOS="618"/>
|
||||
<String ID="string_68" HPOS="632" VPOS="536" WIDTH="125" HEIGHT="34" WC="0.96" CONTENT="dolore"/><SP WIDTH="14" VPOS="536" HPOS="757"/>
|
||||
<String ID="string_69" HPOS="771" VPOS="539" WIDTH="131" HEIGHT="37" WC="0.46" CONTENT="magna"/><SP WIDTH="14" VPOS="539" HPOS="902"/>
|
||||
<String ID="string_70" HPOS="916" VPOS="526" WIDTH="182" HEIGHT="45" WC="0.96" CONTENT="aliquyam"/><SP WIDTH="14" VPOS="526" HPOS="1098"/>
|
||||
<String ID="string_71" HPOS="1112" VPOS="527" WIDTH="82" HEIGHT="37" WC="0.96" CONTENT="erat,"/><SP WIDTH="17" VPOS="527" HPOS="1194"/>
|
||||
<String ID="string_72" HPOS="1211" VPOS="519" WIDTH="63" HEIGHT="36" WC="0.97" CONTENT="sed"/><SP WIDTH="14" VPOS="519" HPOS="1274"/>
|
||||
<String ID="string_73" HPOS="1288" VPOS="517" WIDTH="97" HEIGHT="37" WC="0.96" CONTENT="diam"/><SP WIDTH="11" VPOS="517" HPOS="1385"/>
|
||||
<String ID="string_74" HPOS="1396" VPOS="513" WIDTH="185" HEIGHT="44" WC="0.96" CONTENT="voluptua."/><SP WIDTH="14" VPOS="513" HPOS="1581"/>
|
||||
<String ID="string_75" HPOS="1595" VPOS="505" WIDTH="50" HEIGHT="35" WC="0.96" CONTENT="At"/><SP WIDTH="11" VPOS="505" HPOS="1645"/>
|
||||
<String ID="string_76" HPOS="1656" VPOS="511" WIDTH="89" HEIGHT="27" WC="0.96" CONTENT="vero"/><SP WIDTH="16" VPOS="511" HPOS="1745"/>
|
||||
<String ID="string_77" HPOS="1761" VPOS="508" WIDTH="63" HEIGHT="26" WC="0.96" CONTENT="eos"/><SP WIDTH="15" VPOS="508" HPOS="1824"/>
|
||||
<String ID="string_78" HPOS="1839" VPOS="501" WIDTH="35" HEIGHT="30" WC="0.97" CONTENT="et"/><SP WIDTH="13" VPOS="501" HPOS="1874"/>
|
||||
<String ID="string_79" HPOS="1887" VPOS="499" WIDTH="168" HEIGHT="53" WC="0.80" CONTENT="accusam"/><SP WIDTH="-3" VPOS="499" HPOS="2055"/>
|
||||
<String ID="string_80" HPOS="2052" VPOS="483" WIDTH="52" HEIGHT="55" WC="0.97" CONTENT="et"/>
|
||||
</TextLine>
|
||||
<TextLine ID="line_5" HPOS="215" VPOS="552" WIDTH="1941" HEIGHT="97">
|
||||
<String ID="string_81" HPOS="215" VPOS="604" WIDTH="97" HEIGHT="45" WC="0.97" CONTENT="justo"/><SP WIDTH="16" VPOS="604" HPOS="312"/>
|
||||
<String ID="string_82" HPOS="328" VPOS="600" WIDTH="71" HEIGHT="35" WC="0.97" CONTENT="duo"/><SP WIDTH="16" VPOS="600" HPOS="399"/>
|
||||
<String ID="string_83" HPOS="415" VPOS="597" WIDTH="143" HEIGHT="36" WC="0.93" CONTENT="dolores"/><SP WIDTH="16" VPOS="597" HPOS="558"/>
|
||||
<String ID="string_84" HPOS="574" VPOS="600" WIDTH="34" HEIGHT="29" WC="0.96" CONTENT="et"/><SP WIDTH="14" VPOS="600" HPOS="608"/>
|
||||
<String ID="string_85" HPOS="622" VPOS="602" WIDTH="43" HEIGHT="26" WC="0.96" CONTENT="ea"/><SP WIDTH="13" VPOS="602" HPOS="665"/>
|
||||
<String ID="string_86" HPOS="678" VPOS="590" WIDTH="136" HEIGHT="36" WC="0.96" CONTENT="rebum."/><SP WIDTH="19" VPOS="590" HPOS="814"/>
|
||||
<String ID="string_87" HPOS="833" VPOS="588" WIDTH="74" HEIGHT="34" WC="0.96" CONTENT="Stet"/><SP WIDTH="14" VPOS="588" HPOS="907"/>
|
||||
<String ID="string_88" HPOS="921" VPOS="584" WIDTH="83" HEIGHT="36" WC="0.96" CONTENT="clita"/><SP WIDTH="12" VPOS="584" HPOS="1004"/>
|
||||
<String ID="string_89" HPOS="1016" VPOS="580" WIDTH="90" HEIGHT="36" WC="0.97" CONTENT="kasd"/><SP WIDTH="15" VPOS="580" HPOS="1106"/>
|
||||
<String ID="string_90" HPOS="1121" VPOS="578" WIDTH="205" HEIGHT="47" WC="0.96" CONTENT="gubergren,"/><SP WIDTH="16" VPOS="578" HPOS="1326"/>
|
||||
<String ID="string_91" HPOS="1342" VPOS="582" WIDTH="47" HEIGHT="25" WC="0.96" CONTENT="no"/><SP WIDTH="16" VPOS="582" HPOS="1389"/>
|
||||
<String ID="string_92" HPOS="1405" VPOS="581" WIDTH="62" HEIGHT="26" WC="0.97" CONTENT="sea"/><SP WIDTH="13" VPOS="581" HPOS="1467"/>
|
||||
<String ID="string_93" HPOS="1480" VPOS="566" WIDTH="172" HEIGHT="38" WC="0.96" CONTENT="takimata"/><SP WIDTH="14" VPOS="566" HPOS="1652"/>
|
||||
<String ID="string_94" HPOS="1666" VPOS="563" WIDTH="145" HEIGHT="33" WC="0.97" CONTENT="sanctus"/><SP WIDTH="15" VPOS="563" HPOS="1811"/>
|
||||
<String ID="string_95" HPOS="1826" VPOS="558" WIDTH="54" HEIGHT="30" WC="0.97" CONTENT="est"/><SP WIDTH="12" VPOS="558" HPOS="1880"/>
|
||||
<String ID="string_96" HPOS="1892" VPOS="552" WIDTH="130" HEIGHT="34" WC="0.96" CONTENT="Lorem"/><SP WIDTH="15" VPOS="552" HPOS="2022"/>
|
||||
<String ID="string_97" HPOS="2037" VPOS="553" WIDTH="119" HEIGHT="37" WC="0.51" CONTENT="Ipsum"/>
|
||||
</TextLine>
|
||||
<TextLine ID="line_6" HPOS="219" VPOS="657" WIDTH="282" HEIGHT="38">
|
||||
<String ID="string_98" HPOS="219" VPOS="658" WIDTH="104" HEIGHT="37" WC="0.97" CONTENT="dolor"/><SP WIDTH="15" VPOS="658" HPOS="323"/>
|
||||
<String ID="string_99" HPOS="338" VPOS="657" WIDTH="45" HEIGHT="35" WC="0.97" CONTENT="sit"/><SP WIDTH="14" VPOS="657" HPOS="383"/>
|
||||
<String ID="string_100" HPOS="397" VPOS="660" WIDTH="104" HEIGHT="35" WC="0.94" CONTENT="amet."/>
|
||||
</TextLine>
|
||||
</TextBlock>
|
||||
</PrintSpace>
|
||||
</Page>
|
||||
</Layout>
|
||||
</alto>
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,47 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15/pagecontent.xsd">
|
||||
<Metadata>
|
||||
<Creator></Creator>
|
||||
<Created>2019-07-26T13:59:00</Created>
|
||||
<LastChange>2019-07-26T14:00:29</LastChange></Metadata>
|
||||
<Page imageFilename="lorem-ipsum-scan.tif" imageXResolution="300.00000" imageYResolution="300.00000" imageWidth="2481" imageHeight="3508">
|
||||
<TextRegion id="tempReg357564684568544579089">
|
||||
<Coords points="0,0 1,0 1,1 0,1"/>
|
||||
<TextLine id="l0">
|
||||
<Coords points="228,237 228,295 2216,295 2216,237"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine>
|
||||
<TextLine id="l1">
|
||||
<Coords points="228,298 228,348 2160,348 2160,298"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine>
|
||||
<TextLine id="l2">
|
||||
<Coords points="225,348 225,410 2178,410 2178,348"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine>
|
||||
<TextLine id="l3">
|
||||
<Coords points="218,413 218,463 2153,463 2153,413"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine>
|
||||
<TextLine id="l4">
|
||||
<Coords points="225,466 225,522 2153,522 2153,466"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine>
|
||||
<TextLine id="l5">
|
||||
<Coords points="216,524 216,581 2187,581 2187,524"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine>
|
||||
<TextLine id="l6">
|
||||
<Coords points="219,584 219,640 542,640 542,584"/>
|
||||
<TextEquiv>
|
||||
<Unicode></Unicode></TextEquiv></TextLine></TextRegion>
|
||||
<TextRegion id="r7" type="paragraph">
|
||||
<Coords points="204,212 204,651 2227,651 2227,212"/>
|
||||
<TextEquiv>
|
||||
<Unicode>Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt
|
||||
ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo
|
||||
dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit
|
||||
amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor
|
||||
invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et
|
||||
justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum
|
||||
dolor sit amet.</Unicode></TextEquiv></TextRegion></Page></PcGts>
|
|
@ -0,0 +1,138 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<alto xmlns="http://www.loc.gov/standards/alto/ns-v3#" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v3# http://www.loc.gov/alto/v3/alto-3-0.xsd">
|
||||
<Description>
|
||||
<MeasurementUnit>pixel</MeasurementUnit>
|
||||
<sourceImageInformation>
|
||||
<fileName> </fileName>
|
||||
</sourceImageInformation>
|
||||
<OCRProcessing ID="OCR_0">
|
||||
<ocrProcessingStep>
|
||||
<processingSoftware>
|
||||
<softwareName>tesseract 4.1.0-rc4</softwareName>
|
||||
</processingSoftware>
|
||||
</ocrProcessingStep>
|
||||
</OCRProcessing>
|
||||
</Description>
|
||||
<Layout>
|
||||
<Page WIDTH="2481" HEIGHT="3508" PHYSICAL_IMG_NR="0" ID="page_0">
|
||||
<PrintSpace HPOS="0" VPOS="0" WIDTH="2481" HEIGHT="3508">
|
||||
<TextBlock ID="block_0" HPOS="234" VPOS="244" WIDTH="1966" HEIGHT="387">
|
||||
<TextLine ID="line_0" HPOS="237" VPOS="244" WIDTH="1963" HEIGHT="48">
|
||||
<String ID="string_0" HPOS="237" VPOS="248" WIDTH="133" HEIGHT="34" WC="0.96" CONTENT="Lorem"/><SP WIDTH="14" VPOS="248" HPOS="370"/>
|
||||
<String ID="string_1" HPOS="384" VPOS="247" WIDTH="120" HEIGHT="45" WC="0.96" CONTENT="ipsum"/><SP WIDTH="15" VPOS="247" HPOS="504"/>
|
||||
<String ID="string_2" HPOS="519" VPOS="246" WIDTH="103" HEIGHT="36" WC="0.96" CONTENT="dolor"/><SP WIDTH="14" VPOS="246" HPOS="622"/>
|
||||
<String ID="string_3" HPOS="636" VPOS="247" WIDTH="46" HEIGHT="35" WC="0.96" CONTENT="sit"/><SP WIDTH="14" VPOS="247" HPOS="682"/>
|
||||
<String ID="string_4" HPOS="696" VPOS="252" WIDTH="105" HEIGHT="36" WC="0.97" CONTENT="amet,"/><SP WIDTH="17" VPOS="252" HPOS="801"/>
|
||||
<String ID="string_5" HPOS="818" VPOS="251" WIDTH="202" HEIGHT="30" WC="0.96" CONTENT="consetetur"/><SP WIDTH="14" VPOS="251" HPOS="1020"/>
|
||||
<String ID="string_6" HPOS="1034" VPOS="244" WIDTH="207" HEIGHT="46" WC="0.96" CONTENT="sadipscing"/><SP WIDTH="15" VPOS="244" HPOS="1241"/>
|
||||
<String ID="string_7" HPOS="1256" VPOS="244" WIDTH="86" HEIGHT="43" WC="0.96" CONTENT="elitr,"/><SP WIDTH="16" VPOS="244" HPOS="1342"/>
|
||||
<String ID="string_8" HPOS="1358" VPOS="244" WIDTH="65" HEIGHT="36" WC="0.96" CONTENT="sed"/><SP WIDTH="15" VPOS="244" HPOS="1423"/>
|
||||
<String ID="string_9" HPOS="1438" VPOS="244" WIDTH="99" HEIGHT="36" WC="0.96" CONTENT="diam"/><SP WIDTH="14" VPOS="244" HPOS="1537"/>
|
||||
<String ID="string_10" HPOS="1551" VPOS="255" WIDTH="164" HEIGHT="35" WC="0.97" CONTENT="nonumy"/><SP WIDTH="15" VPOS="255" HPOS="1715"/>
|
||||
<String ID="string_11" HPOS="1730" VPOS="244" WIDTH="139" HEIGHT="36" WC="0.96" CONTENT="eirmod"/><SP WIDTH="13" VPOS="244" HPOS="1869"/>
|
||||
<String ID="string_12" HPOS="1882" VPOS="250" WIDTH="140" HEIGHT="40" WC="0.96" CONTENT="tempor"/><SP WIDTH="13" VPOS="250" HPOS="2022"/>
|
||||
<String ID="string_13" HPOS="2035" VPOS="244" WIDTH="165" HEIGHT="35" WC="0.96" CONTENT="invidunt"/>
|
||||
</TextLine>
|
||||
<TextLine ID="line_1" HPOS="237" VPOS="301" WIDTH="1913" HEIGHT="49">
|
||||
<String ID="string_14" HPOS="237" VPOS="310" WIDTH="39" HEIGHT="29" WC="0.96" CONTENT="ut"/><SP WIDTH="13" VPOS="310" HPOS="276"/>
|
||||
<String ID="string_15" HPOS="289" VPOS="304" WIDTH="123" HEIGHT="44" WC="0.96" CONTENT="labore"/><SP WIDTH="16" VPOS="304" HPOS="412"/>
|
||||
<String ID="string_16" HPOS="428" VPOS="310" WIDTH="34" HEIGHT="29" WC="0.97" CONTENT="et"/><SP WIDTH="14" VPOS="310" HPOS="462"/>
|
||||
<String ID="string_17" HPOS="476" VPOS="304" WIDTH="123" HEIGHT="36" WC="0.96" CONTENT="dolore"/><SP WIDTH="15" VPOS="304" HPOS="599"/>
|
||||
<String ID="string_18" HPOS="614" VPOS="313" WIDTH="133" HEIGHT="37" WC="0.96" CONTENT="magna"/><SP WIDTH="14" VPOS="313" HPOS="747"/>
|
||||
<String ID="string_19" HPOS="761" VPOS="302" WIDTH="183" HEIGHT="46" WC="0.96" CONTENT="aliquyam"/><SP WIDTH="15" VPOS="302" HPOS="944"/>
|
||||
<String ID="string_20" HPOS="959" VPOS="308" WIDTH="81" HEIGHT="36" WC="0.96" CONTENT="erat,"/><SP WIDTH="17" VPOS="308" HPOS="1040"/>
|
||||
<String ID="string_21" HPOS="1057" VPOS="301" WIDTH="65" HEIGHT="36" WC="0.96" CONTENT="sed"/><SP WIDTH="14" VPOS="301" HPOS="1122"/>
|
||||
<String ID="string_22" HPOS="1136" VPOS="301" WIDTH="97" HEIGHT="36" WC="0.95" CONTENT="diam"/><SP WIDTH="13" VPOS="301" HPOS="1233"/>
|
||||
<String ID="string_23" HPOS="1246" VPOS="301" WIDTH="183" HEIGHT="46" WC="0.96" CONTENT="voluptua."/><SP WIDTH="13" VPOS="301" HPOS="1429"/>
|
||||
<String ID="string_24" HPOS="1442" VPOS="303" WIDTH="51" HEIGHT="34" WC="0.96" CONTENT="At"/><SP WIDTH="12" VPOS="303" HPOS="1493"/>
|
||||
<String ID="string_25" HPOS="1505" VPOS="312" WIDTH="88" HEIGHT="25" WC="0.96" CONTENT="vero"/><SP WIDTH="17" VPOS="312" HPOS="1593"/>
|
||||
<String ID="string_26" HPOS="1610" VPOS="312" WIDTH="64" HEIGHT="25" WC="0.96" CONTENT="eos"/><SP WIDTH="16" VPOS="312" HPOS="1674"/>
|
||||
<String ID="string_27" HPOS="1690" VPOS="308" WIDTH="35" HEIGHT="29" WC="0.96" CONTENT="et"/><SP WIDTH="14" VPOS="308" HPOS="1725"/>
|
||||
<String ID="string_28" HPOS="1739" VPOS="312" WIDTH="168" HEIGHT="25" WC="0.96" CONTENT="accusam"/><SP WIDTH="15" VPOS="312" HPOS="1907"/>
|
||||
<String ID="string_29" HPOS="1922" VPOS="308" WIDTH="34" HEIGHT="29" WC="0.97" CONTENT="et"/><SP WIDTH="11" VPOS="308" HPOS="1956"/>
|
||||
<String ID="string_30" HPOS="1967" VPOS="302" WIDTH="96" HEIGHT="45" WC="0.97" CONTENT="justo"/><SP WIDTH="16" VPOS="302" HPOS="2063"/>
|
||||
<String ID="string_31" HPOS="2079" VPOS="301" WIDTH="71" HEIGHT="36" WC="0.96" CONTENT="duo"/>
|
||||
</TextLine>
|
||||
<TextLine ID="line_2" HPOS="238" VPOS="359" WIDTH="1928" HEIGHT="46">
|
||||
<String ID="string_32" HPOS="238" VPOS="361" WIDTH="144" HEIGHT="36" WC="0.96" CONTENT="dolores"/><SP WIDTH="16" VPOS="361" HPOS="382"/>
|
||||
<String ID="string_33" HPOS="398" VPOS="368" WIDTH="34" HEIGHT="29" WC="0.96" CONTENT="et"/><SP WIDTH="15" VPOS="368" HPOS="432"/>
|
||||
<String ID="string_34" HPOS="447" VPOS="372" WIDTH="41" HEIGHT="25" WC="0.96" CONTENT="ea"/><SP WIDTH="14" VPOS="372" HPOS="488"/>
|
||||
<String ID="string_35" HPOS="502" VPOS="361" WIDTH="136" HEIGHT="36" WC="0.96" CONTENT="rebum."/><SP WIDTH="19" VPOS="361" HPOS="638"/>
|
||||
<String ID="string_36" HPOS="657" VPOS="363" WIDTH="75" HEIGHT="33" WC="0.97" CONTENT="Stet"/><SP WIDTH="14" VPOS="363" HPOS="732"/>
|
||||
<String ID="string_37" HPOS="746" VPOS="360" WIDTH="84" HEIGHT="36" WC="0.96" CONTENT="clita"/><SP WIDTH="13" VPOS="360" HPOS="830"/>
|
||||
<String ID="string_38" HPOS="843" VPOS="359" WIDTH="91" HEIGHT="36" WC="0.96" CONTENT="kasd"/><SP WIDTH="13" VPOS="359" HPOS="934"/>
|
||||
<String ID="string_39" HPOS="947" VPOS="359" WIDTH="208" HEIGHT="46" WC="0.96" CONTENT="gubergren,"/><SP WIDTH="16" VPOS="359" HPOS="1155"/>
|
||||
<String ID="string_40" HPOS="1171" VPOS="370" WIDTH="47" HEIGHT="24" WC="0.96" CONTENT="no"/><SP WIDTH="16" VPOS="370" HPOS="1218"/>
|
||||
<String ID="string_41" HPOS="1234" VPOS="370" WIDTH="61" HEIGHT="25" WC="0.96" CONTENT="sea"/><SP WIDTH="13" VPOS="370" HPOS="1295"/>
|
||||
<String ID="string_42" HPOS="1308" VPOS="359" WIDTH="172" HEIGHT="36" WC="0.96" CONTENT="takimata"/><SP WIDTH="15" VPOS="359" HPOS="1480"/>
|
||||
<String ID="string_43" HPOS="1495" VPOS="365" WIDTH="145" HEIGHT="30" WC="0.96" CONTENT="sanctus"/><SP WIDTH="16" VPOS="365" HPOS="1640"/>
|
||||
<String ID="string_44" HPOS="1656" VPOS="365" WIDTH="55" HEIGHT="29" WC="0.96" CONTENT="est"/><SP WIDTH="13" VPOS="365" HPOS="1711"/>
|
||||
<String ID="string_45" HPOS="1724" VPOS="361" WIDTH="131" HEIGHT="33" WC="0.96" CONTENT="Lorem"/><SP WIDTH="15" VPOS="361" HPOS="1855"/>
|
||||
<String ID="string_46" HPOS="1870" VPOS="360" WIDTH="119" HEIGHT="44" WC="0.96" CONTENT="ipsum"/><SP WIDTH="15" VPOS="360" HPOS="1989"/>
|
||||
<String ID="string_47" HPOS="2004" VPOS="359" WIDTH="103" HEIGHT="35" WC="0.96" CONTENT="dolor"/><SP WIDTH="14" VPOS="359" HPOS="2107"/>
|
||||
<String ID="string_48" HPOS="2121" VPOS="360" WIDTH="45" HEIGHT="34" WC="0.96" CONTENT="sit"/>
|
||||
</TextLine>
|
||||
<TextLine ID="line_3" HPOS="238" VPOS="416" WIDTH="1905" HEIGHT="48">
|
||||
<String ID="string_49" HPOS="238" VPOS="425" WIDTH="105" HEIGHT="29" WC="0.96" CONTENT="amet."/><SP WIDTH="16" VPOS="425" HPOS="343"/>
|
||||
<String ID="string_50" HPOS="359" VPOS="421" WIDTH="132" HEIGHT="33" WC="0.96" CONTENT="Lorem"/><SP WIDTH="13" VPOS="421" HPOS="491"/>
|
||||
<String ID="string_51" HPOS="504" VPOS="420" WIDTH="121" HEIGHT="44" WC="0.96" CONTENT="ipsum"/><SP WIDTH="15" VPOS="420" HPOS="625"/>
|
||||
<String ID="string_52" HPOS="640" VPOS="418" WIDTH="104" HEIGHT="36" WC="0.96" CONTENT="dolor"/><SP WIDTH="14" VPOS="418" HPOS="744"/>
|
||||
<String ID="string_53" HPOS="758" VPOS="419" WIDTH="45" HEIGHT="35" WC="0.97" CONTENT="sit"/><SP WIDTH="15" VPOS="419" HPOS="803"/>
|
||||
<String ID="string_54" HPOS="818" VPOS="424" WIDTH="104" HEIGHT="36" WC="0.96" CONTENT="amet,"/><SP WIDTH="17" VPOS="424" HPOS="922"/>
|
||||
<String ID="string_55" HPOS="939" VPOS="422" WIDTH="201" HEIGHT="30" WC="0.96" CONTENT="consetetur"/><SP WIDTH="15" VPOS="422" HPOS="1140"/>
|
||||
<String ID="string_56" HPOS="1155" VPOS="416" WIDTH="207" HEIGHT="46" WC="0.96" CONTENT="sadipscing"/><SP WIDTH="15" VPOS="416" HPOS="1362"/>
|
||||
<String ID="string_57" HPOS="1377" VPOS="417" WIDTH="86" HEIGHT="42" WC="0.96" CONTENT="elitr,"/><SP WIDTH="17" VPOS="417" HPOS="1463"/>
|
||||
<String ID="string_58" HPOS="1480" VPOS="416" WIDTH="66" HEIGHT="36" WC="0.96" CONTENT="sed"/><SP WIDTH="15" VPOS="416" HPOS="1546"/>
|
||||
<String ID="string_59" HPOS="1561" VPOS="416" WIDTH="98" HEIGHT="36" WC="0.96" CONTENT="diam"/><SP WIDTH="14" VPOS="416" HPOS="1659"/>
|
||||
<String ID="string_60" HPOS="1673" VPOS="427" WIDTH="163" HEIGHT="35" WC="0.96" CONTENT="nonumy"/><SP WIDTH="16" VPOS="427" HPOS="1836"/>
|
||||
<String ID="string_61" HPOS="1852" VPOS="416" WIDTH="138" HEIGHT="36" WC="0.96" CONTENT="eirmod"/><SP WIDTH="13" VPOS="416" HPOS="1990"/>
|
||||
<String ID="string_62" HPOS="2003" VPOS="422" WIDTH="140" HEIGHT="40" WC="0.96" CONTENT="tempor"/>
|
||||
</TextLine>
|
||||
<TextLine ID="line_4" HPOS="236" VPOS="474" WIDTH="1897" HEIGHT="47">
|
||||
<String ID="string_63" HPOS="236" VPOS="476" WIDTH="166" HEIGHT="35" WC="0.96" CONTENT="invidunt"/><SP WIDTH="14" VPOS="476" HPOS="402"/>
|
||||
<String ID="string_64" HPOS="416" VPOS="482" WIDTH="39" HEIGHT="29" WC="0.96" CONTENT="ut"/><SP WIDTH="12" VPOS="482" HPOS="455"/>
|
||||
<String ID="string_65" HPOS="467" VPOS="476" WIDTH="122" HEIGHT="35" WC="0.96" CONTENT="labore"/><SP WIDTH="16" VPOS="476" HPOS="589"/>
|
||||
<String ID="string_66" HPOS="605" VPOS="482" WIDTH="34" HEIGHT="29" WC="0.96" CONTENT="et"/><SP WIDTH="15" VPOS="482" HPOS="639"/>
|
||||
<String ID="string_67" HPOS="654" VPOS="475" WIDTH="125" HEIGHT="36" WC="0.96" CONTENT="dolore"/><SP WIDTH="14" VPOS="475" HPOS="779"/>
|
||||
<String ID="string_68" HPOS="793" VPOS="484" WIDTH="131" HEIGHT="37" WC="0.96" CONTENT="magna"/><SP WIDTH="15" VPOS="484" HPOS="924"/>
|
||||
<String ID="string_69" HPOS="939" VPOS="474" WIDTH="182" HEIGHT="45" WC="0.96" CONTENT="aliquyam"/><SP WIDTH="15" VPOS="474" HPOS="1121"/>
|
||||
<String ID="string_70" HPOS="1136" VPOS="480" WIDTH="81" HEIGHT="37" WC="0.96" CONTENT="erat,"/><SP WIDTH="18" VPOS="480" HPOS="1217"/>
|
||||
<String ID="string_71" HPOS="1235" VPOS="474" WIDTH="63" HEIGHT="35" WC="0.96" CONTENT="sed"/><SP WIDTH="15" VPOS="474" HPOS="1298"/>
|
||||
<String ID="string_72" HPOS="1313" VPOS="474" WIDTH="97" HEIGHT="35" WC="0.96" CONTENT="diam"/><SP WIDTH="13" VPOS="474" HPOS="1410"/>
|
||||
<String ID="string_73" HPOS="1423" VPOS="474" WIDTH="186" HEIGHT="46" WC="0.96" CONTENT="voluptua."/><SP WIDTH="14" VPOS="474" HPOS="1609"/>
|
||||
<String ID="string_74" HPOS="1623" VPOS="475" WIDTH="50" HEIGHT="34" WC="0.96" CONTENT="At"/><SP WIDTH="12" VPOS="475" HPOS="1673"/>
|
||||
<String ID="string_75" HPOS="1685" VPOS="485" WIDTH="89" HEIGHT="24" WC="0.96" CONTENT="vero"/><SP WIDTH="16" VPOS="485" HPOS="1774"/>
|
||||
<String ID="string_76" HPOS="1790" VPOS="484" WIDTH="63" HEIGHT="25" WC="0.96" CONTENT="eos"/><SP WIDTH="15" VPOS="484" HPOS="1853"/>
|
||||
<String ID="string_77" HPOS="1868" VPOS="480" WIDTH="34" HEIGHT="29" WC="0.96" CONTENT="et"/><SP WIDTH="14" VPOS="480" HPOS="1902"/>
|
||||
<String ID="string_78" HPOS="1916" VPOS="484" WIDTH="168" HEIGHT="25" WC="0.96" CONTENT="accusam"/><SP WIDTH="16" VPOS="484" HPOS="2084"/>
|
||||
<String ID="string_79" HPOS="2100" VPOS="480" WIDTH="33" HEIGHT="29" WC="0.96" CONTENT="et"/>
|
||||
</TextLine>
|
||||
<TextLine ID="line_5" HPOS="234" VPOS="531" WIDTH="1950" HEIGHT="47">
|
||||
<String ID="string_80" HPOS="234" VPOS="534" WIDTH="98" HEIGHT="44" WC="0.97" CONTENT="justo"/><SP WIDTH="16" VPOS="534" HPOS="332"/>
|
||||
<String ID="string_81" HPOS="348" VPOS="533" WIDTH="71" HEIGHT="35" WC="0.96" CONTENT="duo"/><SP WIDTH="16" VPOS="533" HPOS="419"/>
|
||||
<String ID="string_82" HPOS="435" VPOS="533" WIDTH="143" HEIGHT="35" WC="0.96" CONTENT="dolores"/><SP WIDTH="15" VPOS="533" HPOS="578"/>
|
||||
<String ID="string_83" HPOS="593" VPOS="539" WIDTH="35" HEIGHT="29" WC="0.96" CONTENT="et"/><SP WIDTH="14" VPOS="539" HPOS="628"/>
|
||||
<String ID="string_84" HPOS="642" VPOS="543" WIDTH="42" HEIGHT="25" WC="0.97" CONTENT="ea"/><SP WIDTH="14" VPOS="543" HPOS="684"/>
|
||||
<String ID="string_85" HPOS="698" VPOS="533" WIDTH="137" HEIGHT="35" WC="0.96" CONTENT="rebum."/><SP WIDTH="18" VPOS="533" HPOS="835"/>
|
||||
<String ID="string_86" HPOS="853" VPOS="534" WIDTH="74" HEIGHT="34" WC="0.96" CONTENT="Stet"/><SP WIDTH="14" VPOS="534" HPOS="927"/>
|
||||
<String ID="string_87" HPOS="941" VPOS="531" WIDTH="84" HEIGHT="36" WC="0.96" CONTENT="clita"/><SP WIDTH="13" VPOS="531" HPOS="1025"/>
|
||||
<String ID="string_88" HPOS="1038" VPOS="531" WIDTH="89" HEIGHT="35" WC="0.96" CONTENT="kasd"/><SP WIDTH="15" VPOS="531" HPOS="1127"/>
|
||||
<String ID="string_89" HPOS="1142" VPOS="531" WIDTH="208" HEIGHT="46" WC="0.96" CONTENT="gubergren,"/><SP WIDTH="16" VPOS="531" HPOS="1350"/>
|
||||
<String ID="string_90" HPOS="1366" VPOS="542" WIDTH="48" HEIGHT="25" WC="0.96" CONTENT="no"/><SP WIDTH="16" VPOS="542" HPOS="1414"/>
|
||||
<String ID="string_91" HPOS="1430" VPOS="542" WIDTH="62" HEIGHT="25" WC="0.96" CONTENT="sea"/><SP WIDTH="13" VPOS="542" HPOS="1492"/>
|
||||
<String ID="string_92" HPOS="1505" VPOS="531" WIDTH="173" HEIGHT="36" WC="0.96" CONTENT="takimata"/><SP WIDTH="15" VPOS="531" HPOS="1678"/>
|
||||
<String ID="string_93" HPOS="1693" VPOS="538" WIDTH="144" HEIGHT="29" WC="0.96" CONTENT="sanctus"/><SP WIDTH="16" VPOS="538" HPOS="1837"/>
|
||||
<String ID="string_94" HPOS="1853" VPOS="537" WIDTH="53" HEIGHT="29" WC="0.96" CONTENT="est"/><SP WIDTH="14" VPOS="537" HPOS="1906"/>
|
||||
<String ID="string_95" HPOS="1920" VPOS="533" WIDTH="130" HEIGHT="33" WC="0.96" CONTENT="Lorem"/><SP WIDTH="14" VPOS="533" HPOS="2050"/>
|
||||
<String ID="string_96" HPOS="2064" VPOS="532" WIDTH="120" HEIGHT="44" WC="0.95" CONTENT="ipsum"/>
|
||||
</TextLine>
|
||||
<TextLine ID="line_6" HPOS="237" VPOS="590" WIDTH="282" HEIGHT="41">
|
||||
<String ID="string_97" HPOS="237" VPOS="590" WIDTH="104" HEIGHT="35" WC="0.96" CONTENT="dolor"/><SP WIDTH="15" VPOS="590" HPOS="341"/>
|
||||
<String ID="string_98" HPOS="356" VPOS="591" WIDTH="45" HEIGHT="35" WC="0.96" CONTENT="sit"/><SP WIDTH="14" VPOS="591" HPOS="401"/>
|
||||
<String ID="string_99" HPOS="415" VPOS="597" WIDTH="104" HEIGHT="34" WC="0.96" CONTENT="amet."/>
|
||||
</TextLine>
|
||||
</TextBlock>
|
||||
</PrintSpace>
|
||||
</Page>
|
||||
</Layout>
|
||||
</alto>
|
BIN
qurator/dinglehopper/tests/data/lorem-ipsum/lorem-ipsum-scan.pdf
Normal file
BIN
qurator/dinglehopper/tests/data/lorem-ipsum/lorem-ipsum-scan.pdf
Normal file
Binary file not shown.
BIN
qurator/dinglehopper/tests/data/lorem-ipsum/lorem-ipsum-scan.tif
Normal file
BIN
qurator/dinglehopper/tests/data/lorem-ipsum/lorem-ipsum-scan.tif
Normal file
Binary file not shown.
BIN
qurator/dinglehopper/tests/data/lorem-ipsum/lorem-ipsum.odt
Normal file
BIN
qurator/dinglehopper/tests/data/lorem-ipsum/lorem-ipsum.odt
Normal file
Binary file not shown.
4204
qurator/dinglehopper/tests/data/order.page.xml
Normal file
4204
qurator/dinglehopper/tests/data/order.page.xml
Normal file
File diff suppressed because it is too large
Load diff
3394
qurator/dinglehopper/tests/data/test-fake-ocr.page2018.xml
Normal file
3394
qurator/dinglehopper/tests/data/test-fake-ocr.page2018.xml
Normal file
File diff suppressed because it is too large
Load diff
3394
qurator/dinglehopper/tests/data/test-gt.page2018.xml
Normal file
3394
qurator/dinglehopper/tests/data/test-gt.page2018.xml
Normal file
File diff suppressed because it is too large
Load diff
20186
qurator/dinglehopper/tests/data/test.alto1.xml
Normal file
20186
qurator/dinglehopper/tests/data/test.alto1.xml
Normal file
File diff suppressed because it is too large
Load diff
64
qurator/dinglehopper/tests/data/test.alto2.xml
Normal file
64
qurator/dinglehopper/tests/data/test.alto2.xml
Normal file
|
@ -0,0 +1,64 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<alto xmlns="http://www.loc.gov/standards/alto/ns-v2#" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd">
|
||||
<Description>
|
||||
<MeasurementUnit>pixel</MeasurementUnit>
|
||||
<OCRProcessing ID="IdOcr"><ocrProcessingStep><processingDateTime>2017-03-27</processingDateTime><processingSoftware><softwareCreator>ABBYY</softwareCreator><softwareName>ABBYY FineReader Engine</softwareName><softwareVersion>11</softwareVersion></processingSoftware></ocrProcessingStep></OCRProcessing>
|
||||
</Description>
|
||||
<Styles><TextStyle ID="font0" FONTFAMILY="Times New Roman" FONTSIZE="7"/><TextStyle ID="font1" FONTFAMILY="Times New Roman" FONTSIZE="11"/>
|
||||
</Styles>
|
||||
<Layout>
|
||||
<Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="2500" WIDTH="1720">
|
||||
<TopMargin HEIGHT="172" WIDTH="1720" VPOS="0" HPOS="0">
|
||||
</TopMargin>
|
||||
<LeftMargin HEIGHT="2016" WIDTH="341" VPOS="172" HPOS="0">
|
||||
</LeftMargin>
|
||||
<RightMargin HEIGHT="2016" WIDTH="111" VPOS="172" HPOS="1609">
|
||||
</RightMargin>
|
||||
<BottomMargin HEIGHT="312" WIDTH="1720" VPOS="2188" HPOS="0">
|
||||
</BottomMargin>
|
||||
<PrintSpace HEIGHT="2016" WIDTH="1268" VPOS="172" HPOS="341">
|
||||
<TextBlock ID="Page1_Block1" HEIGHT="43" WIDTH="72" VPOS="174" HPOS="936" language="de" STYLEREFS="font1">
|
||||
<TextLine HEIGHT="31" WIDTH="60" VPOS="180" HPOS="942"><String STYLE="bold" WC="0.676666677" CONTENT="142" HEIGHT="31" WIDTH="60" VPOS="180" HPOS="942"/></TextLine>
|
||||
</TextBlock>
|
||||
<ComposedBlock ID="Page1_Block2" HEIGHT="1306" WIDTH="1266" VPOS="257" HPOS="341" TYPE="container"><Shape><Polygon POINTS="348,262 1610,262 1610,1564 348,1564 348,262"/></Shape>
|
||||
<TextBlock ID="Page1_Block3" HEIGHT="776" WIDTH="1261" VPOS="257" HPOS="343" language="de" STYLEREFS="font1"><Shape><Polygon POINTS="350,262 1610,262 1610,708 992,708 992,1034 350,1034 350,262"/></Shape>
|
||||
<TextLine HEIGHT="50" WIDTH="1223" VPOS="267" HPOS="363"><String WC="0.6899999976" CONTENT="die" HEIGHT="33" WIDTH="46" VPOS="271" HPOS="363"/><SP WIDTH="16" VPOS="272" HPOS="410"/><String WC="0.7875000238" CONTENT="Zugtiere" HEIGHT="44" WIDTH="142" VPOS="270" HPOS="427"/><SP WIDTH="20" VPOS="281" HPOS="570"/><String WC="0.9499999881" CONTENT="eines" HEIGHT="34" WIDTH="82" VPOS="271" HPOS="591"/><SP WIDTH="10" VPOS="272" HPOS="674"/><String WC="0.6349999905" CONTENT="Joches" HEIGHT="42" WIDTH="113" VPOS="272" HPOS="685"/><SP WIDTH="15" VPOS="271" HPOS="799"/><String WC="0.6009091139" CONTENT="(griechisch" HEIGHT="45" WIDTH="161" VPOS="270" HPOS="815"/><SP WIDTH="19" VPOS="271" HPOS="977"/><String WC="0.7699999809" CONTENT="zygos)," HEIGHT="44" WIDTH="126" VPOS="269" HPOS="997"/><SP WIDTH="21" VPOS="272" HPOS="1124"/><String WC="0.7099999785" CONTENT="so" HEIGHT="42" WIDTH="27" VPOS="271" HPOS="1146"/><SP WIDTH="19" VPOS="280" HPOS="1174"/><String WC="0.6679999828" CONTENT="nennt" HEIGHT="32" WIDTH="94" VPOS="272" HPOS="1194"/><SP WIDTH="19" VPOS="272" HPOS="1289"/><String WC="0.4133333266" CONTENT="man" HEIGHT="23" WIDTH="72" VPOS="281" HPOS="1309"/><SP WIDTH="21" VPOS="271" HPOS="1382"/><String WC="0.5099999905" CONTENT="die" HEIGHT="33" WIDTH="46" VPOS="271" HPOS="1404"/><SP WIDTH="15" VPOS="272" HPOS="1451"/><String WC="0.8700000048" CONTENT="Zporen" HEIGHT="43" WIDTH="119" VPOS="271" HPOS="1467"/></TextLine>
|
||||
<TextLine HEIGHT="51" WIDTH="1224" VPOS="321" HPOS="363"><String WC="0.8133333325" CONTENT="der" HEIGHT="34" WIDTH="50" VPOS="325" HPOS="363"/><SP WIDTH="24" VPOS="327" HPOS="414"/><String WC="0.8700000048" CONTENT="Tonjugaten" HEIGHT="43" WIDTH="197" VPOS="326" HPOS="439"/><SP WIDTH="32" VPOS="337" HPOS="637"/><String WC="0.6499999762" CONTENT="auch" HEIGHT="43" WIDTH="70" VPOS="326" HPOS="670"/><SP WIDTH="31" VPOS="326" HPOS="741"/><String WC="0.7120000124" CONTENT="Jochsporen" HEIGHT="43" WIDTH="185" VPOS="326" HPOS="773"/><SP WIDTH="37" VPOS="336" HPOS="959"/><String WC="0.9200000167" CONTENT="oder" HEIGHT="32" WIDTH="71" VPOS="327" HPOS="997"/><SP WIDTH="31" VPOS="326" HPOS="1069"/><String WC="0.7072727084" CONTENT="Zpgosporen." HEIGHT="44" WIDTH="203" VPOS="325" HPOS="1101"/><SP WIDTH="53" VPOS="326" HPOS="1305"/><String WC="0.5320000052" CONTENT="Daher" HEIGHT="43" WIDTH="107" VPOS="326" HPOS="1359"/><SP WIDTH="36" VPOS="325" HPOS="1467"/><String WC="0.5720000267" CONTENT="heißt" HEIGHT="43" WIDTH="83" VPOS="325" HPOS="1504"/></TextLine>
|
||||
<TextLine HEIGHT="46" WIDTH="655" VPOS="379" HPOS="363"><String WC="0.8650000095" CONTENT="auch" HEIGHT="43" WIDTH="70" VPOS="381" HPOS="363"/><SP WIDTH="29" VPOS="381" HPOS="434"/><String WC="0.6299999952" CONTENT="die" HEIGHT="33" WIDTH="46" VPOS="381" HPOS="464"/><SP WIDTH="24" VPOS="392" HPOS="511"/><String WC="0.7699999809" CONTENT="ganze" HEIGHT="33" WIDTH="94" VPOS="391" HPOS="536"/><SP WIDTH="24" VPOS="381" HPOS="631"/><String WC="0.7371428609" CONTENT="Ordnung" HEIGHT="43" WIDTH="154" VPOS="381" HPOS="656"/><SP WIDTH="24" VPOS="382" HPOS="811"/><String WC="0.800999999" CONTENT="Jochalgen." HEIGHT="43" WIDTH="182" VPOS="381" HPOS="836"/></TextLine>
|
||||
<TextLine HEIGHT="50" WIDTH="1182" VPOS="432" HPOS="406"><String WC="0.3966666758" CONTENT="Wir" HEIGHT="33" WIDTH="69" VPOS="436" HPOS="406"/><SP WIDTH="24" VPOS="446" HPOS="475"/><String WC="0.6949999928" CONTENT="wollen" HEIGHT="33" WIDTH="112" VPOS="436" HPOS="499"/><SP WIDTH="24" VPOS="445" HPOS="611"/><String WC="0.5166666508" CONTENT="nun" HEIGHT="23" WIDTH="65" VPOS="446" HPOS="635"/><SP WIDTH="24" VPOS="446" HPOS="700"/><String WC="0.7570000291" CONTENT="versuchen," HEIGHT="44" WIDTH="166" VPOS="435" HPOS="724"/><SP WIDTH="27" VPOS="446" HPOS="890"/><String WC="0.6733333468" CONTENT="uns" HEIGHT="23" WIDTH="59" VPOS="446" HPOS="917"/><SP WIDTH="25" VPOS="446" HPOS="976"/><String WC="0.6725000143" CONTENT="eine" HEIGHT="33" WIDTH="66" VPOS="436" HPOS="1001"/><SP WIDTH="25" VPOS="436" HPOS="1067"/><String WC="0.6690909266" CONTENT="Vorstellung" HEIGHT="44" WIDTH="192" VPOS="435" HPOS="1092"/><SP WIDTH="25" VPOS="446" HPOS="1284"/><String WC="0.8466666937" CONTENT="von" HEIGHT="23" WIDTH="62" VPOS="446" HPOS="1309"/><SP WIDTH="25" VPOS="436" HPOS="1371"/><String WC="0.5866666436" CONTENT="den" HEIGHT="32" WIDTH="56" VPOS="436" HPOS="1396"/><SP WIDTH="25" VPOS="436" HPOS="1452"/><String WC="0.7366666794" CONTENT="Zchon-" HEIGHT="44" WIDTH="111" VPOS="435" HPOS="1477"/></TextLine>
|
||||
<TextLine HEIGHT="50" WIDTH="1224" VPOS="486" HPOS="363"><String WC="0.7181817889" CONTENT="heitsformen" HEIGHT="45" WIDTH="199" VPOS="489" HPOS="363"/><SP WIDTH="32" VPOS="490" HPOS="563"/><String WC="0.8633333445" CONTENT="der" HEIGHT="33" WIDTH="50" VPOS="490" HPOS="596"/><SP WIDTH="31" VPOS="491" HPOS="647"/><String WC="0.7749999762" CONTENT="in" HEIGHT="33" WIDTH="30" VPOS="491" HPOS="679"/><SP WIDTH="31" VPOS="501" HPOS="710"/><String WC="0.5479999781" CONTENT="viele" HEIGHT="33" WIDTH="75" VPOS="491" HPOS="742"/><SP WIDTH="32" VPOS="502" HPOS="818"/><String WC="0.7345454693" CONTENT="artenreiche" HEIGHT="44" WIDTH="181" VPOS="490" HPOS="851"/><SP WIDTH="31" VPOS="491" HPOS="1033"/><String WC="0.7277777791" CONTENT="Gattungen" HEIGHT="43" WIDTH="181" VPOS="490" HPOS="1065"/><SP WIDTH="32" VPOS="501" HPOS="1247"/><String WC="0.7766666412" CONTENT="geteilten" HEIGHT="43" WIDTH="140" VPOS="490" HPOS="1280"/><SP WIDTH="32" VPOS="491" HPOS="1421"/><String WC="0.7514285445" CONTENT="Familie" HEIGHT="44" WIDTH="133" VPOS="489" HPOS="1454"/></TextLine>
|
||||
<TextLine HEIGHT="51" WIDTH="1225" VPOS="540" HPOS="362"><String WC="0.7633333206" CONTENT="der" HEIGHT="32" WIDTH="51" VPOS="546" HPOS="362"/><SP WIDTH="24" VPOS="544" HPOS="414"/><String WC="0.4366666675" CONTENT="OesmiäiLLeen" HEIGHT="35" WIDTH="254" VPOS="543" HPOS="439"/><SP WIDTH="29" VPOS="555" HPOS="694"/><String WC="0.8199999928" CONTENT="zu" HEIGHT="31" WIDTH="35" VPOS="556" HPOS="724"/><SP WIDTH="24" VPOS="556" HPOS="760"/><String WC="0.5699999928" CONTENT="machen." HEIGHT="44" WIDTH="131" VPOS="545" HPOS="785"/><SP WIDTH="47" VPOS="546" HPOS="917"/><String WC="0.7466666698" CONTENT="Vas" HEIGHT="33" WIDTH="68" VPOS="546" HPOS="965"/><SP WIDTH="25" VPOS="556" HPOS="1034"/><String WC="0.6685714126" CONTENT="gelingt" HEIGHT="43" WIDTH="116" VPOS="545" HPOS="1060"/><SP WIDTH="24" VPOS="545" HPOS="1177"/><String WC="0.5785714388" CONTENT="leicht," HEIGHT="43" WIDTH="95" VPOS="545" HPOS="1202"/><SP WIDTH="31" VPOS="556" HPOS="1298"/><String WC="0.6675000191" CONTENT="wenn" HEIGHT="23" WIDTH="90" VPOS="556" HPOS="1330"/><SP WIDTH="23" VPOS="556" HPOS="1421"/><String WC="0.5666666627" CONTENT="wir" HEIGHT="35" WIDTH="58" VPOS="544" HPOS="1445"/><SP WIDTH="23" VPOS="555" HPOS="1504"/><String WC="0.8000000119" CONTENT="uns" HEIGHT="23" WIDTH="59" VPOS="555" HPOS="1528"/></TextLine>
|
||||
<TextLine HEIGHT="50" WIDTH="1225" VPOS="596" HPOS="362"><String WC="0.6399999857" CONTENT="selbst" HEIGHT="42" WIDTH="84" VPOS="600" HPOS="362"/><SP WIDTH="23" VPOS="603" HPOS="447"/><String WC="0.80400002" CONTENT="etwas" HEIGHT="33" WIDTH="98" VPOS="601" HPOS="471"/><SP WIDTH="23" VPOS="601" HPOS="570"/><String WC="0.6587499976" CONTENT="Material" HEIGHT="34" WIDTH="156" VPOS="600" HPOS="594"/><SP WIDTH="24" VPOS="601" HPOS="751"/><String WC="0.7300000191" CONTENT="holen," HEIGHT="44" WIDTH="99" VPOS="600" HPOS="776"/><SP WIDTH="25" VPOS="600" HPOS="876"/><String WC="0.7516666651" CONTENT="höchst" HEIGHT="43" WIDTH="95" VPOS="600" HPOS="902"/><SP WIDTH="22" VPOS="603" HPOS="998"/><String WC="0.5454545617" CONTENT="mangelhaft," HEIGHT="44" WIDTH="206" VPOS="600" HPOS="1021"/><SP WIDTH="25" VPOS="610" HPOS="1228"/><String WC="0.7599999905" CONTENT="wenn" HEIGHT="23" WIDTH="90" VPOS="610" HPOS="1254"/><SP WIDTH="23" VPOS="610" HPOS="1345"/><String WC="0.6299999952" CONTENT="wir" HEIGHT="34" WIDTH="58" VPOS="600" HPOS="1369"/><SP WIDTH="23" VPOS="611" HPOS="1428"/><String WC="0.8100000024" CONTENT="uns" HEIGHT="24" WIDTH="59" VPOS="610" HPOS="1452"/><SP WIDTH="20" VPOS="610" HPOS="1512"/><String WC="0.5966666937" CONTENT="auf" HEIGHT="42" WIDTH="54" VPOS="600" HPOS="1533"/></TextLine>
|
||||
<TextLine HEIGHT="50" WIDTH="1224" VPOS="651" HPOS="362"><String WC="0.7933333516" CONTENT="die" HEIGHT="33" WIDTH="46" VPOS="655" HPOS="362"/><SP WIDTH="23" VPOS="655" HPOS="409"/><String WC="0.8428571224" CONTENT="Lektüre" HEIGHT="35" WIDTH="129" VPOS="654" HPOS="433"/><SP WIDTH="24" VPOS="655" HPOS="563"/><String WC="0.6150000095" CONTENT="dieses" HEIGHT="42" WIDTH="92" VPOS="655" HPOS="588"/><SP WIDTH="23" VPOS="656" HPOS="681"/><String WC="0.8766666651" CONTENT="Buches" HEIGHT="43" WIDTH="115" VPOS="655" HPOS="705"/><SP WIDTH="30" VPOS="655" HPOS="821"/><String WC="0.6575000286" CONTENT="beschränken." HEIGHT="45" WIDTH="211" VPOS="654" HPOS="852"/><SP WIDTH="46" VPOS="656" HPOS="1064"/><String WC="0.5699999928" CONTENT="Das" HEIGHT="34" WIDTH="68" VPOS="655" HPOS="1111"/><SP WIDTH="23" VPOS="656" HPOS="1180"/><String WC="0.7912499905" CONTENT="Material" HEIGHT="33" WIDTH="156" VPOS="655" HPOS="1204"/><SP WIDTH="24" VPOS="655" HPOS="1361"/><String WC="0.8199999928" CONTENT="ist" HEIGHT="42" WIDTH="33" VPOS="655" HPOS="1386"/><SP WIDTH="23" VPOS="655" HPOS="1420"/><String WC="0.6716666818" CONTENT="leicht" HEIGHT="44" WIDTH="83" VPOS="654" HPOS="1444"/><SP WIDTH="22" VPOS="657" HPOS="1528"/><String WC="0.6999999881" CONTENT="zu" HEIGHT="31" WIDTH="35" VPOS="665" HPOS="1551"/></TextLine>
|
||||
<TextLine HEIGHT="46" WIDTH="608" VPOS="707" HPOS="361"><String WC="0.6736363769" CONTENT="beschaffen." HEIGHT="43" WIDTH="175" VPOS="709" HPOS="361"/><SP WIDTH="30" VPOS="710" HPOS="537"/><String WC="0.6533333063" CONTENT="Man" HEIGHT="33" WIDTH="84" VPOS="710" HPOS="568"/><SP WIDTH="22" VPOS="710" HPOS="653"/><String WC="0.6228571534" CONTENT="sammelt" HEIGHT="42" WIDTH="137" VPOS="710" HPOS="676"/><SP WIDTH="20" VPOS="712" HPOS="814"/><String WC="0.7666666508" CONTENT="aus" HEIGHT="24" WIDTH="57" VPOS="720" HPOS="835"/><SP WIDTH="20" VPOS="710" HPOS="893"/><String WC="0.5966666937" CONTENT="den" HEIGHT="33" WIDTH="55" VPOS="710" HPOS="914"/></TextLine>
|
||||
<TextLine HEIGHT="47" WIDTH="607" VPOS="762" HPOS="364"><String WC="0.7990909219" CONTENT="Torflöchern" HEIGHT="44" WIDTH="195" VPOS="763" HPOS="364"/><SP WIDTH="16" VPOS="764" HPOS="559"/><String WC="0.9300000072" CONTENT="der" HEIGHT="33" WIDTH="52" VPOS="764" HPOS="575"/><SP WIDTH="8" VPOS="764" HPOS="627"/><String WC="0.7636363506" CONTENT="Niedermoore" HEIGHT="34" WIDTH="217" VPOS="764" HPOS="635"/><SP WIDTH="11" VPOS="765" HPOS="852"/><String WC="0.7620000243" CONTENT="Moose" HEIGHT="42" WIDTH="108" VPOS="765" HPOS="863"/></TextLine>
|
||||
<TextLine HEIGHT="48" WIDTH="608" VPOS="817" HPOS="363"><String WC="1." CONTENT="oder" HEIGHT="33" WIDTH="70" VPOS="819" HPOS="363"/><SP WIDTH="28" VPOS="819" HPOS="434"/><String WC="0.6233333349" CONTENT="höhere" HEIGHT="45" WIDTH="111" VPOS="818" HPOS="463"/><SP WIDTH="28" VPOS="820" HPOS="575"/><String WC="0.6035714149" CONTENT="Wasserpflanzen" HEIGHT="44" WIDTH="260" VPOS="818" HPOS="604"/><SP WIDTH="29" VPOS="818" HPOS="865"/><String WC="0.7839999795" CONTENT="(sehr" HEIGHT="45" WIDTH="76" VPOS="818" HPOS="895"/></TextLine>
|
||||
<TextLine HEIGHT="46" WIDTH="609" VPOS="872" HPOS="362"><String WC="0.6299999952" CONTENT="ist" HEIGHT="42" WIDTH="35" VPOS="874" HPOS="362"/><SP WIDTH="25" VPOS="875" HPOS="398"/><String WC="0.9666666389" CONTENT="der" HEIGHT="33" WIDTH="51" VPOS="875" HPOS="424"/><SP WIDTH="25" VPOS="875" HPOS="476"/><String WC="0.5278571248" CONTENT="Wasserschlauch" HEIGHT="44" WIDTH="245" VPOS="874" HPOS="502"/><SP WIDTH="25" VPOS="874" HPOS="748"/><String WC="0.8245454431" CONTENT="Utricularia" HEIGHT="36" WIDTH="197" VPOS="873" HPOS="774"/></TextLine>
|
||||
<TextLine HEIGHT="47" WIDTH="608" VPOS="927" HPOS="361"><String WC="0.7950000167" CONTENT="zu" HEIGHT="32" WIDTH="36" VPOS="939" HPOS="361"/><SP WIDTH="24" VPOS="939" HPOS="398"/><String WC="0.7300000191" CONTENT="empfehlen)," HEIGHT="44" WIDTH="194" VPOS="928" HPOS="423"/><SP WIDTH="32" VPOS="930" HPOS="618"/><String WC="0.9433333278" CONTENT="die" HEIGHT="33" WIDTH="46" VPOS="929" HPOS="651"/><SP WIDTH="29" VPOS="940" HPOS="698"/><String WC="0.5666666627" CONTENT="mit" HEIGHT="33" WIDTH="56" VPOS="930" HPOS="728"/><SP WIDTH="23" VPOS="930" HPOS="785"/><String WC="0.7674999833" CONTENT="braunem," HEIGHT="44" WIDTH="160" VPOS="929" HPOS="809"/></TextLine>
|
||||
<TextLine HEIGHT="49" WIDTH="606" VPOS="980" HPOS="362"><String WC="0.6863636374" CONTENT="schlickigem" HEIGHT="43" WIDTH="176" VPOS="984" HPOS="362"/><SP WIDTH="32" VPOS="981" HPOS="539"/><String WC="0.6887500286" CONTENT="Überzüge" HEIGHT="45" WIDTH="157" VPOS="981" HPOS="572"/><SP WIDTH="31" VPOS="984" HPOS="730"/><String WC="0.5857142806" CONTENT="besetzt" HEIGHT="45" WIDTH="101" VPOS="983" HPOS="762"/><SP WIDTH="32" VPOS="985" HPOS="864"/><String WC="0.8379999995" CONTENT="sind." HEIGHT="42" WIDTH="71" VPOS="984" HPOS="897"/></TextLine>
|
||||
</TextBlock>
|
||||
<Illustration ID="Page1_Block4" HEIGHT="232" WIDTH="604" VPOS="1131" HPOS="374"><Shape><Polygon POINTS="378,1134 982,1134 982,1364 378,1364 378,1134"/></Shape></Illustration>
|
||||
<Illustration ID="Page1_Block5" HEIGHT="664" WIDTH="539" VPOS="732" HPOS="1013"><Shape><Polygon POINTS="1019,737 1556,737 1556,1399 1019,1399 1019,737"/></Shape></Illustration>
|
||||
<TextBlock ID="Page1_Block6" HEIGHT="140" WIDTH="1258" VPOS="1423" HPOS="345" language="de" STYLEREFS="font0"><Shape><Polygon POINTS="348,1428 1606,1428 1606,1564 348,1564 348,1428"/></Shape>
|
||||
<TextLine HEIGHT="32" WIDTH="1225" VPOS="1429" HPOS="362"><String WC="0.4325000048" CONTENT="Fig." HEIGHT="26" WIDTH="46" VPOS="1435" HPOS="362"/><SP WIDTH="22" VPOS="1438" HPOS="409"/><String WC="0.3540000021" CONTENT="J54;." HEIGHT="22" WIDTH="44" VPOS="1438" HPOS="432"/><SP WIDTH="33" VPOS="1434" HPOS="477"/><String WC="0.7620000243" CONTENT="Cosmarium." HEIGHT="22" WIDTH="139" VPOS="1433" HPOS="511"/><SP WIDTH="32" VPOS="1432" HPOS="651"/><String WC="0.4550000131" CONTENT="A." HEIGHT="21" WIDTH="30" VPOS="1432" HPOS="684"/><SP WIDTH="19" VPOS="1432" HPOS="715"/><String WC="0.7699999809" CONTENT="C." HEIGHT="21" WIDTH="25" VPOS="1432" HPOS="735"/><SP WIDTH="23" VPOS="1439" HPOS="761"/><String WC="0.6628571153" CONTENT="margaritaceum," HEIGHT="28" WIDTH="184" VPOS="1431" HPOS="785"/><SP WIDTH="30" VPOS="1432" HPOS="970"/><String WC="0.4524999857" CONTENT="Fig." HEIGHT="27" WIDTH="46" VPOS="1432" HPOS="1001"/><SP WIDTH="15" VPOS="1435" HPOS="1048"/><String WC="0.5400000215" CONTENT="J35." HEIGHT="23" WIDTH="44" VPOS="1435" HPOS="1064"/><SP WIDTH="31" VPOS="1432" HPOS="1109"/><String WC="0.7572727203" CONTENT="Clostcrium." HEIGHT="23" WIDTH="134" VPOS="1430" HPOS="1141"/><SP WIDTH="27" VPOS="1431" HPOS="1276"/><String WC="0.5199999809" CONTENT="A" HEIGHT="19" WIDTH="22" VPOS="1431" HPOS="1304"/><SP WIDTH="18" VPOS="1430" HPOS="1327"/><String WC="0.6366666555" CONTENT="CI." HEIGHT="21" WIDTH="33" VPOS="1430" HPOS="1346"/><SP WIDTH="16" VPOS="1430" HPOS="1380"/><String WC="0.6342856884" CONTENT="lunula," HEIGHT="25" WIDTH="86" VPOS="1430" HPOS="1397"/><SP WIDTH="21" VPOS="1429" HPOS="1484"/><String WC="0.6314285994" CONTENT="Linzel-" HEIGHT="26" WIDTH="81" VPOS="1429" HPOS="1506"/></TextLine>
|
||||
<TextLine HEIGHT="32" WIDTH="1225" VPOS="1461" HPOS="361"><String WC="0.5600000024" CONTENT="a" HEIGHT="13" WIDTH="13" VPOS="1474" HPOS="361"/><SP WIDTH="14" VPOS="1468" HPOS="375"/><String WC="0.5083333254" CONTENT="Lnizelzellp," HEIGHT="26" WIDTH="128" VPOS="1467" HPOS="390"/><SP WIDTH="15" VPOS="1467" HPOS="519"/><String WC="0.25" CONTENT="b" HEIGHT="20" WIDTH="13" VPOS="1466" HPOS="535"/><SP WIDTH="14" VPOS="1466" HPOS="549"/><String WC="0.5822222233" CONTENT="Iochspore" HEIGHT="26" WIDTH="112" VPOS="1465" HPOS="564"/><SP WIDTH="14" VPOS="1471" HPOS="677"/><String WC="0.3700000048" CONTENT="mit" HEIGHT="20" WIDTH="39" VPOS="1465" HPOS="692"/><SP WIDTH="10" VPOS="1465" HPOS="732"/><String WC="0.3100000024" CONTENT="den" HEIGHT="20" WIDTH="37" VPOS="1465" HPOS="743"/><SP WIDTH="13" VPOS="1471" HPOS="781"/><String WC="0.4350000024" CONTENT="entleerten" HEIGHT="21" WIDTH="111" VPOS="1464" HPOS="795"/><SP WIDTH="8" VPOS="1464" HPOS="907"/><String WC="0.7940000296" CONTENT="Zell-" HEIGHT="27" WIDTH="55" VPOS="1464" HPOS="916"/><SP WIDTH="28" VPOS="1471" HPOS="972"/><String WC="0.6333333254" CONTENT="zelle," HEIGHT="25" WIDTH="54" VPOS="1465" HPOS="1001"/><SP WIDTH="15" VPOS="1464" HPOS="1056"/><String WC="0.2800000012" CONTENT="B" HEIGHT="20" WIDTH="18" VPOS="1464" HPOS="1072"/><SP WIDTH="14" VPOS="1464" HPOS="1091"/><String WC="0.9233333468" CONTENT="CI." HEIGHT="21" WIDTH="32" VPOS="1464" HPOS="1106"/><SP WIDTH="15" VPOS="1471" HPOS="1139"/><String WC="0.8188889027" CONTENT="rostratum" HEIGHT="19" WIDTH="111" VPOS="1465" HPOS="1155"/><SP WIDTH="12" VPOS="1463" HPOS="1267"/><String WC="0.2399999946" CONTENT="(nad?" 
HEIGHT="25" WIDTH="62" VPOS="1463" HPOS="1280"/><SP WIDTH="8" VPOS="1464" HPOS="1343"/><String WC="0.2949999869" CONTENT="Präparat" HEIGHT="26" WIDTH="110" VPOS="1463" HPOS="1352"/><SP WIDTH="10" VPOS="1465" HPOS="1463"/><String WC="0.1566666663" CONTENT="uon" HEIGHT="16" WIDTH="41" VPOS="1467" HPOS="1474"/><SP WIDTH="8" VPOS="1463" HPOS="1516"/><String WC="0.3420000076" CONTENT="pvof." HEIGHT="27" WIDTH="61" VPOS="1461" HPOS="1525"/></TextLine>
|
||||
<TextLine HEIGHT="33" WIDTH="1224" VPOS="1493" HPOS="362"><String WC="0.6571428776" CONTENT="häuten." HEIGHT="27" WIDTH="88" VPOS="1499" HPOS="362"/><SP WIDTH="27" VPOS="1499" HPOS="451"/><String WC="0.400000006" CONTENT="B" HEIGHT="20" WIDTH="18" VPOS="1499" HPOS="479"/><SP WIDTH="15" VPOS="1499" HPOS="498"/><String WC="0.6918181777" CONTENT="Linzelzelle" HEIGHT="27" WIDTH="120" VPOS="1497" HPOS="514"/><SP WIDTH="22" VPOS="1503" HPOS="635"/><String WC="0.453333348" CONTENT="von" HEIGHT="14" WIDTH="42" VPOS="1503" HPOS="658"/><SP WIDTH="21" VPOS="1497" HPOS="701"/><String WC="0.9250000119" CONTENT="C." HEIGHT="20" WIDTH="24" VPOS="1497" HPOS="723"/><SP WIDTH="15" VPOS="1497" HPOS="748"/><String WC="0.8562499881" CONTENT="botrytis" HEIGHT="26" WIDTH="89" VPOS="1497" HPOS="764"/><SP WIDTH="18" VPOS="1502" HPOS="854"/><String WC="0.4499999881" CONTENT="mit" HEIGHT="21" WIDTH="40" VPOS="1496" HPOS="873"/><SP WIDTH="19" VPOS="1498" HPOS="914"/><String WC="0.6700000167" CONTENT="un-" HEIGHT="15" WIDTH="38" VPOS="1502" HPOS="934"/><SP WIDTH="29" VPOS="1496" HPOS="973"/><String WC="0.5155555606" CONTENT="Homfeld)," HEIGHT="27" WIDTH="115" VPOS="1496" HPOS="1003"/><SP WIDTH="20" VPOS="1497" HPOS="1119"/><String WC="0.3355555534" CONTENT=")ochspore" HEIGHT="28" WIDTH="112" VPOS="1495" HPOS="1140"/><SP WIDTH="14" VPOS="1501" HPOS="1253"/><String WC="0.853333354" CONTENT="mit" HEIGHT="20" WIDTH="39" VPOS="1495" HPOS="1268"/><SP WIDTH="13" VPOS="1495" HPOS="1308"/><String WC="0.5233333111" CONTENT="den" HEIGHT="20" WIDTH="37" VPOS="1495" HPOS="1322"/><SP WIDTH="13" VPOS="1494" HPOS="1360"/><String WC="0.4783333242" CONTENT="leeren" HEIGHT="22" WIDTH="65" VPOS="1494" HPOS="1374"/><SP WIDTH="10" VPOS="1494" HPOS="1440"/><String WC="0.6600000262" CONTENT="Zellhäuten," HEIGHT="28" WIDTH="135" VPOS="1493" HPOS="1451"/></TextLine>
|
||||
<TextLine HEIGHT="29" WIDTH="839" VPOS="1527" HPOS="568"><String WC="0.4187499881" CONTENT="gleichen" HEIGHT="27" WIDTH="90" VPOS="1529" HPOS="568"/><SP WIDTH="14" VPOS="1529" HPOS="659"/><String WC="0.6687499881" CONTENT="Hälften." HEIGHT="27" WIDTH="97" VPOS="1529" HPOS="674"/><SP WIDTH="411" VPOS="1527" HPOS="772"/><String WC="0.7599999905" CONTENT="in" HEIGHT="21" WIDTH="22" VPOS="1527" HPOS="1184"/><SP WIDTH="13" VPOS="1534" HPOS="1207"/><String WC="0.4300000072" CONTENT="zwei" HEIGHT="26" WIDTH="50" VPOS="1527" HPOS="1221"/><SP WIDTH="15" VPOS="1527" HPOS="1272"/><String WC="0.6629999876" CONTENT="Ansichten." HEIGHT="26" WIDTH="119" VPOS="1527" HPOS="1288"/></TextLine>
|
||||
</TextBlock></ComposedBlock>
|
||||
<TextBlock ID="Page1_Block7" HEIGHT="610" WIDTH="1241" VPOS="1578" HPOS="354" language="de" STYLEREFS="font1"><Shape><Polygon POINTS="357,1583 1596,1583 1596,2189 357,2189 357,1583"/></Shape>
|
||||
<TextLine HEIGHT="49" WIDTH="1224" VPOS="1583" HPOS="363"><String WC="0.6650000215" CONTENT="Zu" HEIGHT="34" WIDTH="45" VPOS="1589" HPOS="363"/><SP WIDTH="37" VPOS="1590" HPOS="409"/><String WC="0.7360000014" CONTENT="hause" HEIGHT="43" WIDTH="97" VPOS="1589" HPOS="447"/><SP WIDTH="37" VPOS="1588" HPOS="545"/><String WC="0.7419999838" CONTENT="spült" HEIGHT="43" WIDTH="77" VPOS="1587" HPOS="583"/><SP WIDTH="32" VPOS="1589" HPOS="661"/><String WC="0.6266666651" CONTENT="man" HEIGHT="24" WIDTH="75" VPOS="1597" HPOS="694"/><SP WIDTH="37" VPOS="1587" HPOS="770"/><String WC="0.9300000072" CONTENT="die" HEIGHT="34" WIDTH="46" VPOS="1587" HPOS="808"/><SP WIDTH="36" VPOS="1596" HPOS="855"/><String WC="0.8169230819" CONTENT="mitgenommenen" HEIGHT="43" WIDTH="280" VPOS="1586" HPOS="892"/><SP WIDTH="38" VPOS="1586" HPOS="1173"/><String WC="0.7077777982" CONTENT="Pröbchen," HEIGHT="43" WIDTH="172" VPOS="1585" HPOS="1212"/><SP WIDTH="39" VPOS="1584" HPOS="1385"/><String WC="0.5366666913" CONTENT="die" HEIGHT="35" WIDTH="46" VPOS="1584" HPOS="1425"/><SP WIDTH="40" VPOS="1594" HPOS="1472"/><String WC="0.6233333349" CONTENT="man" HEIGHT="24" WIDTH="74" VPOS="1594" HPOS="1513"/></TextLine>
|
||||
<TextLine HEIGHT="48" WIDTH="1224" VPOS="1639" HPOS="363"><String WC="0.6377778053" CONTENT="natürlich" HEIGHT="43" WIDTH="148" VPOS="1644" HPOS="363"/><SP WIDTH="43" VPOS="1643" HPOS="512"/><String WC="0.5960000157" CONTENT="nicht" HEIGHT="43" WIDTH="75" VPOS="1642" HPOS="556"/><SP WIDTH="41" VPOS="1642" HPOS="632"/><String WC="0.7549999952" CONTENT="literweise" HEIGHT="43" WIDTH="157" VPOS="1642" HPOS="674"/><SP WIDTH="42" VPOS="1642" HPOS="832"/><String WC="0.6299999952" CONTENT="sammelt," HEIGHT="43" WIDTH="156" VPOS="1641" HPOS="875"/><SP WIDTH="43" VPOS="1641" HPOS="1032"/><String WC="1." CONTENT="in" HEIGHT="34" WIDTH="30" VPOS="1641" HPOS="1076"/><SP WIDTH="41" VPOS="1651" HPOS="1107"/><String WC="0.6600000262" CONTENT="wenig" HEIGHT="44" WIDTH="102" VPOS="1640" HPOS="1149"/><SP WIDTH="37" VPOS="1641" HPOS="1252"/><String WC="0.6949999928" CONTENT="Wasser" HEIGHT="42" WIDTH="118" VPOS="1640" HPOS="1290"/><SP WIDTH="37" VPOS="1650" HPOS="1409"/><String WC="0.8700000048" CONTENT="ab" HEIGHT="33" WIDTH="39" VPOS="1640" HPOS="1447"/><SP WIDTH="38" VPOS="1639" HPOS="1487"/><String WC="0.3733333349" CONTENT="und" HEIGHT="33" WIDTH="61" VPOS="1639" HPOS="1526"/></TextLine>
|
||||
<TextLine HEIGHT="48" WIDTH="1226" VPOS="1693" HPOS="362"><String WC="0.7250000238" CONTENT="bringt" HEIGHT="42" WIDTH="107" VPOS="1699" HPOS="362"/><SP WIDTH="43" VPOS="1700" HPOS="469"/><String WC="0.6857143044" CONTENT="winzige" HEIGHT="44" WIDTH="131" VPOS="1697" HPOS="512"/><SP WIDTH="36" VPOS="1698" HPOS="643"/><String WC="0.7214285731" CONTENT="Partien" HEIGHT="43" WIDTH="129" VPOS="1697" HPOS="679"/><SP WIDTH="46" VPOS="1697" HPOS="808"/><String WC="0.7133333087" CONTENT="des" HEIGHT="35" WIDTH="53" VPOS="1696" HPOS="854"/><SP WIDTH="46" VPOS="1706" HPOS="907"/><String WC="0.7216666937" CONTENT="abgeklopften" HEIGHT="43" WIDTH="222" VPOS="1696" HPOS="953"/><SP WIDTH="38" VPOS="1696" HPOS="1175"/><String WC="0.5181818008" CONTENT="Scf]lid?es-" HEIGHT="43" WIDTH="151" VPOS="1695" HPOS="1213"/><SP WIDTH="32" VPOS="1705" HPOS="1364"/><String WC="0.7933333516" CONTENT="mit" HEIGHT="35" WIDTH="57" VPOS="1694" HPOS="1396"/><SP WIDTH="37" VPOS="1696" HPOS="1453"/><String WC="0.7400000095" CONTENT="einem" HEIGHT="35" WIDTH="98" VPOS="1694" HPOS="1490"/></TextLine>
|
||||
<TextLine HEIGHT="47" WIDTH="1224" VPOS="1749" HPOS="363"><String WC="0.7430769205" CONTENT="Wassertropfen" HEIGHT="43" WIDTH="240" VPOS="1753" HPOS="363"/><SP WIDTH="32" VPOS="1763" HPOS="604"/><String WC="0.6000000238" CONTENT="auf" HEIGHT="42" WIDTH="55" VPOS="1752" HPOS="637"/><SP WIDTH="29" VPOS="1752" HPOS="693"/><String WC="0.6359999776" CONTENT="einen" HEIGHT="34" WIDTH="87" VPOS="1752" HPOS="723"/><SP WIDTH="31" VPOS="1753" HPOS="811"/><String WC="0.7069230676" CONTENT="Objektträger." HEIGHT="44" WIDTH="233" VPOS="1751" HPOS="843"/><SP WIDTH="51" VPOS="1752" HPOS="1077"/><String WC="0.6866666675" CONTENT="Mit" HEIGHT="35" WIDTH="65" VPOS="1750" HPOS="1129"/><SP WIDTH="29" VPOS="1752" HPOS="1195"/><String WC="0.6750000119" CONTENT="zwei" HEIGHT="42" WIDTH="75" VPOS="1750" HPOS="1225"/><SP WIDTH="30" VPOS="1750" HPOS="1301"/><String WC="0.7866666913" CONTENT="feinen" HEIGHT="42" WIDTH="101" VPOS="1750" HPOS="1332"/><SP WIDTH="30" VPOS="1751" HPOS="1434"/><String WC="0.6683333516" CONTENT="Nadeln" HEIGHT="35" WIDTH="122" VPOS="1749" HPOS="1465"/></TextLine>
|
||||
<TextLine HEIGHT="48" WIDTH="1224" VPOS="1804" HPOS="363"><String WC="0.7785714269" CONTENT="breitet" HEIGHT="33" WIDTH="109" VPOS="1809" HPOS="363"/><SP WIDTH="23" VPOS="1810" HPOS="473"/><String WC="0.4099999964" CONTENT="man" HEIGHT="24" WIDTH="74" VPOS="1818" HPOS="497"/><SP WIDTH="24" VPOS="1808" HPOS="572"/><String WC="0.8100000024" CONTENT="das" HEIGHT="33" WIDTH="56" VPOS="1808" HPOS="597"/><SP WIDTH="19" VPOS="1808" HPOS="654"/><String WC="0.7633333206" CONTENT="Klümpchen" HEIGHT="43" WIDTH="186" VPOS="1807" HPOS="674"/><SP WIDTH="24" VPOS="1817" HPOS="861"/><String WC="0.678888917" CONTENT="möglichst" HEIGHT="44" WIDTH="151" VPOS="1806" HPOS="886"/><SP WIDTH="23" VPOS="1809" HPOS="1038"/><String WC="0.6850000024" CONTENT="weit" HEIGHT="34" WIDTH="71" VPOS="1806" HPOS="1062"/><SP WIDTH="23" VPOS="1809" HPOS="1134"/><String WC="0.6025000215" CONTENT="aus," HEIGHT="33" WIDTH="68" VPOS="1816" HPOS="1158"/><SP WIDTH="25" VPOS="1805" HPOS="1227"/><String WC="0.7080000043" CONTENT="damit" HEIGHT="34" WIDTH="98" VPOS="1805" HPOS="1253"/><SP WIDTH="23" VPOS="1807" HPOS="1352"/><String WC="1." CONTENT="es" HEIGHT="24" WIDTH="31" VPOS="1815" HPOS="1376"/><SP WIDTH="25" VPOS="1807" HPOS="1408"/><String WC="0.8366666436" CONTENT="übersicht" HEIGHT="44" WIDTH="140" VPOS="1804" HPOS="1434" SUBS_TYPE="HypPart1" SUBS_CONTENT="übersichtlich"/><HYP CONTENT=""/></TextLine>
|
||||
<TextLine HEIGHT="48" WIDTH="1224" VPOS="1859" HPOS="363"><String WC="0.6650000215" CONTENT="lich" HEIGHT="43" WIDTH="52" VPOS="1864" HPOS="363" SUBS_TYPE="HypPart2" SUBS_CONTENT="übersichtlich"/><SP WIDTH="31" VPOS="1864" HPOS="416"/><String WC="0.5849999785" CONTENT="wird" HEIGHT="33" WIDTH="76" VPOS="1864" HPOS="448"/><SP WIDTH="31" VPOS="1863" HPOS="525"/><String WC="0.9066666961" CONTENT="und" HEIGHT="34" WIDTH="61" VPOS="1862" HPOS="557"/><SP WIDTH="31" VPOS="1862" HPOS="619"/><String WC="0.8728571534" CONTENT="bedeckt" HEIGHT="34" WIDTH="119" VPOS="1862" HPOS="651"/><SP WIDTH="30" VPOS="1863" HPOS="771"/><String WC="0.7833333611" CONTENT="das" HEIGHT="33" WIDTH="57" VPOS="1863" HPOS="802"/><SP WIDTH="24" VPOS="1862" HPOS="860"/><String WC="0.7537500262" CONTENT="Präparat" HEIGHT="43" WIDTH="161" VPOS="1862" HPOS="885"/><SP WIDTH="27" VPOS="1863" HPOS="1047"/><String WC="0.7566666603" CONTENT="mit" HEIGHT="34" WIDTH="56" VPOS="1861" HPOS="1075"/><SP WIDTH="24" VPOS="1863" HPOS="1132"/><String WC="0.7179999948" CONTENT="einem" HEIGHT="34" WIDTH="96" VPOS="1861" HPOS="1157"/><SP WIDTH="24" VPOS="1861" HPOS="1254"/><String WC="0.6629999876" CONTENT="veckglase." HEIGHT="42" WIDTH="171" VPOS="1861" HPOS="1279"/><SP WIDTH="47" VPOS="1860" HPOS="1451"/><String WC="1." CONTENT="Beim" HEIGHT="34" WIDTH="88" VPOS="1859" HPOS="1499"/></TextLine>
|
||||
<TextLine HEIGHT="48" WIDTH="1223" VPOS="1914" HPOS="364"><String WC="0.5649999976" CONTENT="Züchen" HEIGHT="43" WIDTH="115" VPOS="1919" HPOS="364"/><SP WIDTH="24" VPOS="1929" HPOS="480"/><String WC="0.8666666746" CONTENT="mit" HEIGHT="35" WIDTH="55" VPOS="1918" HPOS="505"/><SP WIDTH="22" VPOS="1919" HPOS="561"/><String WC="0.8566666842" CONTENT="mittlerer" HEIGHT="33" WIDTH="148" VPOS="1918" HPOS="584"/><SP WIDTH="24" VPOS="1918" HPOS="733"/><String WC="0.6583333611" CONTENT="Vergrößerung" HEIGHT="44" WIDTH="238" VPOS="1917" HPOS="758"/><SP WIDTH="31" VPOS="1927" HPOS="997"/><String WC="0.4524999857" CONTENT="wird" HEIGHT="34" WIDTH="78" VPOS="1916" HPOS="1029"/><SP WIDTH="24" VPOS="1917" HPOS="1108"/><String WC="0.6800000072" CONTENT="man" HEIGHT="25" WIDTH="73" VPOS="1926" HPOS="1133"/><SP WIDTH="25" VPOS="1916" HPOS="1207"/><String WC="0.6316666603" CONTENT="Formen" HEIGHT="42" WIDTH="132" VPOS="1916" HPOS="1233"/><SP WIDTH="24" VPOS="1915" HPOS="1366"/><String WC="0.7300000191" CONTENT="finden," HEIGHT="43" WIDTH="116" VPOS="1914" HPOS="1391"/><SP WIDTH="32" VPOS="1915" HPOS="1508"/><String WC="0.8633333445" CONTENT="die" HEIGHT="33" WIDTH="46" VPOS="1915" HPOS="1541"/></TextLine>
|
||||
<TextLine HEIGHT="48" WIDTH="1222" VPOS="1969" HPOS="365"><String WC="0.6333333254" CONTENT="aus" HEIGHT="23" WIDTH="58" VPOS="1984" HPOS="365"/><SP WIDTH="29" VPOS="1984" HPOS="424"/><String WC="0.7825000286" CONTENT="zwei" HEIGHT="41" WIDTH="74" VPOS="1974" HPOS="454"/><SP WIDTH="31" VPOS="1973" HPOS="529"/><String WC="0.6437500119" CONTENT="einander" HEIGHT="34" WIDTH="147" VPOS="1972" HPOS="561"/><SP WIDTH="30" VPOS="1983" HPOS="709"/><String WC="0.6938889027" CONTENT="gegenüberstehenden" HEIGHT="44" WIDTH="336" VPOS="1972" HPOS="740"/><SP WIDTH="32" VPOS="1972" HPOS="1077"/><String WC="0.5190908909" CONTENT="Halbkreisen" HEIGHT="45" WIDTH="198" VPOS="1970" HPOS="1110"/><SP WIDTH="33" VPOS="1970" HPOS="1309"/><String WC="0.2849999964" CONTENT="in" HEIGHT="34" WIDTH="31" VPOS="1970" HPOS="1343"/><SP WIDTH="33" VPOS="1970" HPOS="1375"/><String WC="0.8033333421" CONTENT="der" HEIGHT="33" WIDTH="51" VPOS="1970" HPOS="1409"/><SP WIDTH="30" VPOS="1971" HPOS="1461"/><String WC="0.8820000291" CONTENT="Mitte" HEIGHT="35" WIDTH="95" VPOS="1969" HPOS="1492"/></TextLine>
|
||||
<TextLine HEIGHT="49" WIDTH="1227" VPOS="2023" HPOS="361"><String WC="0.6323529482" CONTENT="zusammengewachsen" HEIGHT="43" WIDTH="350" VPOS="2028" HPOS="361"/><SP WIDTH="32" VPOS="2038" HPOS="711"/><String WC="0.7599999905" CONTENT="erscheinen" HEIGHT="44" WIDTH="163" VPOS="2027" HPOS="743"/><SP WIDTH="26" VPOS="2025" HPOS="906"/><String WC="0.8854545355" CONTENT="(Cosmarium," HEIGHT="44" WIDTH="238" VPOS="2025" HPOS="932"/><SP WIDTH="31" VPOS="2026" HPOS="1170"/><String WC="0.7774999738" CONTENT="Fig." HEIGHT="42" WIDTH="68" VPOS="2026" HPOS="1201"/><SP WIDTH="30" VPOS="2029" HPOS="1269"/><String WC="0.6140000224" CONTENT="134)," HEIGHT="43" WIDTH="84" VPOS="2024" HPOS="1299"/><SP WIDTH="34" VPOS="2035" HPOS="1383"/><String WC="0.6825000048" CONTENT="oder" HEIGHT="33" WIDTH="72" VPOS="2025" HPOS="1417"/><SP WIDTH="24" VPOS="2034" HPOS="1489"/><String WC="0.6833333373" CONTENT="man" HEIGHT="24" WIDTH="75" VPOS="2034" HPOS="1513"/></TextLine>
|
||||
<TextLine HEIGHT="47" WIDTH="1223" VPOS="2079" HPOS="365"><String WC="0.7799999714" CONTENT="findet" HEIGHT="41" WIDTH="94" VPOS="2083" HPOS="365"/><SP WIDTH="18" VPOS="2083" HPOS="460"/><String WC="0.8355555534" CONTENT="türkische" HEIGHT="44" WIDTH="142" VPOS="2082" HPOS="479"/><SP WIDTH="15" VPOS="2083" HPOS="622"/><String WC="0.6140000224" CONTENT="Halbmonde," HEIGHT="43" WIDTH="203" VPOS="2082" HPOS="638"/><SP WIDTH="20" VPOS="2083" HPOS="842"/><String WC="0.7233333588" CONTENT="die" HEIGHT="34" WIDTH="46" VPOS="2082" HPOS="863"/><SP WIDTH="21" VPOS="2092" HPOS="910"/><String WC="0.5899999738" CONTENT="genau" HEIGHT="33" WIDTH="101" VPOS="2091" HPOS="932"/><SP WIDTH="17" VPOS="2081" HPOS="1034"/><String WC="0.6620000005" CONTENT="durch" HEIGHT="43" WIDTH="86" VPOS="2081" HPOS="1052"/><SP WIDTH="20" VPOS="2081" HPOS="1139"/><String WC="0.6340000033" CONTENT="einen" HEIGHT="35" WIDTH="87" VPOS="2080" HPOS="1160"/><SP WIDTH="15" VPOS="2081" HPOS="1248"/><String WC="0.7910000086" CONTENT="Ouerstrich" HEIGHT="43" WIDTH="168" VPOS="2080" HPOS="1264"/><SP WIDTH="21" VPOS="2080" HPOS="1433"/><String WC="0.5950000286" CONTENT="halbiert" HEIGHT="44" WIDTH="133" VPOS="2079" HPOS="1455"/></TextLine>
|
||||
<TextLine HEIGHT="50" WIDTH="1222" VPOS="2133" HPOS="365"><String WC="0.5674999952" CONTENT="sind" HEIGHT="43" WIDTH="62" VPOS="2137" HPOS="365"/><SP WIDTH="37" VPOS="2137" HPOS="428"/><String WC="0.8000000119" CONTENT="und" HEIGHT="34" WIDTH="61" VPOS="2137" HPOS="466"/><SP WIDTH="38" VPOS="2136" HPOS="528"/><String WC="0.6499999762" CONTENT="an" HEIGHT="24" WIDTH="40" VPOS="2147" HPOS="567"/><SP WIDTH="33" VPOS="2137" HPOS="608"/><String WC="0.8183333278" CONTENT="beiden" HEIGHT="35" WIDTH="107" VPOS="2137" HPOS="642"/><SP WIDTH="34" VPOS="2138" HPOS="750"/><String WC="0.4499999881" CONTENT="Enden" HEIGHT="34" WIDTH="106" VPOS="2137" HPOS="785"/><SP WIDTH="36" VPOS="2137" HPOS="892"/><String WC="0.8600000143" CONTENT="je" HEIGHT="44" WIDTH="27" VPOS="2137" HPOS="929"/><SP WIDTH="34" VPOS="2146" HPOS="957"/><String WC="0.7225000262" CONTENT="eine" HEIGHT="34" WIDTH="64" VPOS="2136" HPOS="992"/><SP WIDTH="33" VPOS="2136" HPOS="1057"/><String WC="0.9139999747" CONTENT="kreisrunde" HEIGHT="35" WIDTH="180" VPOS="2135" HPOS="1091"/><SP WIDTH="33" VPOS="2135" HPOS="1272"/><String WC="0.6079999804" CONTENT="Blase" HEIGHT="43" WIDTH="89" VPOS="2135" HPOS="1306"/><SP WIDTH="33" VPOS="2145" HPOS="1396"/><String WC="0.7266666889" CONTENT="enthalten" HEIGHT="46" WIDTH="157" VPOS="2133" HPOS="1430"/></TextLine>
|
||||
</TextBlock><GraphicalElement ID="Page1_Block8" HEIGHT="184" WIDTH="8" VPOS="900" HPOS="1258"/><GraphicalElement ID="Page1_Block9" HEIGHT="90" WIDTH="3" VPOS="896" HPOS="1427"/><GraphicalElement ID="Page1_Block10" HEIGHT="146" WIDTH="7" VPOS="885" HPOS="1544"/>
|
||||
</PrintSpace>
|
||||
</Page>
|
||||
</Layout>
|
||||
</alto>
|
37
qurator/dinglehopper/tests/data/test.alto3.xml
Normal file
37
qurator/dinglehopper/tests/data/test.alto3.xml
Normal file
|
@ -0,0 +1,37 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<alto xmlns="http://www.loc.gov/standards/alto/ns-v3#">
|
||||
<Layout>
|
||||
<Page WIDTH="1148" HEIGHT="1852" PHYSICAL_IMG_NR="0" ID="page_0">
|
||||
<PrintSpace HPOS="0" VPOS="0" WIDTH="1148" HEIGHT="1852">
|
||||
<TextBlock ID="block_3" HPOS="135" VPOS="251" WIDTH="741" HEIGHT="47">
|
||||
<TextLine ID="line_3" HPOS="135" VPOS="251" WIDTH="741" HEIGHT="47">
|
||||
<String ID="string_5" HPOS="135" VPOS="251" WIDTH="65" HEIGHT="34" WC="0.89" CONTENT="über"/><SP WIDTH="19" VPOS="251" HPOS="200"/>
|
||||
<String ID="string_6" HPOS="219" VPOS="256" WIDTH="41" HEIGHT="31" WC="0.96" CONTENT="die"/><SP WIDTH="23" VPOS="256" HPOS="260"/>
|
||||
<String ID="string_7" HPOS="283" VPOS="258" WIDTH="87" HEIGHT="30" WC="0.87" CONTENT="vielen"/><SP WIDTH="16" VPOS="258" HPOS="370"/>
|
||||
<String ID="string_8" HPOS="386" VPOS="259" WIDTH="118" HEIGHT="37" WC="0.96" CONTENT="Sorgen"/><SP WIDTH="14" VPOS="259" HPOS="504"/>
|
||||
<String ID="string_9" HPOS="518" VPOS="265" WIDTH="90" HEIGHT="32" WC="0.21" CONTENT="wegen"/><SP WIDTH="12" VPOS="265" HPOS="608"/>
|
||||
<String ID="string_10" HPOS="620" VPOS="254" WIDTH="130" HEIGHT="42" WC="0.21" CONTENT="deſſelben"/><SP WIDTH="24" VPOS="254" HPOS="750"/>
|
||||
<String ID="string_11" HPOS="774" VPOS="255" WIDTH="102" HEIGHT="43" WC="0.74" CONTENT="vergaß"/>
|
||||
</TextLine>
|
||||
</TextBlock>
|
||||
<TextBlock ID="block_4" HPOS="134" VPOS="304" WIDTH="740" HEIGHT="40">
|
||||
<TextLine ID="line_4" HPOS="134" VPOS="304" WIDTH="740" HEIGHT="40">
|
||||
<String ID="string_12" HPOS="134" VPOS="304" WIDTH="203" HEIGHT="40" WC="0.75" CONTENT="Hartkopf,"/><SP WIDTH="30" VPOS="304" HPOS="337"/>
|
||||
<String ID="string_13" HPOS="367" VPOS="310" WIDTH="45" HEIGHT="27" WC="0.93" CONTENT="der"/><SP WIDTH="24" VPOS="310" HPOS="412"/>
|
||||
<String ID="string_14" HPOS="436" VPOS="309" WIDTH="74" HEIGHT="35" WC="0.59" CONTENT="Frau"/><SP WIDTH="22" VPOS="309" HPOS="510"/>
|
||||
<String ID="string_15" HPOS="532" VPOS="306" WIDTH="189" HEIGHT="36" WC="0.23" CONTENT="Amtmännin"/><SP WIDTH="16" VPOS="306" HPOS="721"/>
|
||||
<String ID="string_16" HPOS="737" VPOS="307" WIDTH="66" HEIGHT="34" WC="0.52" CONTENT="das"/><SP WIDTH="16" VPOS="307" HPOS="803"/>
|
||||
<String ID="string_17" HPOS="819" VPOS="318" WIDTH="55" HEIGHT="24" WC="0.0" CONTENT="ver-"/>
|
||||
</TextLine>
|
||||
</TextBlock>
|
||||
<TextBlock ID="block_5" HPOS="134" VPOS="356" WIDTH="761" HEIGHT="46">
|
||||
<TextLine ID="line_5" HPOS="134" VPOS="356" WIDTH="761" HEIGHT="46">
|
||||
<String ID="string_18" HPOS="134" VPOS="356" WIDTH="137" HEIGHT="37" WC="0.92" CONTENT="ſprochene"/><SP WIDTH="31" VPOS="356" HPOS="271"/>
|
||||
<String ID="string_19" HPOS="302" VPOS="365" WIDTH="32" HEIGHT="30" WC="0.73" CONTENT="zu"/><SP WIDTH="29" VPOS="365" HPOS="334"/>
|
||||
<String ID="string_20" HPOS="363" VPOS="356" WIDTH="170" HEIGHT="39" WC="0.52" CONTENT="überliefern."/><SP WIDTH="28" VPOS="356" HPOS="533"/>
|
||||
</TextLine>
|
||||
</TextBlock>
|
||||
</PrintSpace>
|
||||
</Page>
|
||||
</Layout>
|
||||
</alto>
|
3394
qurator/dinglehopper/tests/data/test.page2018.xml
Normal file
3394
qurator/dinglehopper/tests/data/test.page2018.xml
Normal file
File diff suppressed because it is too large
Load diff
1
qurator/dinglehopper/tests/data/test.txt
Normal file
1
qurator/dinglehopper/tests/data/test.txt
Normal file
|
@ -0,0 +1 @@
|
|||
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
|
63
qurator/dinglehopper/tests/test_align.py
Normal file
63
qurator/dinglehopper/tests/test_align.py
Normal file
|
@ -0,0 +1,63 @@
|
|||
from .util import unzip
|
||||
from .. import align
|
||||
|
||||
|
||||
def test_left_empty():
    """An empty left string pairs every right-hand character with None."""
    expected = [(None, 'f'), (None, 'o'), (None, 'o')]
    assert list(align('', 'foo')) == expected
|
||||
|
||||
|
||||
def test_right_empty():
    """An empty right string pairs every left-hand character with None."""
    expected = [('f', None), ('o', None), ('o', None)]
    assert list(align('foo', '')) == expected
|
||||
|
||||
|
||||
def test_left_longer():
    """A surplus trailing character on the left is paired with None."""
    expected = [('f', 'f'), ('o', 'o'), ('o', 'o'), ('d', None)]
    assert list(align('food', 'foo')) == expected
|
||||
|
||||
|
||||
def test_right_longer():
    """A surplus trailing character on the right is paired with None."""
    expected = [('f', 'f'), ('o', 'o'), ('o', 'o'), (None, 'd')]
    assert list(align('foo', 'food')) == expected
|
||||
|
||||
|
||||
def test_some_diff():
    """Check both columns of the alignment of 'abcde' with 'aaadef'."""
    pairs = list(align('abcde', 'aaadef'))
    left_column, right_column = unzip(pairs)

    assert list(left_column) == ['a', 'b', 'c', 'd', 'e', None]
    assert list(right_column) == ['a', 'a', 'a', 'd', 'e', 'f']
|
||||
|
||||
|
||||
def test_longer():
    """Align two short sentences that differ by a deletion, an insertion and a substitution."""
    expected = [
        # "Dies "
        ('D', 'D'), ('i', 'i'), ('e', 'e'), ('s', 's'), (' ', ' '),
        # "ist "
        ('i', 'i'), ('s', 's'), ('t', 't'), (' ', ' '),
        # "eine " vs. "ein ": the extra 'e' has no partner on the right
        ('e', 'e'), ('i', 'i'), ('n', 'n'), ('e', None), (' ', ' '),
        # "Tst!" vs. "Test.": 'e' is missing on the left, '!' is paired with '.'
        ('T', 'T'), (None, 'e'), ('s', 's'), ('t', 't'), ('!', '.'),
    ]

    assert list(align('Dies ist eine Tst!', 'Dies ist ein Test.')) == expected
|
||||
|
||||
|
||||
def test_completely_different():
    """Two five-character strings without common characters align to five pairs."""
    alignment = list(align('abcde', 'fghij'))
    assert len(alignment) == 5
|
||||
|
||||
|
||||
def test_with_some_fake_ocr_errors():
    """Align a clean sentence against a noisy variant containing extra junk words."""
    clean = 'Über die vielen Sorgen wegen desselben vergaß'
    noisy = 'SomeJunk MoreJunk Übey die vielen Sorgen wegen AdditionalJunk deffelben vcrgab'
    left, right = unzip(list(align(clean, noisy)))

    # Beginning: the 18 leading junk characters have no counterpart on the left
    junk_prefix = 'SomeJunk MoreJunk '
    assert list(left[:18]) == [None]*18
    assert list(right[:18]) == list(junk_prefix)

    # End: the last characters of both strings are paired with each other
    assert list(left[-1:]) == ['ß']
    assert list(right[-1:]) == ['b']
|
37
qurator/dinglehopper/tests/test_character_error_rate.py
Normal file
37
qurator/dinglehopper/tests/test_character_error_rate.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
from __future__ import division, print_function
|
||||
|
||||
import math
|
||||
import unicodedata
|
||||
|
||||
from .. import character_error_rate
|
||||
|
||||
|
||||
def test_character_error_rate():
    """Check character_error_rate() on small hand-computed examples."""
    # (first argument, second argument, expected rate)
    cases = [
        ('a', 'a', 0),
        ('a', 'b', 1/1),
        ('Foo', 'Bar', 3/3),
        ('Foo', '', 3/3),
        ('', '', 0),
        ('Foo', 'Food', 1/3),
        ('Fnord', 'Food', 2/5),
        ('Müll', 'Mull', 1/4),
        ('Abstand', 'Sand', 4/7),
    ]
    for first, second, expected in cases:
        assert character_error_rate(first, second) == expected

    # An empty first argument combined with a non-empty second one gives infinity.
    assert math.isinf(character_error_rate('', 'Foo'))
|
||||
|
||||
|
||||
def test_character_error_rate_hard():
    """Check the CER on inputs that differ in Unicode normalization or code point count."""
    composed = unicodedata.normalize('NFC', 'Schlyñ lorem ipsum.')
    decomposed = unicodedata.normalize('NFD', 'Schlyñ lorem ipsum!')  # Different, decomposed!
    assert character_error_rate(composed, decomposed) == 1/19

    n_tilde = 'Schlyñ'
    assert len(n_tilde) == 6  # This ends with LATIN SMALL LETTER N WITH TILDE, so 6 code points
    m_tilde = 'Schlym̃'
    assert len(m_tilde) == 7  # This, OTOH, ends with LATIN SMALL LETTER M + COMBINING TILDE, 7 code points

    # Both strings have the same length in terms of grapheme clusters. So the CER should be symmetrical.
    assert character_error_rate(m_tilde, n_tilde) == 1/6
    assert character_error_rate(n_tilde, m_tilde) == 1/6
|
40
qurator/dinglehopper/tests/test_edit_distance.py
Normal file
40
qurator/dinglehopper/tests/test_edit_distance.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
from __future__ import division, print_function
|
||||
|
||||
import unicodedata
|
||||
|
||||
from .. import levenshtein, distance
|
||||
|
||||
|
||||
def test_levenshtein():
    """Check levenshtein() on small hand-computed string pairs."""
    # (first string, second string, expected distance)
    cases = [
        ('a', 'a', 0),
        ('a', 'b', 1),
        ('Foo', 'Bar', 3),
        ('', '', 0),
        ('Foo', '', 3),
        ('', 'Foo', 3),
        ('Foo', 'Food', 1),
        ('Fnord', 'Food', 2),
        ('Müll', 'Mull', 1),
        ('Abstand', 'Sand', 4),
    ]
    for first, second, expected in cases:
        assert levenshtein(first, second) == expected
|
||||
|
||||
|
||||
def test_levenshtein_other_sequences():
    """levenshtein() also works on lists, where each list item counts as one element."""
    for other in (['a', 'ab', 'c'], ['a', 'c']):
        assert levenshtein(['a', 'ab'], other) == 1
|
||||
|
||||
|
||||
def test_distance():
    """Check distance() on plain words and on words that differ only in Unicode form."""
    for first, second, expected in [('Fnord', 'Food', 2), ('Müll', 'Mull', 1)]:
        assert distance(first, second) == expected

    composed = unicodedata.normalize('NFC', 'Schlyñ')
    decomposed = unicodedata.normalize('NFD', 'Schlyñ')  # Different, decomposed!
    assert distance(composed, decomposed) == 0

    n_tilde = 'Schlyñ'
    assert len(n_tilde) == 6  # This ends with LATIN SMALL LETTER N WITH TILDE, so 6 code points
    m_tilde = 'Schlym̃'
    assert len(m_tilde) == 7  # This, OTOH, ends with LATIN SMALL LETTER M + COMBINING TILDE, 7 code points
    assert distance(n_tilde, m_tilde) == 1
|
38
qurator/dinglehopper/tests/test_editops.py
Normal file
38
qurator/dinglehopper/tests/test_editops.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
from .. import seq_editops, editops
|
||||
|
||||
|
||||
def test_trivial():
    """Identical inputs, including two empty strings, need no edit operations."""
    for same in ('abc', ''):
        assert seq_editops(same, same) == []
|
||||
|
||||
|
||||
def test_insert():
    """A single missing character is reported as one 'insert' at its position."""
    cases = [
        ('bc', 'abc', [('insert', 0, 0)]),
        ('ac', 'abc', [('insert', 1, 1)]),
        ('ab', 'abc', [('insert', 2, 2)]),
        ('', 'a', [('insert', 0, 0)]),
    ]
    for source, target, expected in cases:
        assert seq_editops(source, target) == expected
|
||||
|
||||
|
||||
def test_multiple():
    """An insert and a replace can occur in the same result."""
    expected = [('insert', 0, 0), ('replace', 2, 3)]
    assert seq_editops('bcd', 'abce') == expected
|
||||
|
||||
|
||||
def test_delete():
    """Check 'delete' operations alone and combined with other operations."""
    cases = [
        ('abcdef', 'cdef', [('delete', 0, 0), ('delete', 1, 0)]),
        ('Xabcdef', 'Xcdef', [('delete', 1, 1), ('delete', 2, 1)]),
        ('abcdefg', 'acdefX', [('delete', 1, 1), ('replace', 6, 5)]),
        ('abcde', 'aabcd', [('insert', 1, 1), ('delete', 4, 5)]),
        ('Foo', '', [('delete', 0, 0), ('delete', 1, 0), ('delete', 2, 0)]),
        ('Foolish', 'Foo', [('delete', 3, 3), ('delete', 4, 3), ('delete', 5, 3), ('delete', 6, 3)]),
    ]
    for source, target, expected in cases:
        assert seq_editops(source, target) == expected
|
||||
|
||||
|
||||
def test_ambiguous():
    """Pin the exact operations returned for 'bcd' -> 'abcef'."""
    expected = [('insert', 0, 0), ('replace', 2, 3), ('insert', 3, 4)]
    assert seq_editops('bcd', 'abcef') == expected
|
||||
|
||||
|
||||
def test_editops():
    """Test editops() in cases where dealing with grapheme clusters matters"""

    # In these cases, one of the words has a composed form, the other one does not.
    cases = [
        ('Schlyñ', 'Schlym̃', [('replace', 5, 5)]),
        ('oͤde', 'öde', [('replace', 0, 0)]),
    ]
    for first, second, expected in cases:
        assert editops(first, second) == expected
|
23
qurator/dinglehopper/tests/test_integ_align.py
Normal file
23
qurator/dinglehopper/tests/test_integ_align.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
from __future__ import division, print_function
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from lxml import etree as ET
|
||||
|
||||
from .. import align, page_text
|
||||
|
||||
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_align_page_files():
    """Align the ground-truth PAGE file with the fake OCR PAGE file and count differing pairs."""
    # In the fake OCR file, we changed 2 characters and replaced a fi ligature with fi.
    # → 4 elements in the alignment should be different.
    # NOTE: In this example, it doesn't matter that we work with "characters", not grapheme clusters.
    gt_path = os.path.join(data_dir, 'test-gt.page2018.xml')
    ocr_path = os.path.join(data_dir, 'test-fake-ocr.page2018.xml')
    gt = page_text(ET.parse(gt_path))
    ocr = page_text(ET.parse(ocr_path))

    mismatches = [pair for pair in align(gt, ocr) if pair[0] != pair[1]]
    assert len(mismatches) == 4
|
|
@ -0,0 +1,35 @@
|
|||
from __future__ import division, print_function
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from lxml import etree as ET
|
||||
|
||||
from .. import character_error_rate, page_text, alto_text
|
||||
|
||||
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_character_error_rate_between_page_files():
    """Compare the ground-truth PAGE file with the fake OCR PAGE file by CER."""
    # In the fake OCR file, we changed 2 characters and replaced a fi ligature with fi.
    gt_path = os.path.join(data_dir, 'test-gt.page2018.xml')
    ocr_path = os.path.join(data_dir, 'test-fake-ocr.page2018.xml')
    gt = page_text(ET.parse(gt_path))
    ocr = page_text(ET.parse(ocr_path))

    gt_length = 470 + 1 + 311  # 2 TextRegions, 1 \n
    assert character_error_rate(gt, ocr) == 4/gt_length
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_character_error_rate_between_page_alto():
    """The PAGE ground truth and the ALTO OCR of the lorem-ipsum scan yield identical text."""
    lorem_dir = os.path.join(data_dir, 'lorem-ipsum')
    gt = page_text(ET.parse(os.path.join(lorem_dir, 'lorem-ipsum-scan.gt.page.xml')))
    ocr = alto_text(ET.parse(os.path.join(lorem_dir, 'lorem-ipsum-scan.ocr.tesseract.alto.xml')))

    assert gt == ocr
    assert character_error_rate(gt, ocr) == 0
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_character_error_rate_between_page_alto_2():
    """Check the CER between PAGE ground truth and ALTO OCR of the 'bad' lorem-ipsum scan."""
    lorem_dir = os.path.join(data_dir, 'lorem-ipsum')
    gt = page_text(ET.parse(os.path.join(lorem_dir, 'lorem-ipsum-scan-bad.gt.page.xml')))
    ocr = alto_text(ET.parse(os.path.join(lorem_dir, 'lorem-ipsum-scan-bad.ocr.tesseract.alto.xml')))

    expected_rate = 8/591  # Manually verified
    assert character_error_rate(gt, ocr) == expected_rate
|
35
qurator/dinglehopper/tests/test_integ_edit_distance_ocr.py
Normal file
35
qurator/dinglehopper/tests/test_integ_edit_distance_ocr.py
Normal file
|
@ -0,0 +1,35 @@
|
|||
from __future__ import division, print_function
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from lxml import etree as ET
|
||||
|
||||
from .. import distance, page_text, alto_text
|
||||
|
||||
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_distance_between_page_files():
    """Compare the ground-truth PAGE file with the fake OCR PAGE file by edit distance."""
    # In the fake OCR file, we changed 2 characters and replaced a fi ligature with fi.
    gt_path = os.path.join(data_dir, 'test-gt.page2018.xml')
    ocr_path = os.path.join(data_dir, 'test-fake-ocr.page2018.xml')
    gt = page_text(ET.parse(gt_path))
    ocr = page_text(ET.parse(ocr_path))

    assert distance(gt, ocr) == 4
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_distance_between_page_alto():
    """The PAGE ground truth and the ALTO OCR of the lorem-ipsum scan have distance 0."""
    lorem_dir = os.path.join(data_dir, 'lorem-ipsum')
    gt = page_text(ET.parse(os.path.join(lorem_dir, 'lorem-ipsum-scan.gt.page.xml')))
    ocr = alto_text(ET.parse(os.path.join(lorem_dir, 'lorem-ipsum-scan.ocr.tesseract.alto.xml')))

    assert gt == ocr
    assert distance(gt, ocr) == 0
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_distance_between_page_alto_2():
    """Check the edit distance between PAGE ground truth and ALTO OCR of the 'bad' lorem-ipsum scan."""
    lorem_dir = os.path.join(data_dir, 'lorem-ipsum')
    gt = page_text(ET.parse(os.path.join(lorem_dir, 'lorem-ipsum-scan-bad.gt.page.xml')))
    ocr = alto_text(ET.parse(os.path.join(lorem_dir, 'lorem-ipsum-scan-bad.ocr.tesseract.alto.xml')))

    expected_distance = 8  # Manually verified
    assert distance(gt, ocr) == expected_distance
|
43
qurator/dinglehopper/tests/test_integ_word_error_rate_ocr.py
Normal file
43
qurator/dinglehopper/tests/test_integ_word_error_rate_ocr.py
Normal file
|
@ -0,0 +1,43 @@
|
|||
from __future__ import division, print_function
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from lxml import etree as ET
|
||||
|
||||
from .. import word_error_rate, words, page_text, alto_text
|
||||
|
||||
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_word_error_rate_between_page_files():
    """Compare the ground-truth PAGE file with the fake OCR PAGE file by word error rate."""
    # In the fake OCR file, we changed 2 characters and replaced a fi ligature with fi. → 3 changed words
    gt = page_text(ET.parse(os.path.join(data_dir, 'test-gt.page2018.xml')))

    # Manually verified word count per line
    words_per_line = [7, 6, 5, 8, 7, 6, 7, 8, 6, 7, 7, 5, 6, 8, 8, 7, 7, 6, 5, 4]
    gt_word_count = sum(words_per_line)
    assert len(list(words(gt))) == gt_word_count

    ocr = page_text(ET.parse(os.path.join(data_dir, 'test-fake-ocr.page2018.xml')))
    assert word_error_rate(gt, ocr) == 3/gt_word_count
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_word_error_rate_between_page_alto():
    """The PAGE ground truth and the ALTO OCR of the lorem-ipsum scan have a WER of 0."""
    lorem_dir = os.path.join(data_dir, 'lorem-ipsum')
    gt = page_text(ET.parse(os.path.join(lorem_dir, 'lorem-ipsum-scan.gt.page.xml')))
    ocr = alto_text(ET.parse(os.path.join(lorem_dir, 'lorem-ipsum-scan.ocr.tesseract.alto.xml')))

    assert gt == ocr
    assert word_error_rate(gt, ocr) == 0
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_word_error_rate_between_page_alto_2():
    """Check the WER between PAGE ground truth and ALTO OCR of the 'bad' lorem-ipsum scan."""
    lorem_dir = os.path.join(data_dir, 'lorem-ipsum')
    gt = page_text(ET.parse(os.path.join(lorem_dir, 'lorem-ipsum-scan-bad.gt.page.xml')))

    # Manually verified word count per line
    words_per_line = [14, 18, 17, 14, 17, 17, 3]
    gt_word_count = sum(words_per_line)
    assert len(list(words(gt))) == gt_word_count

    ocr = alto_text(ET.parse(os.path.join(lorem_dir, 'lorem-ipsum-scan-bad.ocr.tesseract.alto.xml')))

    # Manually verified, 6 words are wrong, 1 got split (=2 errors)
    assert word_error_rate(gt, ocr) == 7/gt_word_count
|
99
qurator/dinglehopper/tests/test_ocr_files.py
Normal file
99
qurator/dinglehopper/tests/test_ocr_files.py
Normal file
|
@ -0,0 +1,99 @@
|
|||
import os
|
||||
import re
|
||||
|
||||
import lxml.etree as ET
|
||||
import textwrap
|
||||
|
||||
from .. import alto_namespace, alto_text, page_namespace, page_text, text
|
||||
|
||||
data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
|
||||
|
||||
|
||||
def test_alto_namespace():
    """alto_namespace() reports the ALTO v3 namespace for the ALTO 3 fixture."""
    alto3_path = os.path.join(data_dir, 'test.alto3.xml')
    namespace = alto_namespace(ET.parse(alto3_path))
    assert namespace == 'http://www.loc.gov/standards/alto/ns-v3#'
|
||||
|
||||
|
||||
def test_alto_text():
    """alto_text() returns the ALTO 3 fixture as newline-separated text lines."""
    expected_lines = [
        'über die vielen Sorgen wegen deſſelben vergaß',
        'Hartkopf, der Frau Amtmännin das ver-',
        'ſprochene zu überliefern.',
    ]
    tree = ET.parse(os.path.join(data_dir, 'test.alto3.xml'))
    assert alto_text(tree) == '\n'.join(expected_lines)
|
||||
|
||||
|
||||
def test_alto_text_ALTO1():
    """alto_text() extracts a known phrase from the ALTO 1 fixture."""
    extracted = alto_text(ET.parse(os.path.join(data_dir, 'test.alto1.xml')))
    assert "being erected at the Broadway stock" in extracted
|
||||
|
||||
|
||||
def test_alto_text_ALTO2():
    """alto_text() extracts a known phrase spanning a line break from the ALTO 2 fixture."""
    extracted = alto_text(ET.parse(os.path.join(data_dir, 'test.alto2.xml')))
    assert "Halbmonde, die genau durch einen Ouerstrich halbiert\nsind und an beiden Enden" in extracted
|
||||
|
||||
|
||||
def test_alto_text_ALTO3():
    """alto_text() extracts a known phrase from the ALTO 3 fixture."""
    extracted = alto_text(ET.parse(os.path.join(data_dir, 'test.alto3.xml')))
    assert "über die vielen Sorgen wegen deſſelben vergaß" in extracted
|
||||
|
||||
|
||||
def test_page_namespace():
    """page_namespace() reports the 2018-07-15 PAGE namespace for the PAGE 2018 fixture."""
    page_path = os.path.join(data_dir, 'test.page2018.xml')
    namespace = page_namespace(ET.parse(page_path))
    assert namespace == 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15'
|
||||
|
||||
|
||||
def test_page_test():
|
||||
tree = ET.parse(os.path.join(data_dir, 'test.page2018.xml'))
|
||||
result = page_text(tree)
|
||||
expected = textwrap.dedent("""\
|
||||
ber die vielen Sorgen wegen deelben vergaß
|
||||
Hartkopf, der Frau Amtmnnin das ver⸗
|
||||
ſproene zu berliefern. — Ein Erpreer
|
||||
wurde an ihn abgeſit, um ihn ums Him⸗
|
||||
melswien zu ſagen, daß er das Verſproene
|
||||
glei den Augenbli berbringen mte, die
|
||||
Frau Amtmnnin htte auf ihn verlaen,
|
||||
und nun wßte e nit, was e anfangen
|
||||
ſote. Den Augenbli ſote er kommen,
|
||||
ſon vergieng e in ihrer Ang. — Die
|
||||
Ge wren ſon angekommen, und es fehlte
|
||||
ihr do no an aem. —
|
||||
Hartkopf mußte er bennen, und
|
||||
endli na langem Nadenken fiel es ihm er
|
||||
wieder ein. — Er langte den Zettel aus dem
|
||||
Accisbue heraus, und ſagte ſeiner Frau, daß
|
||||
e das, was da wre, herbeyſaffen mte.
|
||||
Jndeß mangelten do einige Generalia, die
|
||||
alſo wegfielen. — Hartkopf gieng ſelb
|
||||
mit und berbrate es. —""")
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_page_with_empty_region():
    """page_text() still returns non-empty text when the file contains an empty TextRegion."""
    # This file contains an empty TextRegion:
    #
    # <TextRegion id="region0000">
    #   <Coords points="488,133 1197,133 1197,193 488,193"/>
    #   <TextEquiv>
    #     <Unicode></Unicode>
    #   </TextEquiv>
    # </TextRegion>
    page_path = os.path.join(data_dir, 'brochrnx_73075507X/00000139.ocrd-tess.ocr.page.xml')
    extracted = page_text(ET.parse(page_path))
    assert extracted
|
||||
|
||||
|
||||
def test_page_order():
    """page_text() emits regions in an order matching the expected sequence of phrases."""
    # This file contains TextRegions where file order is not the same as reading order.
    extracted = page_text(ET.parse(os.path.join(data_dir, 'order.page.xml')))

    expected_sequence = r'Herr Konfrater.*75.*Etwas f.r Wittwen.*Ein gewi.er Lord.*76\. Die'
    assert re.search(expected_sequence, extracted, re.DOTALL)
|
||||
|
||||
|
||||
def test_text():
    """text() extracts a known phrase from an ALTO, a PAGE and a plain-text fixture."""
    # (expected phrase, fixture file name)
    cases = [
        ("being erected at the Broadway stock", 'test.alto1.xml'),
        ("wieder ein. — Er langte den Zettel aus dem", 'test.page2018.xml'),
        ("Lorem ipsum", 'test.txt'),
    ]
    for phrase, filename in cases:
        assert phrase in text(os.path.join(data_dir, filename))
|
45
qurator/dinglehopper/tests/test_word_error_rate.py
Normal file
45
qurator/dinglehopper/tests/test_word_error_rate.py
Normal file
|
@ -0,0 +1,45 @@
|
|||
from __future__ import division, print_function
|
||||
|
||||
import math
|
||||
|
||||
from .. import word_error_rate, words, unordered_word_error_rate
|
||||
|
||||
|
||||
def test_words():
    """words() yields the words of a sentence without brackets, quotes and sentence punctuation."""
    sentence = 'Der schnelle [„braune“] Fuchs kann keine 3,14 Meter springen, oder?'
    expected = ['Der', 'schnelle', 'braune', 'Fuchs', 'kann', 'keine', '3,14', 'Meter', 'springen', 'oder']
    assert list(words(sentence)) == expected
|
||||
|
||||
|
||||
def test_words_private_use_area():
|
||||
result = list(words(
|
||||
'ber die vielen Sorgen wegen deelben vergaß Hartkopf, der Frau Amtmnnin das ver⸗\n'
|
||||
'ſproene zu berliefern.'))
|
||||
expected = [
|
||||
'ber', 'die', 'vielen', 'Sorgen', 'wegen', 'deelben', 'vergaß', 'Hartkopf',
|
||||
'der', 'Frau', 'Amtmnnin', 'das', 'ver',
|
||||
'ſproene', 'zu', 'berliefern']
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_word_error_rate():
    """Check word_error_rate() on short hand-computed sentence pairs."""
    # (first text, second text, expected rate)
    cases = [
        ('Dies ist ein Beispielsatz!', 'Dies ist ein Beispielsatz!', 0),
        ('Dies. ist ein Beispielsatz!', 'Dies ist ein Beispielsatz!', 0),
        ('Dies. ist ein Beispielsatz!', 'Dies ist ein Beispielsatz.', 0),
        ('Dies ist ein Beispielsatz!', 'Dies ist ein Beispielsarz:', 1/4),
        ('Dies ist ein Beispielsatz!', 'Dies ein ist Beispielsatz!', 2/4),
        ('Dies ist ein Beispielsatz!', '', 4/4),
        ('', '', 0),
        ('Schlyñ lorem ipsum dolor sit amet,', 'Schlym̃ lorem ipsum dolor sit amet.', 1/6),
    ]
    for first, second, expected in cases:
        assert word_error_rate(first, second) == expected

    # An empty first text combined with a non-empty second one gives infinity.
    assert math.isinf(word_error_rate('', 'Dies ist ein Beispielsatz!'))
|
||||
|
||||
|
||||
def test_unordered_word_error_rate():
    """Check that unordered_word_error_rate() ignores word order in these examples."""
    # (first text, second text, expected rate)
    cases = [
        ('abc def ghi', 'ghi abc def', 0),
        ('abc def ghi', 'ghi abcX def', 1/3),
        ('abc def ghi jkl', 'abc ghi def jkl', 0),
        ('abc def ghi jkl', 'abc ghi defX jkl', 1/4),
    ]
    for first, second, expected in cases:
        assert unordered_word_error_rate(first, second) == expected
    # XXX There seem to be some cases where this does not work
|
24
qurator/dinglehopper/tests/util.py
Normal file
24
qurator/dinglehopper/tests/util.py
Normal file
|
@ -0,0 +1,24 @@
|
|||
from itertools import zip_longest
|
||||
from typing import Iterable
|
||||
|
||||
import colorama
|
||||
|
||||
|
||||
def diffprint(x, y):
    """Print elements or lists x and y, with differences in red.

    When both x and y are iterable, they are walked in lockstep and one line
    is printed per pair; the shorter one is padded with None. Otherwise x and
    y are printed as a single pair.
    """

    def _diffprint(x, y):
        # Differing pairs are wrapped in the colorama red/reset codes.
        if x != y:
            print(colorama.Fore.RED, x, y, colorama.Fore.RESET)
        else:
            print(x, y)

    # Only iterate when BOTH sides are iterable: zip_longest() raises a
    # TypeError for a non-iterable argument, e.g. for the pair ('a', None).
    if isinstance(x, Iterable) and isinstance(y, Iterable):
        for xe, ye in zip_longest(x, y):
            _diffprint(xe, ye)
    else:
        _diffprint(x, y)
|
||||
|
||||
|
||||
def unzip(l):
    """Invert a zip: turn an iterable of tuples into an iterator of per-position tuples."""
    columns = zip(*l)
    return columns
|
Loading…
Add table
Add a link
Reference in a new issue