mirror of
				https://github.com/mikegerber/ocrd_calamari.git
				synced 2025-11-04 01:24:14 +01:00 
			
		
		
		
	✨ Allow controlling the output hierarchy level, e.g. only lines, not words+glyphs
This commit is contained in:
		
							parent
							
								
									0f0bae18ba
								
							
						
					
					
						commit
						ef3fb44fb5
					
				
					 4 changed files with 73 additions and 37 deletions
				
			
		
							
								
								
									
										12
									
								
								README.md
									
										
									
									
									
								
							
							
						
						
									
										12
									
								
								README.md
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -13,10 +13,11 @@ This offers an OCR-D compliant workspace processor for some of the functionality
 | 
			
		|||
This processor only operates on the text line level and so needs a line segmentation (and by extension a binarized 
 | 
			
		||||
image) as its input.
 | 
			
		||||
 | 
			
		||||
In addition to the line text it also outputs glyph segmentation including
 | 
			
		||||
per-glyph confidence values and per-glyph alternative predictions as provided by
 | 
			
		||||
the Calamari OCR engine. Note that while Calamari does not provide word
 | 
			
		||||
segmentation, this processor produces word segmentation inferred from text
 | 
			
		||||
In addition to the line text it may also output word and glyph segmentation
 | 
			
		||||
including per-glyph confidence values and per-glyph alternative predictions as
 | 
			
		||||
provided by the Calamari OCR engine, using a `textequiv_level` of `word` or
 | 
			
		||||
`glyph`. Note that while Calamari does not provide word segmentation, this
 | 
			
		||||
processor produces word segmentation inferred from text
 | 
			
		||||
segmentation and the glyph positions. The provided glyph and word segmentation
 | 
			
		||||
can be used for text extraction and highlighting, but is probably not useful for
 | 
			
		||||
further image-based processing.
 | 
			
		||||
| 
						 | 
				
			
			@ -53,7 +54,8 @@ ocrd-calamari-recognize -p test-parameters.json -m mets.xml -I OCR-D-SEG-LINE -O
 | 
			
		|||
With `test-parameters.json`:
 | 
			
		||||
~~~
 | 
			
		||||
{
 | 
			
		||||
    "checkpoint": "/path/to/some/trained/models/*.ckpt.json"
 | 
			
		||||
    "checkpoint": "/path/to/some/trained/models/*.ckpt.json",
 | 
			
		||||
    "textequiv_level": "line"
 | 
			
		||||
}
 | 
			
		||||
~~~
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -25,6 +25,12 @@
 | 
			
		|||
        "voter": {
 | 
			
		||||
          "description": "The voting algorithm to use",
 | 
			
		||||
          "type": "string", "default": "confidence_voter_default_ctc"
 | 
			
		||||
        },
 | 
			
		||||
        "textequiv_level": {
 | 
			
		||||
          "type": "string",
 | 
			
		||||
          "enum": ["line", "word", "glyph"],
 | 
			
		||||
          "default": "line",
 | 
			
		||||
          "description": "Deepest PAGE XML hierarchy level to include TextEquiv results for"
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -120,6 +120,7 @@ class CalamariRecognize(Processor):
 | 
			
		|||
                                spaces = (c == ' ')
 | 
			
		||||
                        yield word
 | 
			
		||||
 | 
			
		||||
                    if self.parameter['textequiv_level'] in ['word', 'glyph']:
 | 
			
		||||
                        word_no = 0
 | 
			
		||||
                        i = 0
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -137,6 +138,7 @@ class CalamariRecognize(Processor):
 | 
			
		|||
                                word = WordType(id='%s_word%04d' % (line.id, word_no), Coords=CoordsType(points))
 | 
			
		||||
                                word.add_TextEquiv(TextEquivType(Unicode=word_text))
 | 
			
		||||
 | 
			
		||||
                                if self.parameter['textequiv_level'] == 'glyph':
 | 
			
		||||
                                    for glyph_no, p in enumerate(word_positions):
 | 
			
		||||
                                        glyph_start = p.global_start
 | 
			
		||||
                                        glyph_end = p.global_end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -87,6 +87,7 @@ def test_word_segmentation(workspace):
 | 
			
		|||
        output_file_grp="OCR-D-OCR-CALAMARI",
 | 
			
		||||
        parameter={
 | 
			
		||||
            "checkpoint": CHECKPOINT,
 | 
			
		||||
            "textequiv_level": "word",   # Note that we're going down to word level here
 | 
			
		||||
        }
 | 
			
		||||
    ).process()
 | 
			
		||||
    workspace.save_mets()
 | 
			
		||||
| 
						 | 
				
			
			@ -106,5 +107,30 @@ def test_word_segmentation(workspace):
 | 
			
		|||
    line_text = line.xpath("pc:TextEquiv/pc:Unicode", namespaces=NSMAP)[0].text
 | 
			
		||||
    assert words_text == line_text
 | 
			
		||||
 | 
			
		||||
    # For good measure, check that we're not seeing any glyphs, as we asked for textequiv_level == "word"
 | 
			
		||||
    glyphs = tree.xpath("//pc:Glyph", namespaces=NSMAP)
 | 
			
		||||
    assert len(glyphs) == 0
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_glyphs(workspace):
 | 
			
		||||
    CalamariRecognize(
 | 
			
		||||
        workspace,
 | 
			
		||||
        input_file_grp="OCR-D-GT-SEG-LINE",
 | 
			
		||||
        output_file_grp="OCR-D-OCR-CALAMARI",
 | 
			
		||||
        parameter={
 | 
			
		||||
            "checkpoint": CHECKPOINT,
 | 
			
		||||
            "textequiv_level": "glyph",   # Note that we're going down to glyph level here
 | 
			
		||||
        }
 | 
			
		||||
    ).process()
 | 
			
		||||
    workspace.save_mets()
 | 
			
		||||
 | 
			
		||||
    page1 = os.path.join(workspace.directory, "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml")
 | 
			
		||||
    assert os.path.exists(page1)
 | 
			
		||||
    tree = etree.parse(page1)
 | 
			
		||||
 | 
			
		||||
    # The result should contain a lot of glyphs
 | 
			
		||||
    glyphs = tree.xpath("//pc:Glyph", namespaces=NSMAP)
 | 
			
		||||
    assert len(glyphs) >= 100
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# vim:tw=120:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue