🎨 Extract methods to fix lines + words

pull/1/head
Gerber, Mike 5 years ago
parent 8d21cd8ab9
commit c7033a5d4d

@@ -42,20 +42,9 @@ class RepairInconsistencies(Processor):
kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL] kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
super(RepairInconsistencies, self).__init__(*args, **kwargs) super(RepairInconsistencies, self).__init__(*args, **kwargs)
def process(self): def _fix_words(self, line):
for (n, input_file) in enumerate(self.input_files): """Fix word order in a line"""
page_id = input_file.pageId or input_file.ID
LOG.info("INPUT FILE %i / %s", n, page_id)
pcgts = page_from_file(self.workspace.download_file(input_file))
page = pcgts.get_Page()
regions = page.get_TextRegion()
for region in regions:
lines = region.get_TextLine()
for line in lines:
# Fix words in lines
words = line.get_Word() words = line.get_Word()
line_text = get_text(line) line_text = get_text(line)
words_text = get_text(words, ' ') words_text = get_text(words, ' ')
@@ -68,10 +57,9 @@ class RepairInconsistencies(Processor):
LOG.info('Fixing word order of line "%s"', line.id) LOG.info('Fixing word order of line "%s"', line.id)
line.set_Word(sorted_words) line.set_Word(sorted_words)
words = line.get_Word() def _fix_glyphs(self, word):
for word in words: """Fix glyph order in a word"""
# Fix glyphs in words
glyphs = word.get_Glyph() glyphs = word.get_Glyph()
word_text = get_text(word) word_text = get_text(word)
glyphs_text = get_text(glyphs, '') glyphs_text = get_text(glyphs, '')
@@ -84,6 +72,25 @@ class RepairInconsistencies(Processor):
LOG.info('Fixing glyph order of word "%s"', word.id) LOG.info('Fixing glyph order of word "%s"', word.id)
word.set_Glyph(sorted_glyphs) word.set_Glyph(sorted_glyphs)
def process(self):
for (n, input_file) in enumerate(self.input_files):
page_id = input_file.pageId or input_file.ID
LOG.info("INPUT FILE %i / %s", n, page_id)
pcgts = page_from_file(self.workspace.download_file(input_file))
page = pcgts.get_Page()
regions = page.get_TextRegion()
for region in regions:
lines = region.get_TextLine()
for line in lines:
self._fix_words(line)
words = line.get_Word()
for word in words:
self._fix_glyphs(word)
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp) file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
if file_id == input_file.ID: if file_id == input_file.ID:
file_id = concat_padded(self.output_file_grp, n) file_id = concat_padded(self.output_file_grp, n)

Loading…
Cancel
Save