mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-09 03:40:12 +02:00
✨ dinglehopper: Give segment ids from the extracted textequiv_level
This commit is contained in:
parent
b23e4ce30e
commit
b23b75b601
1 changed file with 11 additions and 5 deletions
|
@@ -157,11 +157,17 @@ class ExtractedText:
|
|||
def segment_id_for_pos(self, pos):
|
||||
# Calculate segment ids once, on the first call
|
||||
if not self._segment_id_for_pos:
|
||||
segment_id_for_pos = []
|
||||
for s in self.segments:
|
||||
segment_id_for_pos.extend(repeat(s.segment_id, len(s.text)))
|
||||
segment_id_for_pos.extend(repeat(None, len(self.joiner)))
|
||||
segment_id_for_pos = segment_id_for_pos[:-len(self.joiner)]
|
||||
if self._text is not None:
|
||||
segment_id_for_pos = list(repeat(self.segment_id, len(self._text)))
|
||||
else:
|
||||
# Recurse
|
||||
segment_id_for_pos = []
|
||||
for s in self.segments:
|
||||
seg_ids = [s.segment_id_for_pos(i) for i in range(len(s.text))]
|
||||
segment_id_for_pos.extend(seg_ids)
|
||||
segment_id_for_pos.extend(repeat(None, len(self.joiner)))
|
||||
segment_id_for_pos = segment_id_for_pos[:-len(self.joiner)]
|
||||
|
||||
# This is frozen, so we have to jump through the hoop:
|
||||
object.__setattr__(self, '_segment_id_for_pos', segment_id_for_pos)
|
||||
assert self._segment_id_for_pos
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue