Mirror of https://github.com/qurator-spk/dinglehopper.git (synced 2025-11-04 03:14:16 +01:00)
			
		
		
		
	🚧 dinglehopper: WIP data structure for extracted text
This commit is contained in:
		
parent: 91371971eb
commit: eca8cbc81e
					
				
					 2 changed files with 3 additions and 3 deletions
				
			
		| 
						 | 
					@ -4,14 +4,14 @@ import enum
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# TODO handle grapheme cluster positions?
 | 
					# TODO handle grapheme cluster positions?
 | 
				
			||||||
 | 
					# TODO Use type annotations for attr.ib types when support for Python 3.5 is dropped
 | 
				
			||||||
 | 
					# TODO types are not validated (attr does not do this yet)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@attr.s(frozen=True)
 | 
					@attr.s(frozen=True)
 | 
				
			||||||
class ExtractedText:
 | 
					class ExtractedText:
 | 
				
			||||||
    segments = attr.ib()
 | 
					    segments = attr.ib()
 | 
				
			||||||
    joiner = attr.ib(type=str)
 | 
					    joiner = attr.ib(type=str)
 | 
				
			||||||
    # XXX Use type annotations for attr types when support for Python 3.5 is dropped
 | 
					 | 
				
			||||||
    # XXX Also I think these are not validated?
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @property
 | 
					    @property
 | 
				
			||||||
    def text(self):
 | 
					    def text(self):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6,7 +6,7 @@ from extracted_text import ExtractedText, ExtractedTextSegment
 | 
				
			||||||
def test_text():
 | 
					def test_text():
 | 
				
			||||||
    test1 = ExtractedText([
 | 
					    test1 = ExtractedText([
 | 
				
			||||||
        ExtractedTextSegment('s0', 'foo'),
 | 
					        ExtractedTextSegment('s0', 'foo'),
 | 
				
			||||||
        ExtractedTextSegment(1, 'bar'),
 | 
					        ExtractedTextSegment('s1', 'bar'),
 | 
				
			||||||
        ExtractedTextSegment('s2', 'bazinga')
 | 
					        ExtractedTextSegment('s2', 'bazinga')
 | 
				
			||||||
    ], ' ')
 | 
					    ], ' ')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue