mirror of
				https://github.com/qurator-spk/sbb_textline_detection.git
				synced 2025-10-31 17:34:16 +01:00 
			
		
		
		
	resolving issue https://github.com/qurator-spk/sbb_textline_detection/issues/53
This commit is contained in:
		
							parent
							
								
									17069f72d2
								
							
						
					
					
						commit
						4c498fcad2
					
				
					 1 changed file with 82 additions and 75 deletions
				
			
		|  | @ -2066,89 +2066,96 @@ class textline_detector: | ||||||
|         gc.collect() |         gc.collect() | ||||||
|         t2=time.time() |         t2=time.time() | ||||||
|          |          | ||||||
|          |  | ||||||
|         try: |         try: | ||||||
|             # extract text regions and corresponding contours and surrounding box |             try: | ||||||
|             text_regions=self.extract_text_regions(image_page) |                 # extract text regions and corresponding contours and surrounding box | ||||||
|  |                 text_regions=self.extract_text_regions(image_page) | ||||||
|                  |                  | ||||||
|             text_regions = cv2.erode(text_regions, self.kernel, iterations=3) |                 text_regions = cv2.erode(text_regions, self.kernel, iterations=3) | ||||||
|             text_regions = cv2.dilate(text_regions, self.kernel, iterations=4) |                 text_regions = cv2.dilate(text_regions, self.kernel, iterations=4) | ||||||
|                  |                  | ||||||
|             #plt.imshow(text_regions[:,:,0]) |                 #plt.imshow(text_regions[:,:,0]) | ||||||
|             #plt.show() |                 #plt.show() | ||||||
| 
 | 
 | ||||||
|             contours=self.get_text_region_contours_and_boxes(text_regions) |                 contours=self.get_text_region_contours_and_boxes(text_regions) | ||||||
|              |              | ||||||
|              |              | ||||||
|              |              | ||||||
|             ##########   |                 ##########   | ||||||
|  |                 K.clear_session() | ||||||
|  |                 gc.collect() | ||||||
|  |              | ||||||
|  |              | ||||||
|  |             except: | ||||||
|  |                 text_regions=None | ||||||
|  |                 contours=[] | ||||||
|  |                  | ||||||
|  |                  | ||||||
|  |             t3=time.time() | ||||||
|  | 
 | ||||||
|  |              | ||||||
|  |             if len(contours)>0: | ||||||
|  |                  | ||||||
|  | 
 | ||||||
|  |                  | ||||||
|  |                 # extracting textlines using segmentation | ||||||
|  |                 textline_mask_tot=self.textline_contours(image_page) | ||||||
|  |                 ##########   | ||||||
|  |                 K.clear_session() | ||||||
|  |                 gc.collect() | ||||||
|  |                  | ||||||
|  |                 t4=time.time() | ||||||
|  |                  | ||||||
|  |                  | ||||||
|  |                 # calculate the slope for deskewing for each box of text region. | ||||||
|  |                 contours=self.get_slopes_and_deskew(contours,textline_mask_tot) | ||||||
|  |                  | ||||||
|  |                 gc.collect() | ||||||
|  |                 t5=time.time() | ||||||
|  |                  | ||||||
|  |                  | ||||||
|  |                 # get orders of each textregion. This method by now only works for one column documents.  | ||||||
|  |                 indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours) | ||||||
|  |                 order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted ) | ||||||
|  |                  | ||||||
|  |                  | ||||||
|  |                 ##########   | ||||||
|  |                 gc.collect() | ||||||
|  |                 t6=time.time() | ||||||
|  |                  | ||||||
|  |                  | ||||||
|  |                 self.get_all_image_patches_coordination(image_page) | ||||||
|  |                  | ||||||
|  |                 ##########  | ||||||
|  |                 ##########   | ||||||
|  |                 gc.collect() | ||||||
|  |                  | ||||||
|  |                 t7=time.time() | ||||||
|  | 
 | ||||||
|  |             else: | ||||||
|  |                 contours=[] | ||||||
|  |                 order_of_texts=None | ||||||
|  |                 id_of_texts=None | ||||||
|  |             self.write_into_page_xml(contours,page_coord,self.dir_out , order_of_texts , id_of_texts) | ||||||
|  | 
 | ||||||
|  |             # Destroy the current Keras session/graph to free memory | ||||||
|             K.clear_session() |             K.clear_session() | ||||||
|             gc.collect() |  | ||||||
|          |  | ||||||
|              |              | ||||||
|  |             print( "time total = "+"{0:.2f}".format(time.time()-t1) ) | ||||||
|  |             print( "time needed for page extraction = "+"{0:.2f}".format(t2-t1) ) | ||||||
|  |             print( "time needed for text region extraction and get contours = "+"{0:.2f}".format(t3-t2) ) | ||||||
|  |             if len(contours)>0: | ||||||
|  |                 print( "time needed for textlines = "+"{0:.2f}".format(t4-t3) ) | ||||||
|  |                 print( "time needed to get slopes of regions (deskewing) = "+"{0:.2f}".format(t5-t4) ) | ||||||
|  |                 print( "time needed to get order of regions = "+"{0:.2f}".format(t6-t5) ) | ||||||
|  |                 print( "time needed to implement deskewing = "+"{0:.2f}".format(t7-t6) ) | ||||||
|         except: |         except: | ||||||
|             text_regions=None |  | ||||||
|             contours=[] |  | ||||||
|              |  | ||||||
|              |  | ||||||
|         t3=time.time() |  | ||||||
| 
 |  | ||||||
|          |  | ||||||
|         if len(contours)>0: |  | ||||||
|              |  | ||||||
| 
 |  | ||||||
|              |  | ||||||
|             # extracting textlines using segmentation |  | ||||||
|             textline_mask_tot=self.textline_contours(image_page) |  | ||||||
|             ##########   |  | ||||||
|             K.clear_session() |  | ||||||
|             gc.collect() |  | ||||||
|              |  | ||||||
|             t4=time.time() |  | ||||||
|              |  | ||||||
|              |  | ||||||
|             # calculate the slope for deskewing for each box of text region. |  | ||||||
|             contours=self.get_slopes_and_deskew(contours,textline_mask_tot) |  | ||||||
|              |  | ||||||
|             gc.collect() |  | ||||||
|             t5=time.time() |  | ||||||
|              |  | ||||||
|              |  | ||||||
|             # get orders of each textregion. This method by now only works for one column documents.  |  | ||||||
|             indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours) |  | ||||||
|             order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted ) |  | ||||||
|              |  | ||||||
|              |  | ||||||
|             ##########   |  | ||||||
|             gc.collect() |  | ||||||
|             t6=time.time() |  | ||||||
|              |  | ||||||
|              |  | ||||||
|             self.get_all_image_patches_coordination(image_page) |  | ||||||
|              |  | ||||||
|             ##########  |  | ||||||
|             ##########   |  | ||||||
|             gc.collect() |  | ||||||
|              |  | ||||||
|             t7=time.time() |  | ||||||
| 
 |  | ||||||
|         else: |  | ||||||
|             contours=[] |             contours=[] | ||||||
|             order_of_texts=None |             order_of_texts=None | ||||||
|             id_of_texts=None |             id_of_texts=None | ||||||
|         self.write_into_page_xml(contours,page_coord,self.dir_out , order_of_texts , id_of_texts) |             self.write_into_page_xml(contours,page_coord,self.dir_out , order_of_texts , id_of_texts) | ||||||
|  |             print( "time total = "+"{0:.2f}".format(time.time()-t1) ) | ||||||
|          |          | ||||||
|         # Destroy the current Keras session/graph to free memory |  | ||||||
|         K.clear_session() |  | ||||||
|          |  | ||||||
|         print( "time total = "+"{0:.2f}".format(time.time()-t1) ) |  | ||||||
|         print( "time needed for page extraction = "+"{0:.2f}".format(t2-t1) ) |  | ||||||
|         print( "time needed for text region extraction and get contours = "+"{0:.2f}".format(t3-t2) ) |  | ||||||
|         if len(contours)>0: |  | ||||||
|             print( "time needed for textlines = "+"{0:.2f}".format(t4-t3) ) |  | ||||||
|             print( "time needed to get slopes of regions (deskewing) = "+"{0:.2f}".format(t5-t4) ) |  | ||||||
|             print( "time needed to get order of regions = "+"{0:.2f}".format(t6-t5) ) |  | ||||||
|             print( "time needed to implement deskewing = "+"{0:.2f}".format(t7-t6) ) |  | ||||||
| 
 | 
 | ||||||
|          |          | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue