mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-10 20:59:54 +02:00
extracting images only - avoid artifacts with heuristics
This commit is contained in:
parent
9170a9f21c
commit
8e2cdad1be
2 changed files with 42 additions and 6 deletions
|
@ -1731,11 +1731,14 @@ class Eynollah:
|
||||||
polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) )
|
polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) )
|
||||||
"""
|
"""
|
||||||
x, y, w, h = cv2.boundingRect(ploy_img_ind)
|
x, y, w, h = cv2.boundingRect(ploy_img_ind)
|
||||||
box = [x, y, w, h]
|
if h < 150 or w < 150:
|
||||||
_, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
|
pass
|
||||||
#cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]]))
|
else:
|
||||||
|
box = [x, y, w, h]
|
||||||
polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) )
|
_, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
|
||||||
|
#cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]]))
|
||||||
|
|
||||||
|
polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) )
|
||||||
|
|
||||||
return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page
|
return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page
|
||||||
def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier):
|
def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier):
|
||||||
|
@ -3011,7 +3014,7 @@ class Eynollah:
|
||||||
pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, [], [], [], [], [], cont_page, [], [])
|
pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, [], [], [], [], [], cont_page, [], [])
|
||||||
|
|
||||||
if self.plotter:
|
if self.plotter:
|
||||||
self.plotter.write_images_into_directory(polygons_of_images, img_res)
|
self.plotter.write_images_into_directory(polygons_of_images, image_page)
|
||||||
#plt.imshow(text_regions_p_1)
|
#plt.imshow(text_regions_p_1)
|
||||||
#plt.show()
|
#plt.show()
|
||||||
|
|
||||||
|
|
33
run_image_extraction_over_ppn_lists.py
Normal file
33
run_image_extraction_over_ppn_lists.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
"""Download digitised works by PPN and run eynollah image extraction on each.

For every PPN listed (one per line) in ``dir_ppn`` the script:

1. downloads the ZIP archive of the work from ``url_main``,
2. unpacks it into ``PPN_<ppn>`` and removes the ``*.txt`` entries,
3. runs ``eynollah`` on the unpacked directory, writing into ``out_result``,
4. deletes the archive and the unpacked directory again.

External programs are started without a shell, so the ``?`` in the download
URL and the contents of the PPN list are never interpreted by ``sh``.
"""
import glob
import os
import shutil
import subprocess
import sys

# Text file with one PPN (digits only, without the "PPN" prefix) per line.
dir_ppn = '/home/vahid/Documents/eynollah/ppn_list.txt'

# Download endpoint; the PPN number is appended directly, e.g.
# https://content.staatsbibliothek-berlin.de/dc/download/zip?ppn=PPN1762638355
url_main = 'https://content.staatsbibliothek-berlin.de/dc/download/zip?ppn=PPN'

# Root directory that receives one "<PPN>_out" and one "<PPN>_out_images"
# directory per processed work.
out_result = './new_results_ppns2'

# Model directory handed to eynollah via ``-m``.
dir_models = '/home/vahid/Downloads/models_eynollah_renamed_savedmodel'


def parse_ppn_lines(lines):
    """Return the PPNs found in *lines*, stripped, skipping blank lines.

    Args:
        lines: iterable of strings, typically an open text file.

    Returns:
        list of non-empty PPN strings in input order.
    """
    stripped = (line.strip() for line in lines)
    return [ppn for ppn in stripped if ppn]


def build_eynollah_command(ppn_ind, out_root=None, models=None):
    """Return the eynollah argument list for one PPN.

    The flags are exactly those of the original command line; see
    ``eynollah --help`` for their meaning.

    Args:
        ppn_ind: PPN number as a string (without the "PPN" prefix).
        out_root: root output directory; defaults to ``out_result``.
        models: model directory; defaults to ``dir_models``.

    Returns:
        list of strings suitable for ``subprocess.run``.
    """
    out_root = out_result if out_root is None else out_root
    models = dir_models if models is None else models
    work_dir = "PPN_" + ppn_ind
    return [
        "eynollah",
        "-m", models,
        "-di", work_dir,
        "-o", out_root + '/' + work_dir + "_out",
        "-eoi",
        "-ep",
        "-si", out_root + '/' + work_dir + "_out_images",
    ]


def _run(args):
    """Run *args* without a shell and return the exit status.

    Returns 127 (the shell's "command not found" status) when the program
    cannot be started, so a missing tool does not abort the whole batch.
    """
    try:
        return subprocess.run(args).returncode
    except OSError as err:
        print("could not start {}: {}".format(args[0], err), file=sys.stderr)
        return 127


def _remove(path):
    """Delete a file or directory tree; a missing path is not an error."""
    if os.path.isdir(path) and not os.path.islink(path):
        shutil.rmtree(path, ignore_errors=True)
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        pass  # already gone, nothing to clean up
    except OSError as err:
        print("could not remove {}: {}".format(path, err), file=sys.stderr)


def process_ppn(ppn_ind):
    """Download, unpack and process a single PPN, then clean up.

    Failures are reported on stderr and the PPN is skipped; the temporary
    archive and directory are removed in every case.

    Args:
        ppn_ind: PPN number as a string (without the "PPN" prefix).
    """
    work_dir = "PPN_" + ppn_ind
    zip_path = work_dir + ".zip"
    try:
        if _run(["curl", "-o", "./" + zip_path, url_main + ppn_ind]) != 0:
            print("download failed for PPN " + ppn_ind, file=sys.stderr)
            return

        # The exit status of unzip is not used as the success criterion
        # because it is also non-zero for mere warnings; the presence of the
        # target directory is what the following steps actually need.
        _run(["unzip", zip_path, "-d", work_dir])
        if not os.path.isdir(work_dir):
            print("nothing unpacked for PPN " + ppn_ind, file=sys.stderr)
            return

        # Only the page images are wanted; drop text entries shipped in the
        # archive (same match as the shell pattern PPN_<ppn>/*.txt).
        for txt_path in glob.glob(os.path.join(work_dir, "*.txt")):
            _remove(txt_path)

        # exist_ok: the result root may not exist yet on the first run and
        # the per-PPN directories may be left over from an earlier one.
        os.makedirs(out_result + '/' + work_dir + "_out", exist_ok=True)
        os.makedirs(out_result + '/' + work_dir + "_out_images", exist_ok=True)

        _run(build_eynollah_command(ppn_ind))
    finally:
        _remove(zip_path)
        _remove(work_dir)


def main():
    """Process every PPN listed in ``dir_ppn``."""
    with open(dir_ppn) as f:
        ppn_list = parse_ppn_lines(f)

    for ppn_ind in ppn_list:
        process_ppn(ppn_ind)


if __name__ == "__main__":
    main()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue