From a22df11ebb564631611f4609048b31e67eb0541f Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 14 Apr 2025 00:42:08 +0200 Subject: [PATCH 01/10] Restoring the contour in the original image caused an error due to an empty tuple. This issue has been resolved, and as expected, the confidence score for this contour is set to zero --- src/eynollah/utils/contour.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index a81ccb4..0e84153 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -230,7 +230,6 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first, confidence_matrix): img_copy = np.zeros(img.shape) img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=(1, 1, 1)) - confidence_matrix_mapped_with_contour = confidence_matrix * img_copy[:,:,0] confidence_contour = np.sum(confidence_matrix_mapped_with_contour) / float(np.sum(img_copy[:,:,0])) @@ -239,9 +238,13 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first ret, thresh = cv2.threshold(imgray, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) - cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) - # print(np.shape(cont_int[0])) + if len(cont_int)==0: + cont_int = [] + cont_int.append(contour_par) + confidence_contour = 0 + else: + cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) + cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) return cont_int[0], index_r_con, confidence_contour def get_textregion_contours_in_org_image_light(cnts, img, slope_first, confidence_matrix, map=map): From 41318f0404722c3980db6f9174871c2e222258d7 Mon Sep 17 00:00:00 2001 From: kba Date: Tue, 15 Apr 2025 11:14:26 +0200 Subject: [PATCH 02/10] :memo: changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7ce6bb..ad86fe5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ Versioned according to [Semantic Versioning](http://semver.org/). ## Unreleased +Fixed: + + * restoring the contour in the original image caused an error due to an empty tuple + ## [0.4.0] - 2025-04-07 Fixed: From 30ba23464193b61541e4ba7784974b4d5c4ec33d Mon Sep 17 00:00:00 2001 From: kba Date: Wed, 16 Apr 2025 19:27:17 +0200 Subject: [PATCH 03/10] CI: pypi --- .github/workflows/pypi.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/pypi.yml diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml new file mode 100644 index 0000000..bb2344e --- /dev/null +++ b/.github/workflows/pypi.yml @@ -0,0 +1,24 @@ +name: PyPI CD + +on: + release: + types: [published] + workflow_dispatch: + +jobs: + pypi-publish: + name: upload release to PyPI + runs-on: ubuntu-latest + permissions: + # IMPORTANT: this permission is mandatory for Trusted Publishing + id-token: write + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + - name: Build package + run: make build + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + verbose: true From 77dae129d50783b225eb3f72e32d38adaa8e0610 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 22 Apr 2025 13:22:28 +0200 Subject: [PATCH 04/10] CI: Use most recent actions/setup-python@v5 --- .github/workflows/pypi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index bb2344e..248f4ef 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 - name: Build package run: make build - name: Publish package distributions to PyPI From 208bde706f6a998af7811372ca80be82d3af95cb Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 30 Apr 2025 13:55:09 +0200 Subject: [PATCH 05/10] resolving issue #158 --- src/eynollah/utils/separate_lines.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 3499c29..6602574 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -214,9 +214,13 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): textline_con_fil=filter_contours_area_of_image(img_patch, textline_con, hierarchy, max_area=1, min_area=0.0008) - y_diff_mean=np.mean(np.diff(peaks_new_tot))#self.find_contours_mean_y_diff(textline_con_fil) - sigma_gaus=int( y_diff_mean * (7./40.0) ) - #print(sigma_gaus,'sigma_gaus') + + if len(np.diff(peaks_new_tot))>0: + y_diff_mean=np.mean(np.diff(peaks_new_tot))#self.find_contours_mean_y_diff(textline_con_fil) + sigma_gaus=int( y_diff_mean * (7./40.0) ) + else: + sigma_gaus=12 + except: sigma_gaus=12 if sigma_gaus<3: @@ -1616,6 +1620,7 @@ def do_work_of_slopes_new( textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.00008) + y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if len(textline_con_fil) > 1 else np.NaN if np.isnan(y_diff_mean): slope_for_all = MAX_SLOPE @@ -1641,13 +1646,6 @@ def do_work_of_slopes_new( all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w].copy() mask_only_con_region = mask_only_con_region[y: y + h, x: x + w] - ##plt.imshow(textline_mask_tot_ea) - ##plt.show() - ##plt.imshow(all_text_region_raw) - ##plt.show() - ##plt.imshow(mask_only_con_region) - ##plt.show() - all_text_region_raw[mask_only_con_region == 0] = 0 cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text) From 4cb4414740a89741c6ff25a33932ffc16ce201f8 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 30 Apr 2025 16:01:52 +0200 Subject: [PATCH 06/10] Resolve remaining issue with #158 and resolving #124 --- src/eynollah/utils/separate_lines.py | 263 ++++++++++----------------- 1 file changed, 95 insertions(+), 168 deletions(-) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 6602574..0322579 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -102,14 +102,15 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): textline_con_fil = filter_contours_area_of_image(img_patch, textline_con, hierarchy, max_area=1, min_area=0.0008) - y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil) - sigma_gaus = int(y_diff_mean * (7.0 / 40.0)) - # print(sigma_gaus,'sigma_gaus') + if len(np.diff(peaks_new_tot))>1: + y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil) + sigma_gaus = int(y_diff_mean * (7.0 / 40.0)) + else: + sigma_gaus = 12 except: sigma_gaus = 12 if sigma_gaus < 3: sigma_gaus = 3 - # print(sigma_gaus,'sigma') y_padded_smoothed = gaussian_filter1d(y_padded, sigma_gaus) y_padded_up_to_down = -y_padded + np.max(y_padded) @@ -137,7 +138,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): M = cv2.getRotationMatrix2D(center, -thetha, 1.0) x_d = M[0, 2] y_d = M[1, 2] - thetha = thetha / 180. * np.pi rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]]) contour_text_interest_copy = contour_text_interest.copy() @@ -162,77 +162,73 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): x = np.array(range(len(y))) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) - if 1>0: - try: - y_padded_smoothed_e= gaussian_filter1d(y_padded, 2) - y_padded_up_to_down_e=-y_padded+np.max(y_padded) - y_padded_up_to_down_padded_e=np.zeros(len(y_padded_up_to_down_e)+40) - y_padded_up_to_down_padded_e[20:len(y_padded_up_to_down_e)+20]=y_padded_up_to_down_e - y_padded_up_to_down_padded_e= gaussian_filter1d(y_padded_up_to_down_padded_e, 2) - + + try: + y_padded_smoothed_e= gaussian_filter1d(y_padded, 2) + y_padded_up_to_down_e=-y_padded+np.max(y_padded) + y_padded_up_to_down_padded_e=np.zeros(len(y_padded_up_to_down_e)+40) + y_padded_up_to_down_padded_e[20:len(y_padded_up_to_down_e)+20]=y_padded_up_to_down_e + y_padded_up_to_down_padded_e= gaussian_filter1d(y_padded_up_to_down_padded_e, 2) + + peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0) + peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) + neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) - peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0) - peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0) - neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e]) + arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3] + diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) + + arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) + arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1] + peaks_new=peaks_e[:] + peaks_neg_new=peaks_neg_e[:] - arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3] - diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted) - - arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) - arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1] + clusters_to_be_deleted=[] + if len(arg_diff_cluster)>0: + clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1]) + for i in range(len(arg_diff_cluster)-1): + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1: + arg_diff_cluster[i+1]+1]) + clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:]) + if len(clusters_to_be_deleted)>0: + peaks_new_extra=[] + for m in range(len(clusters_to_be_deleted)): + min_cluster=np.min(peaks_e[clusters_to_be_deleted[m]]) + max_cluster=np.max(peaks_e[clusters_to_be_deleted[m]]) + peaks_new_extra.append( int( (min_cluster+max_cluster)/2.0) ) + for m1 in range(len(clusters_to_be_deleted[m])): + peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]-1]] + peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]]] + peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg_e[clusters_to_be_deleted[m][m1]]] + peaks_new_tot=[] + for i1 in peaks_new: + peaks_new_tot.append(i1) + for i1 in peaks_new_extra: + peaks_new_tot.append(i1) + peaks_new_tot=np.sort(peaks_new_tot) + else: + peaks_new_tot=peaks_e[:] - peaks_new=peaks_e[:] - peaks_neg_new=peaks_neg_e[:] + textline_con,hierarchy=return_contours_of_image(img_patch) + textline_con_fil=filter_contours_area_of_image(img_patch, + textline_con, hierarchy, + max_area=1, min_area=0.0008) - clusters_to_be_deleted=[] - if len(arg_diff_cluster)>0: - clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1]) - for i in range(len(arg_diff_cluster)-1): - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1: - arg_diff_cluster[i+1]+1]) - clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:]) - if len(clusters_to_be_deleted)>0: - peaks_new_extra=[] - for m in range(len(clusters_to_be_deleted)): - min_cluster=np.min(peaks_e[clusters_to_be_deleted[m]]) - max_cluster=np.max(peaks_e[clusters_to_be_deleted[m]]) - peaks_new_extra.append( int( (min_cluster+max_cluster)/2.0) ) - for m1 in range(len(clusters_to_be_deleted[m])): - peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]-1]] - peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]]] - peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg_e[clusters_to_be_deleted[m][m1]]] - peaks_new_tot=[] - for i1 in peaks_new: - peaks_new_tot.append(i1) - for i1 in peaks_new_extra: - peaks_new_tot.append(i1) - peaks_new_tot=np.sort(peaks_new_tot) - else: - peaks_new_tot=peaks_e[:] - - textline_con,hierarchy=return_contours_of_image(img_patch) - textline_con_fil=filter_contours_area_of_image(img_patch, - textline_con, hierarchy, - max_area=1, min_area=0.0008) - - if len(np.diff(peaks_new_tot))>0: - y_diff_mean=np.mean(np.diff(peaks_new_tot))#self.find_contours_mean_y_diff(textline_con_fil) - sigma_gaus=int( y_diff_mean * (7./40.0) ) - else: - sigma_gaus=12 - - except: + if len(np.diff(peaks_new_tot))>0: + y_diff_mean=np.mean(np.diff(peaks_new_tot))#self.find_contours_mean_y_diff(textline_con_fil) + sigma_gaus=int( y_diff_mean * (7./40.0) ) + else: sigma_gaus=12 - if sigma_gaus<3: - sigma_gaus=3 - #print(sigma_gaus,'sigma') + + except: + sigma_gaus=12 + if sigma_gaus<3: + sigma_gaus=3 y_padded_smoothed= gaussian_filter1d(y_padded, sigma_gaus) y_padded_up_to_down=-y_padded+np.max(y_padded) y_padded_up_to_down_padded=np.zeros(len(y_padded_up_to_down)+40) y_padded_up_to_down_padded[20:len(y_padded_up_to_down)+20]=y_padded_up_to_down y_padded_up_to_down_padded= gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus) - peaks, _ = find_peaks(y_padded_smoothed, height=0) peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0) @@ -243,6 +239,7 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted))) arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1] + except: arg_neg_must_be_deleted=[] arg_diff_cluster=[] @@ -250,7 +247,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): peaks_new=peaks[:] peaks_neg_new=peaks_neg[:] clusters_to_be_deleted=[] - if len(arg_diff_cluster)>=2 and len(arg_diff_cluster)>0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1]) for i in range(len(arg_diff_cluster)-1): @@ -279,21 +275,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): peaks_new_tot.append(i1) peaks_new_tot=np.sort(peaks_new_tot) - ##plt.plot(y_padded_up_to_down_padded) - ##plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*') - ##plt.show() - - ##plt.plot(y_padded_up_to_down_padded) - ##plt.plot(peaks_neg_new,y_padded_up_to_down_padded[peaks_neg_new],'*') - ##plt.show() - - ##plt.plot(y_padded_smoothed) - ##plt.plot(peaks,y_padded_smoothed[peaks],'*') - ##plt.show() - - ##plt.plot(y_padded_smoothed) - ##plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*') - ##plt.show() peaks=peaks_new_tot[:] peaks_neg=peaks_neg_new[:] else: @@ -302,11 +283,13 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): peaks_neg=peaks_neg_new[:] except: pass - - mean_value_of_peaks=np.mean(y_padded_smoothed[peaks]) - std_value_of_peaks=np.std(y_padded_smoothed[peaks]) + if len(y_padded_smoothed[peaks]) > 1: + mean_value_of_peaks=np.mean(y_padded_smoothed[peaks]) + std_value_of_peaks=np.std(y_padded_smoothed[peaks]) + else: + mean_value_of_peaks = np.nan + std_value_of_peaks = np.nan peaks_values=y_padded_smoothed[peaks] - peaks_neg = peaks_neg - 20 - 20 peaks = peaks - 20 for jj in range(len(peaks_neg)): @@ -349,7 +332,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): point_down_narrow = peaks[jj] + first_nonzero + int( 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) - if point_down_narrow >= img_patch.shape[0]: point_down_narrow = img_patch.shape[0] - 2 @@ -605,7 +587,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) - return peaks, textline_boxes_rot def separate_lines_vertical(img_patch, contour_text_interest, thetha): @@ -637,7 +618,7 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): peaks_neg_new = peaks_neg[:] clusters_to_be_deleted = [] - if len(arg_diff_cluster) >= 2 and len(arg_diff_cluster) > 0: + if len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) >= 2: clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : @@ -645,7 +626,7 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[:]) - if len(arg_neg_must_be_deleted) == 1: + else: clusters_to_be_deleted.append(arg_neg_must_be_deleted) if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] @@ -671,9 +652,14 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): peaks_new_tot = peaks[:] peaks = peaks_new_tot[:] peaks_neg = peaks_neg_new[:] - - mean_value_of_peaks = np.mean(y_padded_smoothed[peaks]) - std_value_of_peaks = np.std(y_padded_smoothed[peaks]) + + if len(y_padded_smoothed[peaks])>1: + mean_value_of_peaks = np.mean(y_padded_smoothed[peaks]) + std_value_of_peaks = np.std(y_padded_smoothed[peaks]) + else: + mean_value_of_peaks = np.nan + std_value_of_peaks = np.nan + peaks_values = y_padded_smoothed[peaks] peaks_neg = peaks_neg - 20 - 20 @@ -691,7 +677,6 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): textline_boxes_rot = [] if len(peaks_neg) == len(peaks) + 1 and len(peaks) >= 3: - # print('11') for jj in range(len(peaks)): if jj == (len(peaks) - 1): @@ -998,15 +983,16 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): textline_con_fil = filter_contours_area_of_image(img_patch, textline_con, hierarchy, max_area=1, min_area=0.0008) - y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil) + if len(np.diff(peaks_new_tot)): + y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil) + sigma_gaus = int(y_diff_mean * (7.0 / 40.0)) + else: + sigma_gaus = 12 - sigma_gaus = int(y_diff_mean * (7.0 / 40.0)) - # print(sigma_gaus,'sigma_gaus') except: sigma_gaus = 12 if sigma_gaus < 3: sigma_gaus = 3 - # print(sigma_gaus,'sigma') y_padded_smoothed = gaussian_filter1d(y_padded, sigma_gaus) y_padded_up_to_down = -y_padded + np.max(y_padded) @@ -1030,7 +1016,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1] clusters_to_be_deleted = [] - if len(arg_diff_cluster) >= 2 and len(arg_diff_cluster) > 0: + if len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) >= 2: clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1]) for i in range(len(arg_diff_cluster) - 1): clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : @@ -1038,7 +1024,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :]) elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0: clusters_to_be_deleted.append(arg_neg_must_be_deleted[:]) - if len(arg_neg_must_be_deleted) == 1: + else: clusters_to_be_deleted.append(arg_neg_must_be_deleted) if len(clusters_to_be_deleted) > 0: peaks_new_extra = [] @@ -1081,9 +1067,14 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): peaks_new_tot = peaks[:] peaks = peaks_new_tot[:] peaks_neg = peaks_neg_new[:] - - mean_value_of_peaks = np.mean(y_padded_smoothed[peaks]) - std_value_of_peaks = np.std(y_padded_smoothed[peaks]) + + if len(y_padded_smoothed[peaks]) > 1: + mean_value_of_peaks = np.mean(y_padded_smoothed[peaks]) + std_value_of_peaks = np.std(y_padded_smoothed[peaks]) + else: + mean_value_of_peaks = np.nan + std_value_of_peaks = np.nan + peaks_values = y_padded_smoothed[peaks] ###peaks_neg = peaks_neg - 20 - 20 @@ -1093,10 +1084,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha): if len(peaks_neg_true) > 0: peaks_neg_true = np.array(peaks_neg_true) - peaks_neg_true = peaks_neg_true - 20 - 20 - # print(peaks_neg_true) for i in range(len(peaks_neg_true)): img_patch[peaks_neg_true[i] - 6 : peaks_neg_true[i] + 6, :] = 0 else: @@ -1181,13 +1170,11 @@ def separate_lines_new_inside_tiles(img_path, thetha): if diff_peaks[i] <= cut_off: forest.append(peaks_neg[i + 1]) if diff_peaks[i] > cut_off: - # print(forest[np.argmin(z[forest]) ] ) if not np.isnan(forest[np.argmin(z[forest])]): peaks_neg_true.append(forest[np.argmin(z[forest])]) forest = [] forest.append(peaks_neg[i + 1]) if i == (len(peaks_neg) - 1): - # print(print(forest[np.argmin(z[forest]) ] )) if not np.isnan(forest[np.argmin(z[forest])]): peaks_neg_true.append(forest[np.argmin(z[forest])]) @@ -1204,17 +1191,14 @@ def separate_lines_new_inside_tiles(img_path, thetha): if diff_peaks_pos[i] <= cut_off: forest.append(peaks[i + 1]) if diff_peaks_pos[i] > cut_off: - # print(forest[np.argmin(z[forest]) ] ) if not np.isnan(forest[np.argmax(z[forest])]): peaks_pos_true.append(forest[np.argmax(z[forest])]) forest = [] forest.append(peaks[i + 1]) if i == (len(peaks) - 1): - # print(print(forest[np.argmin(z[forest]) ] )) if not np.isnan(forest[np.argmax(z[forest])]): peaks_pos_true.append(forest[np.argmax(z[forest])]) - # print(len(peaks_neg_true) ,len(peaks_pos_true) ,'lensss') if len(peaks_neg_true) > 0: peaks_neg_true = np.array(peaks_neg_true) @@ -1240,7 +1224,6 @@ def separate_lines_new_inside_tiles(img_path, thetha): """ peaks_neg_true = peaks_neg_true - 20 - 20 - # print(peaks_neg_true) for i in range(len(peaks_neg_true)): img_path[peaks_neg_true[i] - 6 : peaks_neg_true[i] + 6, :] = 0 @@ -1282,7 +1265,6 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i contours_imgs, hierarchy, max_area=max_area, min_area=min_area) cont_final = [] - ###print(add_boxes_coor_into_textlines,'ikki') for i in range(len(contours_imgs)): img_contour = np.zeros((cnts_images.shape[0], cnts_images.shape[1], 3)) img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=(255, 255, 255)) @@ -1297,12 +1279,10 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i ##0] ##contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1] ##if add_boxes_coor_into_textlines: - ##print(np.shape(contours_text_rot[0]),'sjppo') ##contours_text_rot[0][:, 0, 0]=contours_text_rot[0][:, 0, 0] + box_ind[0] ##contours_text_rot[0][:, 0, 1]=contours_text_rot[0][:, 0, 1] + box_ind[1] cont_final.append(contours_text_rot[0]) - ##print(cont_final,'nadizzzz') return None, cont_final def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False): @@ -1313,20 +1293,7 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, kernel) textline_mask = cv2.erode(textline_mask, kernel, iterations=2) # textline_mask = cv2.erode(textline_mask, kernel, iterations=1) - - # print(textline_mask.shape[0]/float(textline_mask.shape[1]),'miz') try: - # if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3: - # plt.imshow(textline_mask) - # plt.show() - - # if abs(slope)>1: - # x_help=30 - # y_help=2 - # else: - # x_help=2 - # y_help=2 - x_help = 30 y_help = 2 @@ -1350,28 +1317,12 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest img_contour = np.zeros((box_ind[3], box_ind[2], 3)) img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=(255, 255, 255)) - # if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3: - # plt.imshow(img_contour) - # plt.show() - img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help), img_contour.shape[1] + int(2 * x_help), 3)) img_contour_help[y_help : y_help + img_contour.shape[0], x_help : x_help + img_contour.shape[1], :] = np.copy(img_contour[:, :, :]) img_contour_rot = rotate_image(img_contour_help, slope) - # plt.imshow(img_contour_rot_help) - # plt.show() - - # plt.imshow(dst_help) - # plt.show() - - # if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3: - # plt.imshow(img_contour_rot_help) - # plt.show() - - # plt.imshow(dst_help) - # plt.show() img_contour_rot = img_contour_rot.astype(np.uint8) # dst_help = dst_help.astype(np.uint8) @@ -1382,9 +1333,7 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))] ind_big_con = np.argmax(len_con_text_rot) - # print('juzaa') if abs(slope) > 45: - # print(add_boxes_coor_into_textlines,'avval') _, contours_rotated_clean = separate_lines_vertical_cont( textline_mask, contours_text_rot[ind_big_con], box_ind, slope, add_boxes_coor_into_textlines=add_boxes_coor_into_textlines) @@ -1416,7 +1365,6 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl length_x = int(img_path.shape[1] / float(num_patches)) # margin = int(0.04 * length_x) just recently this was changed because it break lines into 2 margin = int(0.04 * length_x) - # print(margin,'margin') # if margin<=4: # margin = int(0.08 * length_x) # margin=0 @@ -1456,11 +1404,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl # if abs(slope_region)>70 and abs(slope_xline)<25: # slope_xline=[slope_region][0] slopes_tile_wise.append(slope_xline) - # print(slope_xline,'xlineeee') img_line_rotated = rotate_image(img_xline, slope_xline) img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 - - # print(slopes_tile_wise,'slopes_tile_wise') + img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape) @@ -1502,8 +1448,6 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin] img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size - # plt.imshow(img_patch_ineterst_revised) - # plt.show() return img_patch_ineterst_revised def do_image_rotation(angle, img, sigma_des, logger=None): @@ -1536,20 +1480,13 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:] img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:] - #print(img_resized.shape,'img_resizedshape') - #plt.imshow(img_resized) - #plt.show() if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: - #plt.imshow(img_resized) - #plt.show() angles = np.array([-45, 0, 45, 90,]) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) elif main_page: - #plt.imshow(img_resized) - #plt.show() angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) @@ -1620,7 +1557,6 @@ def do_work_of_slopes_new( textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.00008) - y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if len(textline_con_fil) > 1 else np.NaN if np.isnan(y_diff_mean): slope_for_all = MAX_SLOPE @@ -1637,12 +1573,9 @@ def do_work_of_slopes_new( if slope_for_all == MAX_SLOPE: slope_for_all = slope_deskew slope = slope_for_all - mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contour_par], color=(1, 1, 1)) - # plt.imshow(mask_only_con_region) - # plt.show() all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w].copy() mask_only_con_region = mask_only_con_region[y: y + h, x: x + w] @@ -1706,20 +1639,15 @@ def do_work_of_slopes_new_curved( mask_region_in_patch_region = mask_biggest[y : y + h, x : x + w] textline_biggest_region = mask_biggest * textline_mask_tot_ea - # print(slope_for_all,'slope_for_all') textline_rotated_separated = separate_lines_new2(textline_biggest_region[y: y+h, x: x+w], 0, num_col, slope_for_all, logger=logger, plotter=plotter) - # new line added - ##print(np.shape(textline_rotated_separated),np.shape(mask_biggest)) + textline_rotated_separated[mask_region_in_patch_region[:, :] != 1] = 0 - # till here textline_region_in_image[y : y + h, x : x + w] = textline_rotated_separated - # plt.imshow(textline_region_in_image) - # plt.show() pixel_img = 1 cnt_textlines_in_image = return_contours_of_interested_textline(textline_region_in_image, pixel_img) @@ -1742,7 +1670,6 @@ def do_work_of_slopes_new_curved( logger.error(why) else: textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, True) - # print(np.shape(textlines_cnt_per_region),'textlines_cnt_per_region') return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope From b227736094e33e2ba8cd6446eb9c0b46c006c10f Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 30 Apr 2025 16:04:34 +0200 Subject: [PATCH 07/10] Fix OCR text cleaning to correctly handle 'U', 'K', and 'N' starting sentence; update text line splitting size --- src/eynollah/eynollah.py | 62 ++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 022cf0a..a94e890 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -259,7 +259,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" - self.model_reading_order_dir = dir_models + "/model_ens_reading_order_machine_based" + self.model_reading_order_dir = dir_models + "/model_mb_ro_aug_3"#"/model_ens_reading_order_machine_based" #"/modelens_12sp_elay_0_3_4__3_6_n" #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8" #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18" @@ -3320,12 +3320,22 @@ class Eynollah: def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p): y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] + img_poly = np.zeros((y_len,x_len), dtype='uint8') img_poly[text_regions_p[:,:]==1] = 1 img_poly[text_regions_p[:,:]==2] = 2 img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 + + + #temp + sep_mask = (img_poly==5)*1 + sep_mask = sep_mask.astype('uint8') + sep_mask = cv2.erode(sep_mask, kernel=KERNEL, iterations=2) + img_poly[img_poly==5] = 0 + img_poly[sep_mask==1] = 5 + # img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') if contours_only_text_parent_h: @@ -3341,9 +3351,13 @@ class Eynollah: if not len(co_text_all): return [], [] - labels_con = np.zeros((y_len, x_len, len(co_text_all)), dtype=bool) + labels_con = np.zeros((int(y_len /6.), int(x_len/6.), len(co_text_all)), dtype=bool) + co_text_all = [(i/6).astype(int) for i in co_text_all] for i in range(len(co_text_all)): img = labels_con[:,:,i].astype(np.uint8) + + #img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) + cv2.fillPoly(img, pts=[co_text_all[i]], color=(1,)) labels_con[:,:,i] = img @@ -3359,6 +3373,7 @@ class Eynollah: labels_con = resize_image(labels_con.astype(np.uint8), height1, width1).astype(bool) img_header_and_sep = resize_image(img_header_and_sep, height1, width1) img_poly = resize_image(img_poly, height3, width3) + inference_bs = 3 input_1 = np.zeros((inference_bs, height1, width1, 3)) @@ -4575,10 +4590,6 @@ class Eynollah: return pcgts - ## check the ro order - - - #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: @@ -4886,7 +4897,7 @@ class Eynollah_ocr: self.model_ocr.to(self.device) else: - self.model_ocr_dir = dir_models + "/model_step_75000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -4974,7 +4985,7 @@ class Eynollah_ocr: def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] - common_window = int(0.06*width) + common_window = int(0.22*width) width1 = int ( width/2. - common_window ) width2 = int ( width/2. + common_window ) @@ -4984,13 +4995,17 @@ class Eynollah_ocr: peaks_real, _ = find_peaks(sum_smoothed, height=0) - if len(peaks_real)>70: + if len(peaks_real)>35: - peaks_real = peaks_real[(peaks_realwidth1)] + #peaks_real = peaks_real[(peaks_realwidth1)] + argsort = np.argsort(sum_smoothed[peaks_real])[::-1] + peaks_real_top_six = peaks_real[argsort[:6]] + midpoint = textline_image.shape[1] / 2. + arg_closest = np.argmin(np.abs(peaks_real_top_six - midpoint)) - arg_max = np.argmax(sum_smoothed[peaks_real]) + #arg_max = np.argmax(sum_smoothed[peaks_real]) - peaks_final = peaks_real[arg_max] + peaks_final = peaks_real_top_six[arg_closest]#peaks_real[arg_max] return peaks_final else: @@ -5038,10 +5053,19 @@ class Eynollah_ocr: if width_new == 0: width_new = img.shape[1] + + ##if width_new+32 >= image_width: + ##width_new = width_new - 32 + + ###patch_zero = np.zeros((32, 32, 3))#+255 + ###patch_zero[9:19,8:18,:] = 0 + img = resize_image(img, image_height, width_new) img_fin = np.ones((image_height, image_width, 3))*255 - img_fin[:,:+width_new,:] = img[:,:,:] + ###img_fin[:,:32,:] = patch_zero[:,:,:] + ###img_fin[:,32:32+width_new,:] = img[:,:,:] + img_fin[:,:width_new,:] = img[:,:,:] img_fin = img_fin / 255. return img_fin @@ -5097,7 +5121,7 @@ class Eynollah_ocr: img_crop = img_poly_on_img[y:y+h, x:x+w, :] img_crop[mask_poly==0] = 255 - if h2w_ratio > 0.05: + if h2w_ratio > 0.1: cropped_lines.append(img_crop) cropped_lines_meging_indexing.append(0) else: @@ -5234,7 +5258,7 @@ class Eynollah_ocr: if self.draw_texts_on_image: total_bb_coordinates.append([x,y,w,h]) - h2w_ratio = h/float(w) + w_scaled = w * image_height/float(h) img_poly_on_img = np.copy(img) if self.prediction_with_both_of_rgb_and_bin: @@ -5252,7 +5276,7 @@ class Eynollah_ocr: img_crop_bin[mask_poly==0] = 255 if not self.export_textline_images_and_text: - if h2w_ratio > 0.1: + if w_scaled < 1.5*image_width: img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) @@ -5334,11 +5358,11 @@ class Eynollah_ocr: if self.prediction_with_both_of_rgb_and_bin: preds_bin = self.prediction_model.predict(imgs_bin, verbose=0) preds = (preds + preds_bin) / 2. - + pred_texts = self.decode_batch_predictions(preds) for ib in range(imgs.shape[0]): - pred_texts_ib = pred_texts[ib].strip("[UNK]") + pred_texts_ib = pred_texts[ib].replace("[UNK]", "") extracted_texts.append(pred_texts_ib) extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] @@ -5378,7 +5402,7 @@ class Eynollah_ocr: text_by_textregion = [] for ind in unique_cropped_lines_region_indexer: extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] - text_by_textregion.append(" ".join(extracted_texts_merged_un)) + text_by_textregion.append("".join(extracted_texts_merged_un)) indexer = 0 indexer_textregion = 0 From e2da7a623987f9c693957512f7902947699389ff Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 30 Apr 2025 16:06:29 +0200 Subject: [PATCH 08/10] Fix model name to return the correct machine-based model name --- src/eynollah/eynollah.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index a94e890..d47016b 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -259,7 +259,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" - self.model_reading_order_dir = dir_models + "/model_mb_ro_aug_3"#"/model_ens_reading_order_machine_based" + self.model_reading_order_dir = dir_models + "/model_ens_reading_order_machine_based" #"/modelens_12sp_elay_0_3_4__3_6_n" #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8" #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18" From f8b4d29a59098f8a82e90b2790015568841bc53f Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 2 May 2025 00:13:11 +0200 Subject: [PATCH 09/10] docker: prepackage ocrd-all-module-dir.json --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 4785fc1..4ba498b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,6 +36,8 @@ COPY . . COPY ocrd-tool.json . # prepackage ocrd-tool.json as ocrd-all-tool.json RUN ocrd ocrd-tool ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename))/ocrd-all-tool.json +# prepackage ocrd-all-module-dir.json +RUN ocrd ocrd-tool ocrd-tool.json dump-module-dirs > $(dirname $(ocrd bashlib filename))/ocrd-all-module-dir.json # install everything and reduce image size RUN make install EXTRAS=OCR && rm -rf /build/eynollah # smoke test From e9179e1d3458c9261989cf996863881f03a24ebd Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 2 May 2025 00:13:06 +0200 Subject: [PATCH 10/10] docker: use latest core base stage --- Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 5f2bf34..73d4d34 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,9 @@ PIP ?= pip3 EXTRAS ?= # DOCKER_BASE_IMAGE = artefakt.dev.sbb.berlin:5000/sbb/ocrd_core:v2.68.0 -DOCKER_BASE_IMAGE = docker.io/ocrd/core-cuda-tf2:v3.3.0 -DOCKER_TAG = ocrd/eynollah +DOCKER_BASE_IMAGE ?= docker.io/ocrd/core-cuda-tf2:latest +DOCKER_TAG ?= ocrd/eynollah +DOCKER ?= docker #SEG_MODEL := https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz #SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz @@ -117,7 +118,7 @@ coverage: # Build docker image docker: - docker build \ + $(DOCKER) build \ --build-arg DOCKER_BASE_IMAGE=$(DOCKER_BASE_IMAGE) \ --build-arg VCS_REF=$$(git rev-parse --short HEAD) \ --build-arg BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") \