Merge branch 'main' into eynollah_light

vahidrezanezhad 2 years ago committed by GitHub
commit 1621532092

@ -5,6 +5,7 @@ jobs:
build-python37:
machine:
- image: ubuntu-2004:2023.02.1
steps:
- checkout
- restore_cache:

@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: ['3.7', '3.8']
+ python-version: ['3.7'] # '3.8'
steps:
- uses: actions/checkout@v2

@ -5,6 +5,30 @@ Versioned according to [Semantic Versioning](http://semver.org/).
## Unreleased
## [0.2.0] - 2023-03-24
Changed:
* Convert default model from HDF5 to TF SavedModel, #91
Added:
* parameter `tables` to toggle table detection, #91
* default model described in ocrd-tool.json, #91
## [0.1.0] - 2023-03-22
Fixed:
* Do not produce spurious `TextEquiv`, #68
* Less spammy logging, #64, #65, #71
Changed:
* Upgrade to tensorflow 2.4.0, #74
* Improved README
* CI: test for python 3.7+, #90
## [0.0.11] - 2022-02-02
Fixed:
@ -72,6 +96,8 @@ Fixed:
Initial release
<!-- link-labels -->
[0.2.0]: ../../compare/v0.2.0...v0.1.0
[0.1.0]: ../../compare/v0.1.0...v0.0.11
[0.0.11]: ../../compare/v0.0.11...v0.0.10
[0.0.10]: ../../compare/v0.0.10...v0.0.9
[0.0.9]: ../../compare/v0.0.9...v0.0.8

@ -38,11 +38,15 @@ cd eynollah; pip install -e .
Alternatively, you can run `make install` or `make install-dev` for editable installation.
### Models
<details>
<summary>click to expand/collapse</summary><br/>
Eynollah makes use of up to 9 trained models, which are responsible for different operations such as size detection, column classification, image enhancement, page extraction, main layout detection, full layout detection and textline detection. That does not mean that all 9 models are always required for every document: based on the document characteristics and the parameters specified, different scenarios can be applied.
Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/).
Alternatively, running `make models` will download and extract models to `$(PWD)/models_eynollah`.
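For reference, a minimal Python sketch of what the download step amounts to, assuming the SavedModel archive URL listed in the `resources` entry of `ocrd-tool.json` further below and an assumed destination directory; `make models` remains the canonical way:

```python
import tarfile
import urllib.request

# Hypothetical stand-in for `make models`: fetch the SavedModel archive
# referenced in ocrd-tool.json and unpack it into a local models directory.
MODELS_URL = "https://qurator-data.de/eynollah/2021-04-25/SavedModel.tar.gz"

archive_path, _ = urllib.request.urlretrieve(MODELS_URL, "SavedModel.tar.gz")
with tarfile.open(archive_path) as archive:
    archive.extractall("models_eynollah")  # assumed destination directory
```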
* If you set the `-ae` (**a**llow image **e**nhancement) parameter to `true`, the tool will first check the ppi (pixels per inch) of the image and, if it is less than 300, resize the image; only then will image enhancement take place. Image enhancement can also occur without this option, but by setting it to `true` the layout XML data (e.g. coordinates) will be based on the resized and enhanced image instead of the original image.
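A minimal sketch of that gating logic, with illustrative names only (the actual resizing and enhancement are performed by eynollah's own models):

```python
import cv2  # OpenCV, already a dependency of eynollah


def maybe_resize_for_enhancement(image, dpi, allow_enhancement, target_dpi=300):
    """Illustrative only: upscale a low-resolution page before enhancement
    when the -ae / allow_enhancement option is requested."""
    if allow_enhancement and dpi < target_dpi:
        scale = target_dpi / dpi
        new_size = (int(image.shape[1] * scale), int(image.shape[0] * scale))
        image = cv2.resize(image, new_size, interpolation=cv2.INTER_CUBIC)
    return image
```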
In case you want to train your own model to use with Eynollah, have a look at [sbb_pixelwise_segmentation](https://github.com/qurator-spk/sbb_pixelwise_segmentation).
@ -99,3 +103,4 @@ ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models
```
still uses the original (RGB) image despite any binarization that may have occurred in previous OCR-D processing steps
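A hedged sketch of how an OCR-D processor can request the non-binarized image, assuming the `ocrd` `Workspace.image_from_page` API with its `feature_filter` argument; this illustrates the behaviour described above and is not necessarily the exact code in eynollah's OCR-D processor:

```python
def get_raw_page_image(workspace, page, page_id):
    """Illustrative helper: request a page image whose derivation does NOT
    include binarization, i.e. the original (RGB) image, even if a previous
    OCR-D step attached a binarized AlternativeImage."""
    page_image, page_coords, image_info = workspace.image_from_page(
        page, page_id, feature_filter='binarized')
    return page_image
```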

@ -1 +0,0 @@
__import__("pkg_resources").declare_namespace(__name__)

@ -29,6 +29,7 @@ from qurator.eynollah.eynollah import Eynollah
"-m",
help="directory of models",
type=click.Path(exists=True, file_okay=False),
required=True,
)
@click.option(
"--save_images",

@ -393,7 +393,6 @@ class Eynollah:
prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg
prediction_true = prediction_true.astype(int)
return prediction_true
def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred):
@ -495,12 +494,11 @@ class Eynollah:
label_p_pred = model_num_classifier.predict(img_in, verbose=0)
else:
label_p_pred = self.model_classifier.predict(img_in, verbose=0)
num_col = np.argmax(label_p_pred[0]) + 1
self.logger.info("Found %s columns (%s)", num_col, label_p_pred)
img_new, _ = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred)
if img_new.shape[1] > img.shape[1]:
@ -535,6 +533,7 @@ class Eynollah:
img = self.imread()
img_bin = None
t1 = time.time()
_, page_coord = self.early_page_for_num_of_column_classification(img_bin)
if not self.dir_in:
model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier)
@ -578,7 +577,6 @@ class Eynollah:
image_res = np.copy(img)
is_image_enhanced = False
self.logger.debug("exit resize_and_enhance_image_with_column_classifier")
return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin
@ -655,10 +653,6 @@ class Eynollah:
except:
self.logger.warning("no GPU device available")
# try:
# model = load_model(model_dir, compile=False)
# except:
# model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches})
if model_dir.endswith('.h5') and Path(model_dir[:-3]).exists():
# prefer SavedModel over HDF5 format if it exists
model_dir = model_dir[:-3]
@ -672,6 +666,7 @@ class Eynollah:
model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches})
self.models[model_dir] = model
return model, None
def do_prediction(self, patches, img, model, marginal_of_patch_percent=0.1):
@ -808,6 +803,7 @@ class Eynollah:
label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]))
seg = np.argmax(label_p_pred, axis=3)[0]
seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
prediction_true = resize_image(seg_color, img_h_page, img_w_page)
@ -858,7 +854,8 @@ class Eynollah:
index_y_d = img_h - img_height_model
img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
- label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
+ label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]),
+                              verbose=0)
seg = np.argmax(label_p_pred, axis=3)[0]
@ -957,8 +954,6 @@ class Eynollah:
prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color
prediction_true = prediction_true.astype(np.uint8)
##del model
##gc.collect()
return prediction_true
def extract_page(self):
@ -1044,7 +1039,6 @@ class Eynollah:
croped_page, page_coord = crop_image_inside_box(box, img)
return croped_page, page_coord
def extract_text_regions(self, img, patches, cols):
self.logger.debug("enter extract_text_regions")
img_height_h = img.shape[0]
@ -1138,7 +1132,6 @@ class Eynollah:
marginal_of_patch_percent = 0.1
prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent)
prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
self.logger.debug("exit extract_text_regions")
return prediction_regions, prediction_regions2
@ -1541,6 +1534,7 @@ class Eynollah:
else:
return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0]
def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process):
self.logger.debug('enter do_work_of_slopes')
slope_biggest = 0
@ -1713,6 +1707,7 @@ class Eynollah:
if not self.dir_in:
model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2)
img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]))
if self.dir_in:
@ -1762,8 +1757,6 @@ class Eynollah:
prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
if not self.dir_in:
model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens)
ratio_y=1
@ -1781,7 +1774,6 @@ class Eynollah:
mask_lines_only=(prediction_regions_org[:,:]==3)*1
mask_texts_only=(prediction_regions_org[:,:]==1)*1
mask_images_only=(prediction_regions_org[:,:]==2)*1
@ -1799,7 +1791,6 @@ class Eynollah:
text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1))
return text_regions_p_true, erosion_hurts, polygons_lines_xml
except:
@ -1821,7 +1812,6 @@ class Eynollah:
prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
if not self.dir_in:
model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens)
@ -1840,7 +1830,6 @@ class Eynollah:
prediction_regions_org=prediction_regions_org[:,:,0]
#mask_lines_only=(prediction_regions_org[:,:]==3)*1
#img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))
#prediction_regions_org = self.do_prediction(True, img, model_region)
@ -2369,7 +2358,6 @@ class Eynollah:
img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:]
prediction_ext = self.do_prediction(patches, img_new, model_region)
pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), model_region)
pre_updown = cv2.flip(pre_updown, -1)
@ -2392,7 +2380,6 @@ class Eynollah:
img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:]
prediction_ext = self.do_prediction(patches, img_new, model_region)
pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), model_region)
pre_updown = cv2.flip(pre_updown, -1)
@ -2408,9 +2395,7 @@ class Eynollah:
pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], model_region)
pre2 = self.do_prediction(patches, img[:,img_w_half:,:], model_region)
pre_full = self.do_prediction(patches, img[:,:,:], model_region)
pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), model_region)
pre_updown = cv2.flip(pre_updown, -1)
@ -2432,8 +2417,6 @@ class Eynollah:
prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20)
prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20)
return prediction_table_erode.astype(np.int16)
def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts):
img_g = self.imread(grayscale=True, uint8=True)
@ -2558,6 +2541,7 @@ class Eynollah:
textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline)
if self.textline_light:
textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16)
if self.plotter:
self.plotter.save_plot_of_textlines(textline_mask_tot_ea, image_page)
return textline_mask_tot_ea
@ -2787,7 +2771,6 @@ class Eynollah:
regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4
regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully)
regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier)
if num_col_classifier > 2:
regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0
@ -2836,6 +2819,7 @@ class Eynollah:
"""
self.logger.debug("enter run")
t0_tot = time.time()
if not self.dir_in:
@ -3023,6 +3007,7 @@ class Eynollah:
else:
slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
else:
scale_param = 1

@ -1,5 +1,5 @@
{
"version": "0.0.11",
"version": "0.2.0",
"git_url": "https://github.com/qurator-spk/eynollah",
"tools": {
"ocrd-eynollah-segment": {
@ -49,7 +49,17 @@
"default": false,
"description": "ignore the special role of headings during reading order detection"
}
},
"resources": [
{
"description": "models for eynollah (TensorFlow format)",
"url": "https://qurator-data.de/eynollah/2021-04-25/SavedModel.tar.gz",
"name": "default",
"size": 1483106598,
"type": "archive",
"path_in_archive": "default"
}
]
}
}
}

@ -20,7 +20,7 @@ def contours_in_same_horizon(cy_main_hor):
list_h.append(i)
if len(list_h) > 1:
all_args.append(list(set(list_h)))
- return np.unique(all_args)
+ return np.unique(np.array(all_args, dtype=object))
def find_contours_mean_y_diff(contours_main):
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
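For context on the `contours_in_same_horizon` change above: `all_args` holds index lists of unequal length, and newer NumPy refuses to build a regular array from such ragged input (a deprecation warning, and a `ValueError` from NumPy 1.24 on), hence the explicit `dtype=object` wrapper. A small sketch with assumed example data:

```python
import numpy as np

# Hypothetical grouping result with unequal-length inner lists ("ragged").
all_args = [[0, 1, 2], [3, 4]]

# np.array(all_args) without dtype=object cannot form a rectangular array;
# dtype=object keeps each inner list as a single array element instead.
arr = np.array(all_args, dtype=object)
print(np.unique(arr))  # deduplicates the grouped index lists
```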

@ -13,7 +13,6 @@ setup(
author='Vahid Rezanezhad',
url='https://github.com/qurator-spk/eynollah',
license='Apache License 2.0',
namespace_packages=['qurator'],
packages=find_packages(exclude=['tests']),
install_requires=install_requires,
package_data={
