mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-08 11:49:55 +02:00
add option to overwrite output xml, but skip by default if file exists
This commit is contained in:
parent
b9ca7a6191
commit
b4b0890294
3 changed files with 22 additions and 6 deletions
|
@@ -97,6 +97,12 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out)
|
||||||
type=click.Path(exists=True, file_okay=False),
|
type=click.Path(exists=True, file_okay=False),
|
||||||
required=True,
|
required=True,
|
||||||
)
|
)
|
||||||
|
@click.option(
|
||||||
|
"--overwrite",
|
||||||
|
"-O",
|
||||||
|
help="overwrite (instead of skipping) if output xml exists",
|
||||||
|
is_flag=True,
|
||||||
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"--dir_in",
|
"--dir_in",
|
||||||
"-di",
|
"-di",
|
||||||
|
@@ -253,7 +259,7 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out)
|
||||||
help="Override log level globally to this",
|
help="Override log level globally to this",
|
||||||
)
|
)
|
||||||
|
|
||||||
def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level):
|
def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level):
|
||||||
initLogging()
|
initLogging()
|
||||||
if log_level:
|
if log_level:
|
||||||
getLogger('eynollah').setLevel(getLevelName(log_level))
|
getLogger('eynollah').setLevel(getLevelName(log_level))
|
||||||
|
@@ -273,6 +279,7 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
eynollah = Eynollah(
|
eynollah = Eynollah(
|
||||||
image_filename=image,
|
image_filename=image,
|
||||||
|
overwrite=overwrite,
|
||||||
dir_out=out,
|
dir_out=out,
|
||||||
dir_in=dir_in,
|
dir_in=dir_in,
|
||||||
dir_models=model,
|
dir_models=model,
|
||||||
|
|
|
@@ -165,6 +165,7 @@ class Eynollah:
|
||||||
image_filename=None,
|
image_filename=None,
|
||||||
image_pil=None,
|
image_pil=None,
|
||||||
image_filename_stem=None,
|
image_filename_stem=None,
|
||||||
|
overwrite=False,
|
||||||
dir_out=None,
|
dir_out=None,
|
||||||
dir_in=None,
|
dir_in=None,
|
||||||
dir_of_cropped_images=None,
|
dir_of_cropped_images=None,
|
||||||
|
@@ -203,6 +204,7 @@ class Eynollah:
|
||||||
if override_dpi:
|
if override_dpi:
|
||||||
self.dpi = override_dpi
|
self.dpi = override_dpi
|
||||||
self.image_filename = image_filename
|
self.image_filename = image_filename
|
||||||
|
self.overwrite = overwrite
|
||||||
self.dir_out = dir_out
|
self.dir_out = dir_out
|
||||||
self.dir_in = dir_in
|
self.dir_in = dir_in
|
||||||
self.dir_of_all = dir_of_all
|
self.dir_of_all = dir_of_all
|
||||||
|
@@ -360,6 +362,7 @@ class Eynollah:
|
||||||
curved_line=self.curved_line,
|
curved_line=self.curved_line,
|
||||||
textline_light = self.textline_light,
|
textline_light = self.textline_light,
|
||||||
pcgts=self.pcgts)
|
pcgts=self.pcgts)
|
||||||
|
|
||||||
def imread(self, grayscale=False, uint8=True):
|
def imread(self, grayscale=False, uint8=True):
|
||||||
key = 'img'
|
key = 'img'
|
||||||
if grayscale:
|
if grayscale:
|
||||||
|
@@ -4460,8 +4463,14 @@ class Eynollah:
|
||||||
if self.dir_in:
|
if self.dir_in:
|
||||||
self.reset_file_name_dir(os.path.join(self.dir_in,img_name))
|
self.reset_file_name_dir(os.path.join(self.dir_in,img_name))
|
||||||
#print("text region early -11 in %.1fs", time.time() - t0)
|
#print("text region early -11 in %.1fs", time.time() - t0)
|
||||||
|
|
||||||
|
if os.path.exists(self.writer.output_filename):
|
||||||
|
if self.overwrite:
|
||||||
|
self.logger.warning("will overwrite existing output file '%s'", self.writer.output_filename)
|
||||||
|
else:
|
||||||
|
self.logger.warning("will skip input for existing output file '%s'", self.writer.output_filename)
|
||||||
|
continue
|
||||||
|
|
||||||
if self.extract_only_images:
|
if self.extract_only_images:
|
||||||
img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
|
img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
|
||||||
self.logger.info("Enhancing took %.1fs ", time.time() - t0)
|
self.logger.info("Enhancing took %.1fs ", time.time() - t0)
|
||||||
|
|
|
@@ -28,6 +28,7 @@ class EynollahXmlWriter():
|
||||||
self.counter = EynollahIdCounter()
|
self.counter = EynollahIdCounter()
|
||||||
self.dir_out = dir_out
|
self.dir_out = dir_out
|
||||||
self.image_filename = image_filename
|
self.image_filename = image_filename
|
||||||
|
self.output_filename = os.path.join(self.dir_out, self.image_filename_stem) + ".xml"
|
||||||
self.curved_line = curved_line
|
self.curved_line = curved_line
|
||||||
self.textline_light = textline_light
|
self.textline_light = textline_light
|
||||||
self.pcgts = pcgts
|
self.pcgts = pcgts
|
||||||
|
@@ -163,9 +164,8 @@ class EynollahXmlWriter():
|
||||||
coords.set_points(points_co[:-1])
|
coords.set_points(points_co[:-1])
|
||||||
|
|
||||||
def write_pagexml(self, pcgts):
|
def write_pagexml(self, pcgts):
|
||||||
out_fname = os.path.join(self.dir_out, self.image_filename_stem) + ".xml"
|
self.logger.info("output filename: '%s'", self.output_filename)
|
||||||
self.logger.info("output filename: '%s'", out_fname)
|
with open(self.output_filename, 'w') as f:
|
||||||
with open(out_fname, 'w') as f:
|
|
||||||
f.write(to_xml(pcgts))
|
f.write(to_xml(pcgts))
|
||||||
|
|
||||||
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines):
|
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue