mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-08 11:49:55 +02:00
Add option to overwrite the output XML; by default, skip the input if its output file already exists
This commit is contained in:
parent
b9ca7a6191
commit
b4b0890294
3 changed files with 22 additions and 6 deletions
|
@ -97,6 +97,12 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out)
|
|||
type=click.Path(exists=True, file_okay=False),
|
||||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
"--overwrite",
|
||||
"-O",
|
||||
help="overwrite (instead of skipping) if output xml exists",
|
||||
is_flag=True,
|
||||
)
|
||||
@click.option(
|
||||
"--dir_in",
|
||||
"-di",
|
||||
|
@ -253,7 +259,7 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out)
|
|||
help="Override log level globally to this",
|
||||
)
|
||||
|
||||
def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level):
|
||||
def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level):
|
||||
initLogging()
|
||||
if log_level:
|
||||
getLogger('eynollah').setLevel(getLevelName(log_level))
|
||||
|
@ -273,6 +279,7 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s
|
|||
sys.exit(1)
|
||||
eynollah = Eynollah(
|
||||
image_filename=image,
|
||||
overwrite=overwrite,
|
||||
dir_out=out,
|
||||
dir_in=dir_in,
|
||||
dir_models=model,
|
||||
|
|
|
@ -165,6 +165,7 @@ class Eynollah:
|
|||
image_filename=None,
|
||||
image_pil=None,
|
||||
image_filename_stem=None,
|
||||
overwrite=False,
|
||||
dir_out=None,
|
||||
dir_in=None,
|
||||
dir_of_cropped_images=None,
|
||||
|
@ -203,6 +204,7 @@ class Eynollah:
|
|||
if override_dpi:
|
||||
self.dpi = override_dpi
|
||||
self.image_filename = image_filename
|
||||
self.overwrite = overwrite
|
||||
self.dir_out = dir_out
|
||||
self.dir_in = dir_in
|
||||
self.dir_of_all = dir_of_all
|
||||
|
@ -360,6 +362,7 @@ class Eynollah:
|
|||
curved_line=self.curved_line,
|
||||
textline_light = self.textline_light,
|
||||
pcgts=self.pcgts)
|
||||
|
||||
def imread(self, grayscale=False, uint8=True):
|
||||
key = 'img'
|
||||
if grayscale:
|
||||
|
@ -4461,6 +4464,12 @@ class Eynollah:
|
|||
self.reset_file_name_dir(os.path.join(self.dir_in,img_name))
|
||||
#print("text region early -11 in %.1fs", time.time() - t0)
|
||||
|
||||
if os.path.exists(self.writer.output_filename):
|
||||
if self.overwrite:
|
||||
self.logger.warning("will overwrite existing output file '%s'", self.writer.output_filename)
|
||||
else:
|
||||
self.logger.warning("will skip input for existing output file '%s'", self.writer.output_filename)
|
||||
continue
|
||||
|
||||
if self.extract_only_images:
|
||||
img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
|
||||
|
|
|
@ -28,6 +28,7 @@ class EynollahXmlWriter():
|
|||
self.counter = EynollahIdCounter()
|
||||
self.dir_out = dir_out
|
||||
self.image_filename = image_filename
|
||||
self.output_filename = os.path.join(self.dir_out, self.image_filename_stem) + ".xml"
|
||||
self.curved_line = curved_line
|
||||
self.textline_light = textline_light
|
||||
self.pcgts = pcgts
|
||||
|
@ -163,9 +164,8 @@ class EynollahXmlWriter():
|
|||
coords.set_points(points_co[:-1])
|
||||
|
||||
def write_pagexml(self, pcgts):
|
||||
out_fname = os.path.join(self.dir_out, self.image_filename_stem) + ".xml"
|
||||
self.logger.info("output filename: '%s'", out_fname)
|
||||
with open(out_fname, 'w') as f:
|
||||
self.logger.info("output filename: '%s'", self.output_filename)
|
||||
with open(self.output_filename, 'w') as f:
|
||||
f.write(to_xml(pcgts))
|
||||
|
||||
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue