extract_page(): get external contours instead of indiscriminate tree

This commit is contained in:
Robert Sachunsky 2026-04-27 00:02:48 +02:00
parent 287bebde0d
commit b5bc161a4c

View file

@ -849,7 +849,6 @@ class Eynollah:
return segmentation, confidence return segmentation, confidence
def extract_page(self, image): def extract_page(self, image):
self.logger.debug("enter extract_page")
cropped_page = img = image['img_res'] cropped_page = img = image['img_res']
h, w = img.shape[:2] h, w = img.shape[:2]
page_coord = [0, h, 0, w] page_coord = [0, h, 0, w]
@ -858,14 +857,13 @@ class Eynollah:
[[w, h]], [[w, h]],
[[0, h]]])] [[0, h]]])]
if not self.ignore_page_extraction: if not self.ignore_page_extraction:
self.logger.debug("enter extract_page")
#cv2.GaussianBlur(img, (5, 5), 0) #cv2.GaussianBlur(img, (5, 5), 0)
prediction = self.do_prediction(False, img, self.model_zoo.get("page")) prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
contours, _ = cv2.findContours(prediction, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours, _ = cv2.findContours(prediction, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if len(contours):
if len(contours)>0: areas = np.array(list(map(cv2.contourArea, contours)))
cnt_size = np.array([cv2.contourArea(contours[j]) cnt = contours[np.argmax(areas)]
for j in range(len(contours))])
cnt = contours[np.argmax(cnt_size)]
cont_page = [cnt] cont_page = [cnt]
x, y, w, h = cv2.boundingRect(cnt) x, y, w, h = cv2.boundingRect(cnt)
#if x <= 30: #if x <= 30: