From 1dac77a2f5c8054140dca4be81d32d1ad9735426 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Sat, 9 Dec 2023 10:36:55 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Make=20get=5Fstruct=5Flog=20faster?= =?UTF-8?q?=20by=20using=20find(all)=20instead=20of=20xpath()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/mods4pandas/mods4pandas.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/qurator/mods4pandas/mods4pandas.py b/qurator/mods4pandas/mods4pandas.py index 16934e7..32728a3 100755 --- a/qurator/mods4pandas/mods4pandas.py +++ b/qurator/mods4pandas/mods4pandas.py @@ -308,12 +308,14 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]: # This is all XLink, there might be a more generic way to traverse the links. However, currently, # it suffices to do this the old-fashioned way. - sm_links = mets.xpath(f'//mets:structLink/mets:smLink[@xlink:to="{to_phys}"]', namespaces=ns) + sm_links = mets.findall( + f'//mets:structLink/mets:smLink[@xlink:to="{to_phys}"]', ns + ) targets = [] for sm_link in sm_links: xlink_from = sm_link.attrib.get(f"{{{ns['xlink']}}}from") - targets.extend(mets.xpath(f'//mets:div[@ID="{xlink_from}"]', namespaces=ns)) + targets.extend(mets.findall(f'//mets:div[@ID="{xlink_from}"]', ns)) return targets struct_divs = set(get_struct_log(to_phys=page_dict["ID"]))