mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-06-10 04:09:56 +02:00
⚡ Make get_struct_log faster by using precise predicates
This commit is contained in:
parent
1dac77a2f5
commit
448639b05b
1 changed files with 9 additions and 3 deletions
|
@ -266,9 +266,12 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]:
|
||||||
ppn = get_mets_recordIdentifier()
|
ppn = get_mets_recordIdentifier()
|
||||||
|
|
||||||
# Getting per-page/structure information is a bit different
|
# Getting per-page/structure information is a bit different
|
||||||
structMap_PHYSICAL = (mets.xpath('//mets:structMap[@TYPE="PHYSICAL"]', namespaces=ns) or [None])[0]
|
structMap_PHYSICAL = mets.find('./mets:structMap[@TYPE="PHYSICAL"]', ns)
|
||||||
if not structMap_PHYSICAL:
|
structMap_LOGICAL = mets.find('./mets:structMap[@TYPE="LOGICAL"]', ns)
|
||||||
|
if structMap_PHYSICAL is None:
|
||||||
raise ValueError("No structMap[@TYPE='PHYSICAL'] found")
|
raise ValueError("No structMap[@TYPE='PHYSICAL'] found")
|
||||||
|
if structMap_LOGICAL is None:
|
||||||
|
raise ValueError("No structMap[@TYPE='LOGICAL'] found")
|
||||||
|
|
||||||
div_physSequence = structMap_PHYSICAL[0]
|
div_physSequence = structMap_PHYSICAL[0]
|
||||||
assert div_physSequence.attrib.get("TYPE") == "physSequence"
|
assert div_physSequence.attrib.get("TYPE") == "physSequence"
|
||||||
|
@ -278,6 +281,9 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]:
|
||||||
file_ = mets.find(f'.//{{{ns["mets"]}}}file[@ID="{ID}"]')
|
file_ = mets.find(f'.//{{{ns["mets"]}}}file[@ID="{ID}"]')
|
||||||
return file_
|
return file_
|
||||||
|
|
||||||
|
def get_mets_div(*, ID):
|
||||||
|
if ID:
|
||||||
|
return structMap_LOGICAL.findall(f'.//mets:div[@ID="{ID}"]', ns)
|
||||||
|
|
||||||
for page in div_physSequence:
|
for page in div_physSequence:
|
||||||
|
|
||||||
|
@ -315,7 +321,7 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]:
|
||||||
targets = []
|
targets = []
|
||||||
for sm_link in sm_links:
|
for sm_link in sm_links:
|
||||||
xlink_from = sm_link.attrib.get(f"{{{ns['xlink']}}}from")
|
xlink_from = sm_link.attrib.get(f"{{{ns['xlink']}}}from")
|
||||||
targets.extend(mets.findall(f'//mets:div[@ID="{xlink_from}"]', ns))
|
targets.extend(get_mets_div(ID=xlink_from))
|
||||||
return targets
|
return targets
|
||||||
|
|
||||||
struct_divs = set(get_struct_log(to_phys=page_dict["ID"]))
|
struct_divs = set(get_struct_log(to_phys=page_dict["ID"]))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue