mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-06-09 19:59:57 +02:00
⚡ Make get_mets_file a lot faster by using find() instead of xpath()
This commit is contained in:
parent
16a3a3bcc8
commit
90c60ebb80
1 changed file with 7 additions and 5 deletions
|
@ -273,6 +273,12 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]:
|
|||
div_physSequence = structMap_PHYSICAL[0]
|
||||
assert div_physSequence.attrib.get("TYPE") == "physSequence"
|
||||
|
||||
def get_mets_file(*, ID):
|
||||
if ID:
|
||||
file_ = mets.find(f'.//{{{ns["mets"]}}}file[@ID="{ID}"]')
|
||||
return file_
|
||||
|
||||
|
||||
for page in div_physSequence:
|
||||
|
||||
# TODO sort by ORDER?
|
||||
|
@ -285,12 +291,8 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]:
|
|||
file_id = fptr.attrib.get("FILEID")
|
||||
assert file_id
|
||||
|
||||
def get_mets_file(*, ID):
|
||||
if ID:
|
||||
file_ = (mets.xpath(f'//mets:file[@ID="{ID}"]', namespaces=ns) or [None])[0]
|
||||
return file_
|
||||
|
||||
file_ = get_mets_file(ID=file_id)
|
||||
assert file_ is not None
|
||||
fileGrp_USE = file_.getparent().attrib.get("USE")
|
||||
file_FLocat_href = (file_.xpath('mets:FLocat/@xlink:href', namespaces=ns) or [None])[0]
|
||||
page_dict[f"fileGrp_{fileGrp_USE}_file_FLocat_href"] = file_FLocat_href
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue