From 4e7b8ed6420393d88263087957ec85e2433e7a21 Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Fri, 14 Apr 2023 12:53:11 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Convert=20mods:relatedItem=20for=20?= =?UTF-8?q?types=20original=20and=20host?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes gh-21. --- qurator/mods4pandas/mods4pandas.py | 10 +++++++++- qurator/mods4pandas/tests/test_modstool.py | 14 +++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/qurator/mods4pandas/mods4pandas.py b/qurator/mods4pandas/mods4pandas.py index 299fa5c..781c42c 100755 --- a/qurator/mods4pandas/mods4pandas.py +++ b/qurator/mods4pandas/mods4pandas.py @@ -152,7 +152,15 @@ def mods_to_dict(mods, raise_errors=True): .has_attributes({'authority': 'iso15924', 'type': 'code'}) \ .text_set() elif tag == '{http://www.loc.gov/mods/v3}relatedItem': - pass + tag_group = TagGroup(tag, group) + for type_, grouped_group in sorted_groupby(tag_group.group, key=lambda g: g.attrib['type']): + sub_tag = 'relatedItem-{}'.format(type_) + grouped_group = list(grouped_group) + if type_ in ["original", "host"]: + value[sub_tag] = TagGroup(sub_tag, grouped_group).is_singleton().descend(raise_errors) + else: + # TODO type="series" + pass elif tag == '{http://www.loc.gov/mods/v3}name': for n, e in enumerate(group): value['name{}'.format(n)] = mods_to_dict(e, raise_errors) diff --git a/qurator/mods4pandas/tests/test_modstool.py b/qurator/mods4pandas/tests/test_modstool.py index 37355eb..5d628a2 100644 --- a/qurator/mods4pandas/tests/test_modstool.py +++ b/qurator/mods4pandas/tests/test_modstool.py @@ -1,4 +1,3 @@ -from tkinter import W from lxml import etree as ET import pytest @@ -126,3 +125,16 @@ def test_originInfo_no_event_type(): assert len(ws) == 1 assert ws[0].message.args[0] == 'Filtered {http://www.loc.gov/mods/v3}originInfo element (has no eventType)' + +def test_relatedItem(): + d = dict_fromstring(""" + + + + PPN167755803 + + + + """) + + assert d['relatedItem-original_recordInfo_recordIdentifier'] == 'PPN167755803'