From 64aafbb88c5d1782b541ead31482627d818bf020 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Thu, 7 Aug 2025 20:29:40 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B9=20MODS:=20Handle=20mods:languageTe?=
 =?UTF-8?q?rm=20with=20authority=3Diso639-2/rfc3066?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section is not part of the commit message):
* fix_language_term reads the authority attribute with .get(), so an
  element without that attribute no longer raises KeyError in this method.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. If the context does not match the target tree, try
  `git apply --ignore-whitespace`. The post-image blob ids on the index
  lines predate this revision.

 src/mods4pandas/lib.py         | 19 +++++++++++++++++++
 src/mods4pandas/mods4pandas.py |  1 +
 2 files changed, 20 insertions(+)

diff --git a/src/mods4pandas/lib.py b/src/mods4pandas/lib.py
index fed3a01..45f50e8 100644
--- a/src/mods4pandas/lib.py
+++ b/src/mods4pandas/lib.py
@@ -187,6 +187,25 @@ class TagGroup:
                 warnings.warn("Changed scriptTerm authority to lower case")
         return self
 
+    def fix_language_term(self) -> TagGroup:
+        """Rewrite known languageTerm authorities to iso639-2b, in place.
+
+        Elements without an authority attribute are left untouched, so the
+        has_attributes() step that follows in mods_to_dict still sees them.
+        """
+        for e in self.group:
+            authority = e.attrib.get("authority")
+            if authority == "iso639-2":
+                e.attrib["authority"] = "iso639-2b"
+                warnings.warn("Changed languageTerm authority to iso639-2b")
+            elif authority == "rfc3066" and e.text == "de":
+                # Only the code "de" is mapped; any other rfc3066 value is
+                # left unchanged.
+                e.attrib["authority"] = "iso639-2b"
+                e.text = "deu"
+                warnings.warn("Changed languageTerm authority from rfc3066 to iso639-2b")
+        return self
+
     def add_missing_type_text(self) -> TagGroup:
         for e in self.group:
             if not e.attrib.get("type") == "text":
diff --git a/src/mods4pandas/mods4pandas.py b/src/mods4pandas/mods4pandas.py
index 888aba4..1c0d48a 100755
--- a/src/mods4pandas/mods4pandas.py
+++ b/src/mods4pandas/mods4pandas.py
@@ -255,6 +255,7 @@ def mods_to_dict(mods, raise_errors=True):
         elif tag == "{http://www.loc.gov/mods/v3}languageTerm":
             value["languageTerm"] = (
                 TagGroup(tag, group)
+                .fix_language_term()
                 .has_attributes({"authority": "iso639-2b", "type": "code"})
                 .text_set()
             )