diff --git a/src/mods4pandas/lib.py b/src/mods4pandas/lib.py
index ff33c2a..fed3a01 100644
--- a/src/mods4pandas/lib.py
+++ b/src/mods4pandas/lib.py
@@ -194,6 +194,22 @@ class TagGroup:
             warnings.warn("Added type='text')")
         return self
 
+    def remove_attributes(self, attribs) -> TagGroup:
+        """Remove the given attribute(s) from every element of the group.
+
+        ``attribs`` may be a single attribute name or a sequence of names;
+        attributes an element does not carry are ignored. Returns ``self``
+        so calls can be chained.
+        """
+        # A bare str is itself a Sequence; wrap it so it is treated as one
+        # attribute name instead of being iterated character by character.
+        if isinstance(attribs, str) or not isinstance(attribs, Sequence):
+            attribs = [attribs]
+        for e in self.group:
+            for a in attribs:
+                e.attrib.pop(a, None)
+        return self
+
     def merge_sub_tags_to_set(self) -> dict:
         from .mods4pandas import mods_to_dict
 
diff --git a/src/mods4pandas/mods4pandas.py b/src/mods4pandas/mods4pandas.py
index 3252999..888aba4 100755
--- a/src/mods4pandas/mods4pandas.py
+++ b/src/mods4pandas/mods4pandas.py
@@ -200,6 +200,7 @@ def mods_to_dict(mods, raise_errors=True):
                 TagGroup(tag, group)
                 .filter(only_standard_title)
                 .is_singleton()
+                .remove_attributes(["ID", "{http://www.w3.org/XML/1998/namespace}lang"])
                 .has_no_attributes()
                 .descend(raise_errors)
             )