From 4d6e1f4ff4e164c1319aaaae51cb2b366d30fb96 Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Tue, 3 Dec 2024 17:24:24 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Add=20missing=20tag=20alto:fileI?= =?UTF-8?q?dentifier?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mods4pandas/alto4pandas.py | 2 + .../data/alto/PPN1844793923/00000017.xml | 663 ++++++++++++++++++ 2 files changed, 665 insertions(+) create mode 100644 src/mods4pandas/tests/data/alto/PPN1844793923/00000017.xml diff --git a/src/mods4pandas/alto4pandas.py b/src/mods4pandas/alto4pandas.py index 8dde40a..8c8f934 100755 --- a/src/mods4pandas/alto4pandas.py +++ b/src/mods4pandas/alto4pandas.py @@ -76,6 +76,8 @@ def alto_to_dict(alto, raise_errors=True): value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors) elif localname == 'fileName': value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text() + elif localname == 'fileIdentifier': + value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text() elif localname == 'Layout': value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors) diff --git a/src/mods4pandas/tests/data/alto/PPN1844793923/00000017.xml b/src/mods4pandas/tests/data/alto/PPN1844793923/00000017.xml new file mode 100644 index 0000000..7f658fa --- /dev/null +++ b/src/mods4pandas/tests/data/alto/PPN1844793923/00000017.xml @@ -0,0 +1,663 @@ + + + + + + pixel + + 16_b079a_default.jpg + https://content.staatsbibliothek-berlin.de/dc/1844793923-0017/full/full/0/default.jpg + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +