From 8237b3edafc404eacdf85912168b93808386ee44 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 22 Aug 2019 15:35:19 +0200 Subject: [PATCH] =?UTF-8?q?=EF=BF=BD=20dinglehopper:=20Substitute=20more?= =?UTF-8?q?=20characters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/dinglehopper/substitute_equivalences.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/qurator/dinglehopper/substitute_equivalences.py b/qurator/dinglehopper/substitute_equivalences.py index 9d5daa9..7c37c7f 100644 --- a/qurator/dinglehopper/substitute_equivalences.py +++ b/qurator/dinglehopper/substitute_equivalences.py @@ -22,11 +22,18 @@ def substitute_equivalences(s): '': 'ct', '’': '\'', '⸗': '-', - '': 'tz', # MUFI: LATIN SMALL LIGATURE TZ - 'aͤ': 'ä', # LATIN SMALL LETTER A, COMBINING LATIN SMALL LETTER E - 'oͤ': 'ö', # LATIN SMALL LETTER O, COMBINING LATIN SMALL LETTER E - 'uͤ': 'ü', # LATIN SMALL LETTER U, COMBINING LATIN SMALL LETTER E + '': 'tz', # MUFI: LATIN SMALL LIGATURE TZ + 'aͤ': 'ä', # LATIN SMALL LETTER A, COMBINING LATIN SMALL LETTER E + 'oͤ': 'ö', # LATIN SMALL LETTER O, COMBINING LATIN SMALL LETTER E + 'uͤ': 'ü', # LATIN SMALL LETTER U, COMBINING LATIN SMALL LETTER E + '\uf535': 'Qu', # eMOP: Latin ligature capital Q small u + 'ij': 'ij', # U+0133 LATIN SMALL LIGATURE IJ + '\uE8BF': 'q&', # MUFI: LATIN SMALL LETTER Q LIGATED WITH FINAL ET XXX How to replace this correctly? + '\uEBA5': 'ſp', # MUFI: LATIN SMALL LIGATURE LONG S P + 'st': 'st', # U+FB06 LATIN SMALL LIGATURE ST + '\uF50E': 'q́' # U+F50E LATIN SMALL LETTER Q WITH ACUTE ACCENT } + for fr, to in equivalences.items(): s = s.replace(fr, to) return s