mirror of
				https://github.com/qurator-spk/modstool.git
				synced 2025-11-03 19:04:13 +01:00 
			
		
		
		
	🐛 Handle multiple mods:role
This commit is contained in:
		
							parent
							
								
									75ff143a25
								
							
						
					
					
						commit
						9a8f7f69bd
					
				
					 2 changed files with 65 additions and 19 deletions
				
			
		| 
						 | 
				
			
			@ -165,6 +165,25 @@ class TagGroup:
 | 
			
		|||
                warnings.warn('Changed scriptTerm authority to lower case')
 | 
			
		||||
        return self
 | 
			
		||||
 | 
			
		||||
    def merge_sub_tags_to_set(self, raise_errors=True):
        """
        Merge the sub tags of all elements in this group into sets.

        Every element of ``self.group`` is converted with ``mods_to_dict``. The
        resulting dicts are then combined so that each sub tag seen in any of
        them maps to one set holding the values from all elements.

        :param raise_errors: forwarded to ``mods_to_dict`` for every element of
            the group, so a caller that runs with ``raise_errors=False`` can
            keep that setting for the sub-elements. Defaults to ``True``, which
            matches the default of ``mods_to_dict``.
        :return: dict mapping each sub tag to a set of its merged values. A sub
            tag whose values are all empty still gets an (empty) set.
        """
        sub_dicts = [mods_to_dict(e, raise_errors) for e in self.group]

        value = {}
        for d in sub_dicts:
            for sub_tag, v in d.items():
                # Create the entry before looking at the value, so that every
                # sub tag seen is present in the result even if it stays empty.
                s = value.setdefault(sub_tag, set())
                if not v:
                    continue
                # A sub element may already yield a set, e.g. multiple
                # scriptTerms in one language element (Antiqua and Fraktur in
                # a German language document). Flatten instead of nesting.
                if isinstance(v, set):
                    s.update(v)
                else:
                    s.add(v)
        return value
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def sorted_groupby(iterable, key=None):
 | 
			
		||||
    """
 | 
			
		||||
| 
						 | 
				
			
			@ -303,21 +322,8 @@ def mods_to_dict(mods, raise_errors=True):
 | 
			
		|||
                k = 'genre-{}'.format(authority) if authority is not None else 'genre'
 | 
			
		||||
                value[k] = {e.text for e in group if e.attrib.get('authority') == authority}
 | 
			
		||||
        elif tag == '{http://www.loc.gov/mods/v3}language':
 | 
			
		||||
            # Make languageTerm/scriptTerm sets
 | 
			
		||||
            sub_dicts = [mods_to_dict(e) for e in group]
 | 
			
		||||
            sub_tags = {k for d in sub_dicts for k in d.keys()}
 | 
			
		||||
            for sub_tag in sub_tags:
 | 
			
		||||
                s = set()
 | 
			
		||||
                for d in sub_dicts:
 | 
			
		||||
                    v = d.get(sub_tag)
 | 
			
		||||
                    if v:
 | 
			
		||||
                        # There could be multiple scriptTerms in one language element, e.g. Antiqua and Fraktur in a
 | 
			
		||||
                        # German language document.
 | 
			
		||||
                        if isinstance(v, set):
 | 
			
		||||
                            s.update(v)
 | 
			
		||||
                        else:
 | 
			
		||||
                            s.add(v)
 | 
			
		||||
                value['language_{}'.format(sub_tag)] = s
 | 
			
		||||
            value["language"] = TagGroup(tag, group) \
 | 
			
		||||
                .merge_sub_tags_to_set()
 | 
			
		||||
        elif tag == '{http://www.loc.gov/mods/v3}languageTerm':
 | 
			
		||||
            value['languageTerm'] = TagGroup(tag, group) \
 | 
			
		||||
                .has_attributes({'authority': 'iso639-2b', 'type': 'code'}) \
 | 
			
		||||
| 
						 | 
				
			
			@ -333,11 +339,13 @@ def mods_to_dict(mods, raise_errors=True):
 | 
			
		|||
            for n, e in enumerate(group):
 | 
			
		||||
                value['name{}'.format(n)] = mods_to_dict(e, raise_errors)
 | 
			
		||||
        elif tag == '{http://www.loc.gov/mods/v3}role':
 | 
			
		||||
            value['role'] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors)
 | 
			
		||||
            value["role"] = TagGroup(tag, group) \
 | 
			
		||||
                .has_no_attributes() \
 | 
			
		||||
                .merge_sub_tags_to_set()
 | 
			
		||||
        elif tag == '{http://www.loc.gov/mods/v3}roleTerm':
 | 
			
		||||
            value['roleTerm'] = TagGroup(tag, group) \
 | 
			
		||||
                .is_singleton().has_attributes({'authority': 'marcrelator', 'type': 'code'}) \
 | 
			
		||||
                .text()
 | 
			
		||||
                .has_attributes({'authority': 'marcrelator', 'type': 'code'}) \
 | 
			
		||||
                .text_set()
 | 
			
		||||
        elif tag == '{http://www.loc.gov/mods/v3}namePart':
 | 
			
		||||
            for e in group:
 | 
			
		||||
                if not e.attrib.get('type'):
 | 
			
		||||
| 
						 | 
				
			
			@ -412,7 +420,7 @@ def mets_to_dict(mets, raise_errors=True):
 | 
			
		|||
                raise ValueError('Unknown tag "{}"'.format(tag))
 | 
			
		||||
            else:
 | 
			
		||||
                pass
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
    return value
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -33,6 +33,44 @@ def test_multitple_language_languageTerm():
 | 
			
		|||
    """)
 | 
			
		||||
    assert d['language_languageTerm'] == {'ger', 'lat'}
 | 
			
		||||
 | 
			
		||||
def test_role_roleTerm():
    """
    A single mods:role/mods:roleTerm should end up as a one-element set.
    """
    mods_xml = """
    <mods:mods xmlns:mods="http://www.loc.gov/mods/v3">
    <mods:name type="personal" valueURI="http://d-nb.info/gnd/117357669">
      <mods:displayForm>Wurm, Mary</mods:displayForm>
      <mods:namePart type="given">Mary</mods:namePart>
      <mods:nameIdentifier type="gbv-ppn">078789583</mods:nameIdentifier>
      <mods:namePart type="family">Wurm</mods:namePart>
      <mods:role>
        <mods:roleTerm authority="marcrelator" type="code">cmp</mods:roleTerm>
      </mods:role>
    </mods:name>
    </mods:mods>
    """
    flattened = dict_fromstring(mods_xml)
    role_terms = flattened['name0_role_roleTerm']
    assert role_terms == {'cmp'}
 | 
			
		||||
 | 
			
		||||
def test_multiple_role_roleTerm():
    """
    Several mods:role elements, each with its own mods:roleTerm, should be
    merged into one column holding all role codes.
    """
    mods_xml = """
    <mods:mods xmlns:mods="http://www.loc.gov/mods/v3">
    <mods:name type="personal" valueURI="http://d-nb.info/gnd/117357669">
      <mods:displayForm>Wurm, Mary</mods:displayForm>
      <mods:namePart type="given">Mary</mods:namePart>
      <mods:nameIdentifier type="gbv-ppn">078789583</mods:nameIdentifier>
      <mods:namePart type="family">Wurm</mods:namePart>
      <mods:role>
        <mods:roleTerm authority="marcrelator" type="code">cmp</mods:roleTerm>
      </mods:role>
      <mods:role>
        <mods:roleTerm authority="marcrelator" type="code">aut</mods:roleTerm>
      </mods:role>
    </mods:name>
    </mods:mods>
    """
    flattened = dict_fromstring(mods_xml)
    role_terms = flattened['name0_role_roleTerm']
    assert role_terms == {'cmp', 'aut'}
 | 
			
		||||
 | 
			
		||||
def test_scriptTerm():
 | 
			
		||||
    """
 | 
			
		||||
    Same language using different scripts have one mods:language, with multiple scriptTerms inside.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue