from lxml import etree as ET
import pytest
from qurator.mods4pandas.mods4pandas import mods_to_dict
from qurator.mods4pandas.lib import flatten
def dict_fromstring(x):
    """Parse the MODS XML string ``x`` and return it in flattened form.

    The string is parsed with ``ET.fromstring``, the resulting element is
    converted by ``mods_to_dict``, and that result is passed through
    ``flatten`` before being returned.
    """
    root = ET.fromstring(x)
    nested = mods_to_dict(root)
    return flatten(nested)
def test_single_language_languageTerm():
    """The ``language_languageTerm`` entry must equal the set of both codes."""
    # NOTE(review): no XML tags are visible in this literal -- confirm the
    # fixture markup is intact.
    fixture = """
    
        
            lat
            ger
        
    
    """
    flattened = dict_fromstring(fixture)
    expected_codes = {'ger', 'lat'}
    assert flattened['language_languageTerm'] == expected_codes
def test_multitple_language_languageTerm():
    """
    A record MAY carry several mods:language elements, one per language
    (see MODS-AP 2.3.1). The ``language_languageTerm`` entry must still equal
    the set of both codes.
    """
    # NOTE(review): no XML tags are visible in this literal -- confirm the
    # fixture markup is intact.
    fixture = """
    
        lat
        ger
    
    """
    flattened = dict_fromstring(fixture)
    expected_codes = {'ger', 'lat'}
    assert flattened['language_languageTerm'] == expected_codes
def test_role_roleTerm():
    """The ``name0_role_roleTerm`` entry must equal the one-element set ``{'cmp'}``."""
    # NOTE(review): no XML tags are visible in this literal -- confirm the
    # fixture markup is intact.
    fixture = """
    
    
      Wurm, Mary
      Mary
      078789583
      Wurm
      
        cmp
      
    
    
    """
    flattened = dict_fromstring(fixture)
    expected_roles = {'cmp'}
    assert flattened['name0_role_roleTerm'] == expected_roles
def test_multiple_role_roleTerm():
    """
    Several mods:role/mods:roleTerm values belonging to one name are expected
    to be merged into a single column, compared here against a set.
    """
    # NOTE(review): no XML tags are visible in this literal -- confirm the
    # fixture markup is intact.
    fixture = """
    
    
      Wurm, Mary
      Mary
      078789583
      Wurm
      
        cmp
      
      
        aut
      
    
    
    """
    flattened = dict_fromstring(fixture)
    expected_roles = {'cmp', 'aut'}
    assert flattened['name0_role_roleTerm'] == expected_roles
def test_scriptTerm():
    """
    One language written in different scripts is a single mods:language with
    several scriptTerms inside (see MODS-AP 2.3.1). The ``language_scriptTerm``
    entry must equal the set of all three script codes.
    """
    # NOTE(review): no XML tags are visible in this literal -- confirm the
    # fixture markup is intact.
    fixture = """
    
        
            ger
            215
            217
        
        
            lat
            216
        
    
    """
    flattened = dict_fromstring(fixture)
    expected_scripts = {'215', '216', '217'}
    assert flattened['language_scriptTerm'] == expected_scripts
def test_recordInfo():
    """The ``recordInfo_recordIdentifier`` entry must be the plain identifier string."""
    # NOTE(review): no XML tags are visible in this literal -- confirm the
    # fixture markup is intact.
    fixture = """
    
        
            PPN610714341
        
    
    """
    flattened = dict_fromstring(fixture)
    expected_identifier = 'PPN610714341'
    assert flattened['recordInfo_recordIdentifier'] == expected_identifier
def test_accessCondition():
    """The ``accessCondition-use and reproduction`` entry must be ``'UNKNOWN'``."""
    # NOTE(review): no XML tags are visible in this literal -- confirm the
    # fixture markup is intact.
    fixture = """
    
        UNKNOWN
    
    """
    flattened = dict_fromstring(fixture)
    expected_condition = 'UNKNOWN'
    assert flattened['accessCondition-use and reproduction'] == expected_condition
def test_originInfo_no_event_type():
    """
    Parsing this fixture must emit exactly one ``UserWarning`` saying the
    originInfo element was filtered (no eventType), and the flattened result
    must be empty.
    """
    # NOTE(review): no XML tags are visible in this literal -- confirm the
    # fixture markup is intact.
    with pytest.warns(UserWarning) as caught:
        fixture = """
        
            
               Berlin
            
        
        """
        flattened = dict_fromstring(fixture)
    assert flattened == {}  # nothing survives the filtering
    assert len(caught) == 1
    expected_message = 'Filtered {http://www.loc.gov/mods/v3}originInfo element (has no eventType)'
    first_message = caught[0].message.args[0]
    assert first_message == expected_message
def test_relatedItem():
    """The ``relatedItem-original_recordInfo_recordIdentifier`` entry must be the identifier string."""
    # NOTE(review): no XML tags are visible in this literal -- confirm the
    # fixture markup is intact.
    fixture = """
    
        
            
                PPN167755803
            
        
    
    """
    flattened = dict_fromstring(fixture)
    expected_identifier = 'PPN167755803'
    assert flattened['relatedItem-original_recordInfo_recordIdentifier'] == expected_identifier