import json
import re


# Paths of the raw export to read and of the cleaned copy to write.
input_file = 'export_2025-06-02.json'
output_file = 'export_2025-06-02_clean.json'

# Parse the complete export into memory before any clean-up runs.
with open(input_file, encoding='utf-8') as export_fh:
    data = json.loads(export_fh.read())

# Leading zero quantity with one of the known units, e.g. "0 g " or
# "0 Milliliter ". Compiled once instead of on every ingredient.
_ZERO_QUANTITY_PREFIX = re.compile(r"^0 (g|kg|Milliliter) ")


def _clean_ingredient(ingredient):
    """Return one ingredient line with unit and quantity noise removed."""
    ingredient = ingredient.replace('g / Gramm', 'g')
    ingredient = ingredient.replace('kg / Kilogramm', 'kg')
    # Every ".0 " becomes " ", so "1.0 g" reads "1 g".
    ingredient = ingredient.replace('.0 ', ' ')
    # Runs after the ".0 " clean-up so that "0.0 g ..." is stripped as well.
    return _ZERO_QUANTITY_PREFIX.sub("", ingredient)


def normalize_ingredients(recipe):
    """Clean up the 'recipeIngredient' list of a recipe in place.

    Each entry has its bilingual unit labels shortened ('g / Gramm' -> 'g',
    'kg / Kilogramm' -> 'kg'), '.0 ' replaced by ' ', and a leading zero
    quantity such as '0 g ' removed. Entries that are exactly the string
    "None" after cleaning are dropped.

    Args:
        recipe: Mapping that may contain a 'recipeIngredient' list.
            Assumes every entry is a string -- TODO confirm against the
            export format.

    Returns:
        The same ``recipe`` object. It is left untouched when it has no
        'recipeIngredient' key.
    """
    if 'recipeIngredient' not in recipe:
        return recipe

    # The steps are independent per entry, so a single pass is enough.
    cleaned = (_clean_ingredient(entry) for entry in recipe['recipeIngredient'])
    recipe['recipeIngredient'] = [entry for entry in cleaned if entry != "None"]
    return recipe


def normalize_instructions(recipe):
    """Remove instruction steps that have no text, in place.

    Steps whose ``.get('text')`` is missing or falsy are discarded. When no
    step survives, the 'recipeInstructions' key is deleted altogether.

    Args:
        recipe: Mapping that may contain a 'recipeInstructions' list of
            objects supporting ``.get('text')``.

    Returns:
        The same ``recipe`` object.
    """
    if 'recipeInstructions' not in recipe:
        return recipe

    kept_steps = []
    for step in recipe['recipeInstructions']:
        if step.get('text'):
            kept_steps.append(step)

    if kept_steps:
        recipe['recipeInstructions'] = kept_steps
    else:
        # Nothing usable is left, so drop the key instead of keeping [].
        del recipe['recipeInstructions']
    return recipe


def normalize_recipe(recipe):
    """Run every clean-up step on one recipe and return the result.

    Ingredients are normalized first, then instructions; the second step
    receives whatever the first one returned.
    """
    return normalize_instructions(normalize_ingredients(recipe))


# A list is cleaned entry by entry; anything else is treated as one recipe.
cleaned_data = (
    [normalize_recipe(entry) for entry in data]
    if isinstance(data, list)
    else normalize_recipe(data)
)

# Write the cleaned JSON data to a new file, keeping non-ASCII characters
# readable and indenting for easier diffing.
with open(output_file, 'w', encoding='utf-8') as cleaned_fh:
    json.dump(cleaned_data, cleaned_fh, ensure_ascii=False, indent=2)

print(f"Cleaned data written to '{output_file}'")