"""Clean up the ingredient strings of an exported recipe JSON file.

Reads ``input_file``, normalizes every recipe's ``recipeIngredient`` list
(shortens verbose unit labels, strips a trailing ``.0`` from amounts, removes
zero-amount prefixes and drops ``"None"`` placeholder entries) and writes the
result to ``output_file``.
"""
import json
import re

input_file = 'export_2025-06-02.json'
output_file = 'export_2025-06-02_clean.json'

# Literal substitutions applied to each ingredient string, in this order.
_LITERAL_REPLACEMENTS = (
    ('g / Gramm', 'g'),
    ('kg / Kilogramm', 'kg'),
    ('.0 ', ' '),  # "500.0 g" -> "500 g"
)

# A leading zero amount with a unit ("0 g ", "0 kg ", "0 Milliliter ").
# Applied after the ".0 " replacement so that "0.0 g ..." is caught as well.
_ZERO_AMOUNT_PREFIX = re.compile(r"^0 (g|kg|Milliliter) ")


def _clean_ingredient(ingredient):
    """Return *ingredient* with unit labels shortened and zero amounts removed."""
    for old, new in _LITERAL_REPLACEMENTS:
        ingredient = ingredient.replace(old, new)
    return _ZERO_AMOUNT_PREFIX.sub("", ingredient)


def normalize_ingredients(recipe):
    """Normalize the ``recipeIngredient`` list of *recipe* in place.

    Each string entry is cleaned with the literal replacements and the
    zero-amount pattern; entries that equal ``"None"`` after cleaning are
    dropped. Entries that are not strings are kept unchanged instead of
    raising ``AttributeError``.

    If *recipe* is not a dict, has no ``recipeIngredient`` key, or that value
    is not a list/tuple, *recipe* is returned untouched.

    Returns:
        The same *recipe* object that was passed in.
    """
    if not isinstance(recipe, dict):
        return recipe

    ingredients = recipe.get('recipeIngredient')
    if not isinstance(ingredients, (list, tuple)):
        # Missing key, null, or a bare string: iterating a string would
        # split it into single characters, so leave the value alone.
        return recipe

    cleaned = []
    for ingredient in ingredients:
        if isinstance(ingredient, str):
            ingredient = _clean_ingredient(ingredient)
            # Filter after cleaning: "0 g None" collapses to "None" too.
            if ingredient == "None":
                continue
        cleaned.append(ingredient)

    recipe['recipeIngredient'] = cleaned
    return recipe


def main():
    """Load ``input_file``, clean every recipe and write ``output_file``."""
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # The export may hold a list of recipes or a single recipe object.
    if isinstance(data, list):
        cleaned_data = [normalize_ingredients(recipe) for recipe in data]
    else:
        cleaned_data = normalize_ingredients(data)

    # Write the cleaned JSON data to a new file
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(cleaned_data, f, ensure_ascii=False, indent=2)

    print(f"Cleaned data written to '{output_file}'")


if __name__ == "__main__":
    main()