"""Clean up a recipe JSON export.

Reads ``input_file``, normalizes the ``recipeIngredient`` and
``recipeInstructions`` entries of each recipe, and writes the result to
``output_file``.
"""
import json
import re

input_file = 'export_2025-06-02.json'
output_file = 'export_2025-06-02_clean.json'

# A leading zero amount followed by one of the known units and a space,
# e.g. "0 g Salz".  Compiled once so it is not looked up per ingredient.
_ZERO_QUANTITY_PREFIX = re.compile(r"^0 (g|kg|Milliliter) ")


def _clean_ingredient(ingredient):
    """Return one ingredient string with units and amounts tidied up."""
    # The order of these steps matters: ".0 " must be stripped before the
    # zero-amount prefix is tested, so that "0.0 g ..." is caught as well.
    text = ingredient.replace('g / Gramm', 'g')
    text = text.replace('kg / Kilogramm', 'kg')
    text = text.replace('.0 ', ' ')
    return _ZERO_QUANTITY_PREFIX.sub("", text)


def normalize_ingredients(recipe):
    """Normalize ``recipe['recipeIngredient']`` in place, if present.

    Each ingredient string gets the verbose unit labels shortened
    ("g / Gramm" -> "g", "kg / Kilogramm" -> "kg"), a ".0 " occurrence
    replaced by a single space, and a leading "0 <unit> " prefix removed.
    Entries that equal the literal string "None" after cleaning are dropped.

    Args:
        recipe: A recipe dict; it is modified in place.

    Returns:
        The same ``recipe`` object that was passed in.
    """
    if 'recipeIngredient' in recipe:
        cleaned = (
            _clean_ingredient(ingredient)
            for ingredient in recipe['recipeIngredient']
        )
        recipe['recipeIngredient'] = [
            ingredient for ingredient in cleaned if ingredient != "None"
        ]
    return recipe


def normalize_instructions(recipe):
    """Drop instruction steps without text from ``recipe`` in place.

    Steps whose ``'text'`` value is missing or falsy are removed.  If no
    step remains, the ``'recipeInstructions'`` key itself is deleted.

    Args:
        recipe: A recipe dict; it is modified in place.  Every entry of
            ``recipe['recipeInstructions']`` must provide ``.get``.

    Returns:
        The same ``recipe`` object that was passed in.
    """
    if 'recipeInstructions' in recipe:
        steps_with_text = [
            instruction
            for instruction in recipe['recipeInstructions']
            if instruction.get('text')
        ]
        if steps_with_text:
            recipe['recipeInstructions'] = steps_with_text
        else:
            del recipe['recipeInstructions']
    return recipe


def normalize_recipe(recipe):
    """Apply the ingredient and instruction normalizers to ``recipe``.

    Args:
        recipe: A recipe dict; it is modified in place.

    Returns:
        The same ``recipe`` object that was passed in.
    """
    cleaned_recipe = normalize_ingredients(recipe)
    cleaned_recipe = normalize_instructions(cleaned_recipe)
    return cleaned_recipe


def main(input_path=input_file, output_path=output_file):
    """Load the export, clean every recipe, and write the cleaned JSON.

    Args:
        input_path: Path of the JSON export to read.
        output_path: Path of the cleaned JSON file to write.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # The export holds either a list of recipes or a single recipe object.
    if isinstance(data, list):
        cleaned_data = [normalize_recipe(recipe) for recipe in data]
    else:
        cleaned_data = normalize_recipe(data)

    # Write the cleaned JSON data to a new file
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(cleaned_data, f, ensure_ascii=False, indent=2)

    print(f"Cleaned data written to '{output_path}'")


# Guarded so that importing this module (e.g. to reuse the normalizers)
# does not touch the file system.
if __name__ == "__main__":
    main()