61 lines
1.9 KiB
Python
61 lines
1.9 KiB
Python
import json
|
|
import re
|
|
|
|
|
|
# Paths of the raw export to read and of the cleaned copy to produce.
input_file = 'export_2025-06-02.json'
output_file = 'export_2025-06-02_clean.json'

# Parse the complete export into memory; 'r' is already open()'s default mode.
with open(input_file, encoding='utf-8') as f:
    data = json.load(f)
|
|
|
|
# Matches a leading zero quantity such as "0 g " or "0 Milliliter ".
_ZERO_QUANTITY_PREFIX = re.compile(r"^0 (g|kg|Milliliter) ")


def _clean_ingredient(ingredient):
    """Return one ingredient line with unit labels and zero amounts tidied."""
    if not isinstance(ingredient, str):
        # Non-string entries cannot be text-normalized; pass them through.
        return ingredient
    cleaned = ingredient.replace('g / Gramm', 'g')
    cleaned = cleaned.replace('kg / Kilogramm', 'kg')
    # Strip a trailing ".0" from quantities: "2.0 kg" -> "2 kg".
    cleaned = cleaned.replace('.0 ', ' ')
    # Runs after the ".0" step so that "0.0 g Salz" also loses its prefix.
    return _ZERO_QUANTITY_PREFIX.sub("", cleaned)


def normalize_ingredients(recipe):
    """Tidy the ``recipeIngredient`` list of *recipe* in place.

    For every string ingredient, in this order:

    * ``'g / Gramm'`` is shortened to ``'g'`` and ``'kg / Kilogramm'`` to
      ``'kg'``;
    * ``'.0 '`` is replaced by a single space;
    * a leading ``'0 g '``, ``'0 kg '`` or ``'0 Milliliter '`` is removed.

    Entries equal to the string ``"None"`` after cleaning are dropped, and so
    are ``None`` entries (JSON ``null``), which previously raised
    ``AttributeError``. Other non-string entries are kept unchanged.

    Args:
        recipe: Mapping that may contain a ``recipeIngredient`` key.

    Returns:
        The same *recipe* object. It is left untouched when the key is
        missing or its value is not a list/tuple (for example a single string
        or ``null``), instead of being split into characters or crashing.
    """
    if 'recipeIngredient' not in recipe:
        return recipe

    ingredients = recipe['recipeIngredient']
    if not isinstance(ingredients, (list, tuple)):
        return recipe

    # Single pass: clean each entry, then discard the empty placeholders.
    cleaned_ingredients = (_clean_ingredient(item) for item in ingredients)
    recipe['recipeIngredient'] = [
        item
        for item in cleaned_ingredients
        if item is not None and item != "None"
    ]
    return recipe
|
|
|
|
|
|
def _has_instruction_content(instruction):
    """Return True when a single instruction step is worth keeping."""
    if isinstance(instruction, dict):
        # Object-style steps are kept only if they carry a non-empty 'text'.
        return bool(instruction.get('text'))
    # Plain-string steps are kept when non-empty; None and the like are dropped.
    return bool(instruction)


def normalize_instructions(recipe):
    """Drop empty steps from ``recipeInstructions`` of *recipe* in place.

    Dict steps are kept only when their ``'text'`` value is truthy. Steps
    given as plain strings are kept when non-empty, and ``None`` entries are
    dropped; both previously raised ``AttributeError``. If nothing remains,
    the ``recipeInstructions`` key is removed entirely.

    Args:
        recipe: Mapping that may contain a ``recipeInstructions`` key.

    Returns:
        The same *recipe* object. A value that is not a list/tuple (for
        example one block of text) is not filtered: it is kept as-is when
        truthy and the key is removed when it is empty or ``None``.
    """
    if 'recipeInstructions' not in recipe:
        return recipe

    instructions = recipe['recipeInstructions']
    if isinstance(instructions, (list, tuple)):
        instructions = [
            step for step in instructions if _has_instruction_content(step)
        ]
        recipe['recipeInstructions'] = instructions

    # An empty instruction set carries no information, so drop the key.
    if not instructions:
        del recipe['recipeInstructions']
    return recipe
|
|
|
|
|
|
def normalize_recipe(recipe):
    """Run every normalization step on *recipe* and return the result.

    Ingredients are normalized first, then instructions; the value returned
    by the instruction step is handed back to the caller.
    """
    return normalize_instructions(normalize_ingredients(recipe))
|
|
|
|
|
|
# A list is normalized entry by entry; anything else is treated as one recipe.
cleaned_data = (
    [normalize_recipe(entry) for entry in data]
    if isinstance(data, list)
    else normalize_recipe(data)
)

# Save the cleaned data as indented JSON, leaving non-ASCII characters unescaped.
with open(output_file, mode='w', encoding='utf-8') as f:
    json.dump(cleaned_data, f, ensure_ascii=False, indent=2)

print(f"Cleaned data written to '{output_file}'")
|