# gpn23-recipes/1_clean_json.py
#
# 45 lines
# 1.4 KiB
# Python

import json
import re
# Matches a leading zero quantity followed by its unit, e.g. "0 g " or "0 None ".
_ZERO_AMOUNT_PREFIX = re.compile(r"^0 (g|kg|Milliliter|None) ")


def _clean_ingredient(ingredient):
    """Return one ingredient string with unit labels and zero amounts tidied."""
    ingredient = ingredient.replace('g / Gramm', 'g')
    ingredient = ingredient.replace('kg / Kilogramm', 'kg')
    # Strip the ".0" first so that "0.0 g ..." becomes "0 g ..." and is then
    # caught by the zero-amount pattern below.
    ingredient = ingredient.replace('.0 ', ' ')
    return _ZERO_AMOUNT_PREFIX.sub("", ingredient)


def normalize_ingredients(recipe):
    """Tidy the ingredient strings stored under ``recipe['recipeIngredient']``.

    Each ingredient has its bilingual unit labels shortened
    ("g / Gramm" -> "g", "kg / Kilogramm" -> "kg"), a ".0 " decimal suffix
    reduced to a space, and a leading zero amount with unit removed.
    Ingredients that end up as the literal string "None" are dropped.

    Args:
        recipe: Mapping that may contain a ``'recipeIngredient'`` list of
            strings. It is modified in place.

    Returns:
        The same ``recipe`` object that was passed in.
    """
    if 'recipeIngredient' in recipe:
        cleaned = (
            _clean_ingredient(ingredient)
            for ingredient in recipe['recipeIngredient']
        )
        # Filter after cleaning: an entry may only become "None" once its
        # zero-amount prefix has been stripped.
        recipe['recipeIngredient'] = [
            ingredient for ingredient in cleaned if ingredient != "None"
        ]
    return recipe
def normalize_instructions(recipe):
    """Drop instruction entries without text from ``recipe['recipeInstructions']``.

    An entry is kept only when ``entry.get('text')`` is truthy. If no entry
    survives, the ``'recipeInstructions'`` key is removed from the recipe
    altogether.

    Args:
        recipe: Mapping that may contain a ``'recipeInstructions'`` list whose
            items support ``.get('text')``. It is modified in place.

    Returns:
        The same ``recipe`` object that was passed in.
    """
    if 'recipeInstructions' not in recipe:
        return recipe

    kept = [
        instruction
        for instruction in recipe['recipeInstructions']
        if instruction.get('text')
    ]
    if kept:
        recipe['recipeInstructions'] = kept
    else:
        # Nothing usable left: remove the key instead of keeping an empty list.
        del recipe['recipeInstructions']
    return recipe
def normalize_recipe(recipe):
    """Run all recipe clean-up steps and return the result.

    Passes ``recipe`` through ``normalize_ingredients`` and then through
    ``normalize_instructions``.

    Args:
        recipe: The recipe mapping to clean.

    Returns:
        Whatever ``normalize_instructions`` returns for the
        ingredient-normalized recipe.
    """
    return normalize_instructions(normalize_ingredients(recipe))
# TODO