gpn23-recipes/1_clean_json.py

120 lines
3.1 KiB
Python
Raw Normal View History

2025-06-01 23:11:38 +02:00
import json
2025-06-02 09:48:27 +02:00
import re
2025-06-07 01:00:32 +02:00
import os
from config import *
2025-06-02 09:48:27 +02:00
2025-06-08 21:17:47 +02:00
2025-06-01 23:11:38 +02:00
def normalize_ingredients(recipe):
    """Tidy the ingredient strings stored under ``recipeIngredient``.

    Does nothing when the recipe has no ``recipeIngredient`` key. Otherwise
    each entry is rewritten as follows, in this order:

    * ``"g / Gramm"`` becomes ``"g"`` and ``"kg / Kilogramm"`` becomes ``"kg"``;
    * every ``".0 "`` becomes a single space (``"1.0 g"`` -> ``"1 g"``);
    * a leading ``"0 "`` followed by ``g``, ``kg``, ``Milliliter`` or ``None``
      and a space is removed.

    Entries that are exactly ``"None"`` after these rewrites are dropped.
    The recipe is modified in place and the same object is returned.
    """

    def _tidy(text):
        # Same rewrite sequence as documented above, applied to one entry.
        text = text.replace("g / Gramm", "g")
        text = text.replace("kg / Kilogramm", "kg")
        text = text.replace(".0 ", " ")
        return re.sub("^0 (g|kg|Milliliter|None) ", "", text)

    if "recipeIngredient" not in recipe:
        return recipe

    tidied = []
    for raw in recipe["recipeIngredient"]:
        cleaned = _tidy(raw)
        if cleaned != "None":
            tidied.append(cleaned)
    recipe["recipeIngredient"] = tidied
    return recipe
2025-06-02 12:32:45 +02:00
def normalize_instructions(recipe):
    """Remove instruction entries that have no text.

    Does nothing when the recipe has no ``recipeInstructions`` key. Entries
    whose ``"text"`` value is missing or falsy are discarded; if nothing is
    left, the ``recipeInstructions`` key itself is deleted. The recipe is
    modified in place and the same object is returned.
    """
    if "recipeInstructions" not in recipe:
        return recipe

    kept = []
    for entry in recipe["recipeInstructions"]:
        if entry.get("text"):
            kept.append(entry)

    if kept:
        recipe["recipeInstructions"] = kept
    else:
        del recipe["recipeInstructions"]
    return recipe
def normalize_recipe(recipe):
    """Apply all normalisation passes to a recipe and return the result.

    Ingredients are normalised first, then instructions; the second pass
    receives whatever the first one returned.
    """
    return normalize_instructions(normalize_ingredients(recipe))
2025-06-07 01:00:32 +02:00
# TODO: the functions above no longer work as written.
def check_recipe(recipe) -> list:
    """Collect Markdown warning lines for one recipe.

    Args:
        recipe: Recipe dictionary. Its ``"steps"`` entry is iterated and
            every step is handed to ``check_ingredients``.

    Returns:
        A list of warning strings; empty when nothing was flagged.
    """
    md = []
    # A missing key, ``None`` and an empty list all mean the recipe has no
    # steps, so all three are reported the same way.
    steps = recipe.get("steps") or []
    if not steps:
        md.append("No steps?")
    for step in steps:
        md += check_ingredients(step)
    return md
2025-06-08 21:17:47 +02:00
2025-06-07 01:00:32 +02:00
def normalize_amount(a):
    """Return ``a`` rounded to an integer when that loses nothing.

    If ``a`` equals ``round(a)`` the rounded value is returned; otherwise
    ``a`` is returned unchanged.
    """
    nearest = round(a)
    return nearest if a == nearest else a
2025-06-08 21:17:47 +02:00
2025-06-07 01:00:32 +02:00
def check_ingredients(step):
    """Collect Markdown warning lines for the ingredients of one step.

    For every entry in ``step["ingredients"]`` this reports:

    * a truthy amount without a unit;
    * a non-empty unit name that is not in ``VALID_UNITS``;
    * a missing or falsy ``"food"`` value.

    Returns the list of warning strings (empty when nothing was flagged).
    """
    problems = []
    for entry in step["ingredients"]:
        quantity = normalize_amount(entry["amount"])
        unit = entry.get("unit")
        if unit:
            unit_name = unit["name"]
            # An empty unit name is not checked against the allowed units.
            if unit_name and unit_name not in VALID_UNITS:
                problems.append(f"- Invalid unit {unit_name}")
        elif quantity:
            problems.append("- Amount but no unit?")
        if not entry.get("food"):
            problems.append("- No food element in ingredient?")
    return problems
def make_link(recipe):
    """Return a Markdown link pointing at the recipe's page.

    The link text is ``recipe["name"]``; the target is ``TANDOOR_URL``
    followed by ``/view/recipe/`` and ``recipe["id"]``.
    """
    # Build the pieces outside the f-string: reusing double quotes inside a
    # double-quoted f-string only parses on Python 3.12+ and is a
    # SyntaxError on older interpreters.
    name = recipe["name"]
    url = TANDOOR_URL + "/view/recipe/" + str(recipe["id"])
    return f"[{name}]({url})"
2025-06-08 21:17:47 +02:00
2025-06-07 01:00:32 +02:00
def main():
    """Check every recipe file in ``OUTDIR_JSON`` and print a Markdown report.

    All files are loaded first. Then, for each recipe, a heading with a link
    is printed (green heart when ``check_recipe`` returned no warnings,
    broken heart otherwise), followed by the warning lines.
    """
    recipes = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # report reproducible from run to run.
    for json_file in sorted(os.listdir(OUTDIR_JSON)):
        path = os.path.join(OUTDIR_JSON, json_file)
        if not os.path.isfile(path):
            # A sub-directory cannot be opened as a JSON file; skip it
            # instead of crashing.
            continue
        with open(path, "r", encoding="utf-8") as f:
            recipes.append(json.load(f))

    for recipe in recipes:
        md = check_recipe(recipe)
        marker = "💔" if md else "💚"
        print(f"## {marker} {make_link(recipe)}")
        for line in md:
            print(line)
2025-06-08 21:17:47 +02:00
2025-06-07 01:00:32 +02:00
# Run the report only when this file is executed directly as a script.
if __name__ == "__main__":
    main()