Fix some typos (found by `codespell` and `typos`)

Signed-off-by: Stefan Weil <sw@weilnetz.de>
pull/111/head
Stefan Weil 8 months ago
parent 2383730a55
commit 79701e410d

@@ -100,11 +100,11 @@ This generates `summary.html` and `summary.json` in the same `output_folder`.
 If you are summarizing many reports and have used the `--differences` flag while
 generating them, it may be useful to limit the number of differences reported by using
-the `--occurences-threshold` parameter. This will reduce the size of the generated HTML
+the `--occurrences-threshold` parameter. This will reduce the size of the generated HTML
 report, making it easier to open and navigate. Note that the JSON report will still
 contain all differences. Example:
 ~~~
-dinglehopper-summarize output_folder/ --occurences-threshold 10
+dinglehopper-summarize output_folder/ --occurrences-threshold 10
 ~~~
 ### dinglehopper-line-dirs

@@ -329,7 +329,7 @@ def get_attr(te: Any, attr_name: str) -> float:
     """Extract the attribute for the given name.
     Note: currently only handles numeric values!
-    Other or non existend values are encoded as np.nan.
+    Other or non existent values are encoded as np.nan.
     """
     attr_value = te.attrib.get(attr_name)
     try:
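
The hunk above shows only the head of `get_attr`. For orientation, here is a minimal sketch of how the docstring's contract (a numeric attribute value, with missing or non-numeric values encoded as `np.nan`) might be satisfied; everything after the `try:` is an assumption, not the file's actual code.
~~~python
# Sketch only: the body after `try:` is assumed, not shown in the diff.
from typing import Any

import numpy as np


def get_attr(te: Any, attr_name: str) -> float:
    """Extract the attribute for the given name.

    Note: currently only handles numeric values!
    Other or non existent values are encoded as np.nan.
    """
    attr_value = te.attrib.get(attr_name)
    try:
        return float(attr_value)
    except (TypeError, ValueError):
        # Missing attributes (None) and non-numeric strings both become NaN.
        return np.nan
~~~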

@@ -391,7 +391,7 @@
     "\\text{CER} = \\frac{i + s + d}{n}\n",
     "$$\n",
     "\n",
-    "where $i$ is the number of inserts, $s$ the number of substitutions, $d$ the number of deletions and $n$ is the number of characters in the reference text. (The text is not super clear about $n$ being the number of characters in the reference text, but it seems appropiate as they *are* clear about this when computing the word error rate.)"
+    "where $i$ is the number of inserts, $s$ the number of substitutions, $d$ the number of deletions and $n$ is the number of characters in the reference text. (The text is not super clear about $n$ being the number of characters in the reference text, but it seems appropriate as they *are* clear about this when computing the word error rate.)"
    ]
   },
   {
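
To make the formula in the cell above concrete: with, say, 1 insertion, 2 substitutions and 1 deletion against a reference text of 100 characters (illustrative numbers, not taken from the notebook), the definition gives

$$
\text{CER} = \frac{i + s + d}{n} = \frac{1 + 2 + 1}{100} = 0.04,
$$

i.e. a character error rate of 4 %.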
@@ -680,7 +680,7 @@
    "        return cat in unwanted_categories or subcat in unwanted_subcategories\n",
    "\n",
    "    # We follow Unicode Standard Annex #29 on Unicode Text Segmentation here: Split on word boundaries using\n",
-    "    # uniseg.wordbreak.words() and ignore all \"words\" that contain only whitespace, punctation \"or similar characters.\"\n",
+    "    # uniseg.wordbreak.words() and ignore all \"words\" that contain only whitespace, punctuation \"or similar characters.\"\n",
    "    for word in uniseg.wordbreak.words(s):\n",
    "        if all(unwanted(c) for c in word):\n",
    "            pass\n",

@@ -54,7 +54,7 @@ def words(s: str) -> Generator[str, None, None]:
     # We follow Unicode Standard Annex #29 on Unicode Text Segmentation here: Split on
     # word boundaries using uniseg.wordbreak.words() and ignore all "words" that contain
-    # only whitespace, punctation "or similar characters."
+    # only whitespace, punctuation "or similar characters."
     for word in uniseg.wordbreak.words(s):
         if all(unwanted(c) for c in word):
             pass
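
Both of the previous two hunks touch the same comment in the `words()` generator. For readers who want to try it outside the repository, here is a self-contained sketch: the `uniseg.wordbreak.words()` loop and the `all(unwanted(c) ...)` test are taken from the diff, while the Unicode category sets inside `unwanted()` and the final `yield` branch are assumptions for illustration only.
~~~python
# Sketch of the segmentation described by the comment; the category sets and
# the yield branch are assumed, only the loop structure appears in this diff.
import unicodedata
from typing import Generator

import uniseg.wordbreak


def words(s: str) -> Generator[str, None, None]:
    def unwanted(c: str) -> bool:
        # Assumption: treat separators (Z*), punctuation (P*), symbols (S*),
        # marks (M*) and control/format characters (Cc, Cf) as "not a word".
        subcat = unicodedata.category(c)
        return subcat[0] in ("Z", "P", "S", "M") or subcat in ("Cc", "Cf")

    # We follow Unicode Standard Annex #29 on Unicode Text Segmentation here:
    # split on word boundaries using uniseg.wordbreak.words() and ignore all
    # "words" that contain only whitespace, punctuation or similar characters.
    for word in uniseg.wordbreak.words(s):
        if all(unwanted(c) for c in word):
            pass
        else:
            yield word
~~~
Example: under these assumptions, `list(words("Hello, world!"))` would yield `["Hello", "world"]`, with the comma, space and exclamation mark filtered out.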
