mirror of
				https://github.com/qurator-spk/dinglehopper.git
				synced 2025-10-26 06:54:18 +01:00 
			
		
		
		
	Fix some typos (found by codespell and typos)
				
					
				
			Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
		
							parent
							
								
									2383730a55
								
							
						
					
					
						commit
						79701e410d
					
				
					 4 changed files with 6 additions and 6 deletions
				
			
		|  | @ -100,11 +100,11 @@ This generates `summary.html` and `summary.json` in the same `output_folder`. | |||
| 
 | ||||
| If you are summarizing many reports and have used the `--differences` flag while | ||||
| generating them, it may be useful to limit the number of differences reported by using | ||||
| -the `--occurences-threshold` parameter. This will reduce the size of the generated HTML | ||||
| +the `--occurrences-threshold` parameter. This will reduce the size of the generated HTML | ||||
| report, making it easier to open and navigate. Note that the JSON report will still | ||||
| contain all differences. Example: | ||||
| ~~~ | ||||
| -dinglehopper-summarize output_folder/ --occurences-threshold 10 | ||||
| +dinglehopper-summarize output_folder/ --occurrences-threshold 10 | ||||
| ~~~ | ||||
| 
 | ||||
| ### dinglehopper-line-dirs | ||||
|  |  | |||
|  | @ -329,7 +329,7 @@ def get_attr(te: Any, attr_name: str) -> float: | |||
|     """Extract the attribute for the given name. | ||||
| 
 | ||||
|     Note: currently only handles numeric values! | ||||
| -    Other or non existend values are encoded as np.nan. | ||||
| +    Other or non existent values are encoded as np.nan. | ||||
|     """ | ||||
|     attr_value = te.attrib.get(attr_name) | ||||
|     try: | ||||
|  |  | |||
|  | @ -391,7 +391,7 @@ | |||
|     "\\text{CER} = \\frac{i + s + d}{n}\n", | ||||
|     "$$\n", | ||||
|     "\n", | ||||
| -    "where $i$ is the number of inserts, $s$ the number of substitutions, $d$ the number of deletions and $n$ is the number of characters in the reference text. (The text is not super clear about $n$ being the number of characters in the reference text, but it seems appropiate as they *are* clear about this when computing the word error rate.)" | ||||
| +    "where $i$ is the number of inserts, $s$ the number of substitutions, $d$ the number of deletions and $n$ is the number of characters in the reference text. (The text is not super clear about $n$ being the number of characters in the reference text, but it seems appropriate as they *are* clear about this when computing the word error rate.)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|  | @ -680,7 +680,7 @@ | |||
|       "        return cat in unwanted_categories or subcat in unwanted_subcategories\n", | ||||
|       "\n", | ||||
|       "    # We follow Unicode Standard Annex #29 on Unicode Text Segmentation here: Split on word boundaries using\n", | ||||
| -      "    # uniseg.wordbreak.words() and ignore all \"words\" that contain only whitespace, punctation \"or similar characters.\"\n", | ||||
| +      "    # uniseg.wordbreak.words() and ignore all \"words\" that contain only whitespace, punctuation \"or similar characters.\"\n", | ||||
|       "    for word in uniseg.wordbreak.words(s):\n", | ||||
|       "        if all(unwanted(c) for c in word):\n", | ||||
|       "            pass\n", | ||||
|  |  | |||
|  | @ -54,7 +54,7 @@ def words(s: str) -> Generator[str, None, None]: | |||
| 
 | ||||
|     # We follow Unicode Standard Annex #29 on Unicode Text Segmentation here: Split on | ||||
|     # word boundaries using uniseg.wordbreak.words() and ignore all "words" that contain | ||||
| -    # only whitespace, punctation "or similar characters." | ||||
| +    # only whitespace, punctuation "or similar characters." | ||||
|     for word in uniseg.wordbreak.words(s): | ||||
|         if all(unwanted(c) for c in word): | ||||
|             pass | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue