1
0
Fork 0
mirror of https://github.com/qurator-spk/neat.git synced 2025-06-09 11:49:54 +02:00

add image preview

This commit is contained in:
Kai Labusch 2019-09-24 10:21:49 +02:00
parent 6afb0a6375
commit 450886cda6
3 changed files with 79 additions and 62 deletions

View file

@@ -6,13 +6,16 @@ from io import StringIO
 @click.command()
 @click.argument('tsv-file', type=click.Path(exists=True), required=True, nargs=1)
-def extract_document_links(tsv_file):
+@click.argument('url-file', type=click.Path(exists=False), required=True, nargs=1)
+def extract_document_links(tsv_file, url_file):
     parts = extract_doc_links(tsv_file)
-    for part in parts:
+    urls = [part['url'] for part in parts]
-        print(part['url'])
+    urls = pd.DataFrame(urls, columns=['url'])
+    urls.to_csv(url_file, sep="\t", quoting=3, index=False)
 @click.command()
@@ -24,12 +27,9 @@ def annotate_tsv(tsv_file, annotated_tsv_file):
     annotated_parts = []
-    urls = []
     for part in parts:
         part_data = StringIO(part['header'] + part['text'])
-        urls.append(part['url'])
         df = pd.read_csv(part_data, sep="\t", comment='#', quoting=3)