From 22e97da8de52cb0dbe1f5969a361183604aa26a0 Mon Sep 17 00:00:00 2001 From: Kai Labusch Date: Wed, 6 Nov 2019 10:30:48 +0100 Subject: [PATCH] remove url mapping file --- ner-edith.js | 125 +++++++++++++++++++++++++++++------------------- tools/README.md | 17 +------ 2 files changed, 78 insertions(+), 64 deletions(-) diff --git a/ner-edith.js b/ner-edith.js index aeb442c..8aa34b9 100644 --- a/ner-edith.js +++ b/ner-edith.js @@ -3,6 +3,39 @@ function loadFile(evt, onComplete) { var file = evt.target.files[0]; + var urls = null; + + var reader = new FileReader(); + + reader.onload = + function(event) { + + let link_detector = /(https?:\/\/[^\s]+)/g; + + let lines = event.target.result.split(/\r\n|\n/); + for(var i = 0; i < lines.length; i++){ + + let line = lines[i]; + + if (!line.startsWith('#')) continue; + + let tmp = line.match(link_detector); + + if (tmp == null) continue; + + //console.log(tmp); + + if (urls == null) { + urls = tmp; + } + else { + urls.push(tmp[0]) + } + }; + }; + + reader.readAsText(file); + Papa.parse(file, { header: true, delimiter: '\t', @@ -11,23 +44,36 @@ function loadFile(evt, onComplete) { comments: "#", skipEmptyLines: true, dynamicTyping: true, - complete: function(results) { onComplete(results, file) } + complete: function(results) { onComplete(results, file, urls); } }); } +function setupInterface(data, file, urls) { -var displayRows=30 -var startIndex=0; -var endIndex=displayRows; -var urls = null; - -function setupInterface(data, file) { + var displayRows=30 + var startIndex=0; + var endIndex=displayRows; function updatePreview(nRow) { if (urls == null) return; - let img_url = urls.data[data.data[nRow]['url_id']]['url'] + let img_url = urls[data.data[nRow]['url_id']]; + + let left = data.data[nRow]['left']; + let right = data.data[nRow]['right']; + let top = data.data[nRow]['top']; + let bottom = data.data[nRow]['bottom']; + + let width = right - left; + let height = bottom - top; + + img_url = img_url.replace('left', left.toString()); + img_url = img_url.replace('right', right.toString()); + img_url = img_url.replace('top', top.toString()); + img_url = img_url.replace('bottom',bottom.toString()); + img_url = img_url.replace('width', width.toString()); + img_url = img_url.replace('height', height.toString()); console.log(img_url); @@ -40,45 +86,11 @@ function setupInterface(data, file) { function gotoLocation(evt) { - if (urls != null) { + if (urls == null) return; - let nRow = parseInt($(evt.target).text()); + let nRow = parseInt($(evt.target).text()); - updatePreview(nRow) - } - else { - let url_mapping_html = - ` -
-
-
- - Please - upload a url mapping file - or - `; - - $("#tableregion").html(url_mapping_html); - $("#btn-region").empty(); - $("#region-right").empty(); - - $('#goback').on('click', - function(evt) { - setupInterface(data, file); - } - ); - - $('#url-mapping-tsv-file').change( - function(evt) { - loadFile(evt, - function(results, url_mapping_file) { - urls = results; - - setupInterface(data, file); - }); - } - ); - } + updatePreview(nRow) } function colorCode() { @@ -426,6 +438,23 @@ function setupInterface(data, file) { dynamicTyping: true }); + let lines = csv.split(/\r\n|\n/); + + csv = [ lines[0] ]; + let url_id = -1; + + for(var i = 0; i < data.data.length; i++){ + if (data.data[i]['url_id'] > url_id) { + + url_id = data.data[i]['url_id']; + + csv.push("# " + urls[url_id]); + } + csv.push(lines[i+1]); + } + + csv = csv.join('\n'); + openSaveFileDialog (csv, file.name, null) } @@ -474,8 +503,6 @@ function setupInterface(data, file) { if (!$.contains($('#table')[0], target)) return $(target).data('tableInfo').clickAction(target); - - //makeTdEditable(target); }); updateTable(); @@ -541,9 +568,9 @@ $(document).ready( function(evt) { loadFile ( evt, - function(results, file) { + function(results, file, urls) { - setupInterface(results, file); + setupInterface(results, file, urls); }) } ); diff --git a/tools/README.md b/tools/README.md index b070b59..438f593 100644 --- a/tools/README.md +++ b/tools/README.md @@ -44,14 +44,11 @@ page2tsv PAGE5.xml PAGE.tsv --image-url=http://link-to-corresponding-image-5 ... ``` -A corresponding URL-mapping file can be obtained from: +For instance, for the file assets/example.xml: ``` -extract-doc-links PAGE.tsv PAGE-urls.tsv +page2tsv example.xml example4.tsv --image-url=http://content.staatsbibliothek-berlin.de/zefys/SNP27646518-18800101-0-3-0-0/left,top,width,height/full/0/default.jpg ``` -By loading the annotated TSV as well as the url mapping file into -ner.edith, you will be able to jump directly to the original image -where the full text has been extracted from. --- @@ -62,13 +59,3 @@ Create a URL-annotated TSV file from an existing TSV file: ``` annotate-tsv enp_DE.tsv enp_DE-annotated.tsv ``` -Create a corresponding URL-mapping file: - -``` -extract-doc-links enp_DE.tsv enp_DE-urls.tsv -``` - -By loading the annotated TSV as well as the url mapping file into -ner.edith, you will be able to jump directly to the original image -where the full text has been extracted from. -