diff --git a/ner-edith.js b/ner-edith.js
index aeb442c..8aa34b9 100644
--- a/ner-edith.js
+++ b/ner-edith.js
@@ -3,6 +3,39 @@ function loadFile(evt, onComplete) {
var file = evt.target.files[0];
+ var urls = null;
+
+ var reader = new FileReader();
+
+ reader.onload =
+ function(event) {
+ // Collects one link per '#' comment line of the loaded text into `urls`, in file order.
+ let link_detector = /(https?:\/\/[^\s]+)/g;
+
+ let lines = event.target.result.split(/\r\n|\n/);
+ for(let i = 0; i < lines.length; i++){
+
+ let line = lines[i];
+
+ if (!line.startsWith('#')) continue;
+
+ let tmp = line.match(link_detector);
+
+ if (tmp == null) continue;
+
+ // One entry per comment line (its first link), so that the index in
+ // `urls` stays aligned even if the first such line holds several links.
+ if (urls == null) {
+ urls = [ tmp[0] ];
+ }
+ else {
+ urls.push(tmp[0]);
+ }
+ }
+ };
+
+ reader.readAsText(file);
+
Papa.parse(file, {
header: true,
delimiter: '\t',
@@ -11,23 +44,36 @@ function loadFile(evt, onComplete) {
comments: "#",
skipEmptyLines: true,
dynamicTyping: true,
- complete: function(results) { onComplete(results, file) }
+ complete: function(results) { onComplete(results, file, urls); }
});
}
+function setupInterface(data, file, urls) {
-var displayRows=30
-var startIndex=0;
-var endIndex=displayRows;
-var urls = null;
-
-function setupInterface(data, file) {
+ var displayRows=30
+ var startIndex=0;
+ var endIndex=displayRows;
function updatePreview(nRow) {
if (urls == null) return;
- let img_url = urls.data[data.data[nRow]['url_id']]['url']
+ let img_url = urls[data.data[nRow]['url_id']];
+
+ let left = data.data[nRow]['left'];
+ let right = data.data[nRow]['right'];
+ let top = data.data[nRow]['top'];
+ let bottom = data.data[nRow]['bottom'];
+
+ let width = right - left;
+ let height = bottom - top;
+
+ img_url = img_url.replace('left', left.toString());
+ img_url = img_url.replace('right', right.toString());
+ img_url = img_url.replace('top', top.toString());
+ img_url = img_url.replace('bottom',bottom.toString());
+ img_url = img_url.replace('width', width.toString());
+ img_url = img_url.replace('height', height.toString());
console.log(img_url);
@@ -40,45 +86,11 @@ function setupInterface(data, file) {
function gotoLocation(evt) {
- if (urls != null) {
+ if (urls == null) return;
- let nRow = parseInt($(evt.target).text());
+ let nRow = parseInt($(evt.target).text());
- updatePreview(nRow)
- }
- else {
- let url_mapping_html =
- `
-
-
-
-
- Please
- upload a url mapping file
- or
- `;
-
- $("#tableregion").html(url_mapping_html);
- $("#btn-region").empty();
- $("#region-right").empty();
-
- $('#goback').on('click',
- function(evt) {
- setupInterface(data, file);
- }
- );
-
- $('#url-mapping-tsv-file').change(
- function(evt) {
- loadFile(evt,
- function(results, url_mapping_file) {
- urls = results;
-
- setupInterface(data, file);
- });
- }
- );
- }
+ updatePreview(nRow)
}
function colorCode() {
@@ -426,6 +438,23 @@ function setupInterface(data, file) {
dynamicTyping: true
});
+ // Put the "# <url>" comment lines back in front of the first row of each url_id.
+ let lines = csv.split(/\r\n|\n/);
+ csv = [ lines[0] ];
+ let url_id = -1;
+
+ for(let i = 0; i < data.data.length; i++){
+ let row_url_id = data.data[i]['url_id'];
+ // urls is null when the loaded file had no link comments; skip ids without a known URL.
+ if (urls != null && row_url_id > url_id && urls[row_url_id] != null) {
+ url_id = row_url_id;
+ csv.push("# " + urls[url_id]);
+ }
+ csv.push(lines[i+1]);
+ }
+
+ csv = csv.join('\n');
+
openSaveFileDialog (csv, file.name, null)
}
@@ -474,8 +503,6 @@ function setupInterface(data, file) {
if (!$.contains($('#table')[0], target)) return
$(target).data('tableInfo').clickAction(target);
-
- //makeTdEditable(target);
});
updateTable();
@@ -541,9 +568,9 @@ $(document).ready(
function(evt) {
loadFile ( evt,
- function(results, file) {
+ function(results, file, urls) {
- setupInterface(results, file);
+ setupInterface(results, file, urls);
})
}
);
diff --git a/tools/README.md b/tools/README.md
index b070b59..438f593 100644
--- a/tools/README.md
+++ b/tools/README.md
@@ -44,14 +44,11 @@ page2tsv PAGE5.xml PAGE.tsv --image-url=http://link-to-corresponding-image-5
...
```
-A corresponding URL-mapping file can be obtained from:
+For instance, for the file assets/example.xml:
```
-extract-doc-links PAGE.tsv PAGE-urls.tsv
+page2tsv example.xml example4.tsv --image-url=http://content.staatsbibliothek-berlin.de/zefys/SNP27646518-18800101-0-3-0-0/left,top,width,height/full/0/default.jpg
```
-By loading the annotated TSV as well as the url mapping file into
-ner.edith, you will be able to jump directly to the original image
-where the full text has been extracted from.
---
@@ -62,13 +59,3 @@ Create a URL-annotated TSV file from an existing TSV file:
```
annotate-tsv enp_DE.tsv enp_DE-annotated.tsv
```
-Create a corresponding URL-mapping file:
-
-```
-extract-doc-links enp_DE.tsv enp_DE-urls.tsv
-```
-
-By loading the annotated TSV as well as the url mapping file into
-ner.edith, you will be able to jump directly to the original image
-where the full text has been extracted from.
-