mirror of
https://github.com/qurator-spk/neat.git
synced 2025-06-11 04:39:54 +02:00
remove url mapping file
This commit is contained in:
parent
ff451f9ebd
commit
22e97da8de
2 changed files with 78 additions and 64 deletions
125
ner-edith.js
125
ner-edith.js
|
@ -3,6 +3,39 @@ function loadFile(evt, onComplete) {
|
|||
|
||||
var file = evt.target.files[0];
|
||||
|
||||
var urls = null;
|
||||
|
||||
var reader = new FileReader();
|
||||
|
||||
reader.onload =
|
||||
function(event) {
|
||||
|
||||
let link_detector = /(https?:\/\/[^\s]+)/g;
|
||||
|
||||
let lines = event.target.result.split(/\r\n|\n/);
|
||||
for(var i = 0; i < lines.length; i++){
|
||||
|
||||
let line = lines[i];
|
||||
|
||||
if (!line.startsWith('#')) continue;
|
||||
|
||||
let tmp = line.match(link_detector);
|
||||
|
||||
if (tmp == null) continue;
|
||||
|
||||
//console.log(tmp);
|
||||
|
||||
if (urls == null) {
|
||||
urls = tmp;
|
||||
}
|
||||
else {
|
||||
urls.push(tmp[0])
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
reader.readAsText(file);
|
||||
|
||||
Papa.parse(file, {
|
||||
header: true,
|
||||
delimiter: '\t',
|
||||
|
@ -11,23 +44,36 @@ function loadFile(evt, onComplete) {
|
|||
comments: "#",
|
||||
skipEmptyLines: true,
|
||||
dynamicTyping: true,
|
||||
complete: function(results) { onComplete(results, file) }
|
||||
complete: function(results) { onComplete(results, file, urls); }
|
||||
});
|
||||
}
|
||||
|
||||
function setupInterface(data, file, urls) {
|
||||
|
||||
var displayRows=30
|
||||
var startIndex=0;
|
||||
var endIndex=displayRows;
|
||||
var urls = null;
|
||||
|
||||
function setupInterface(data, file) {
|
||||
var displayRows=30
|
||||
var startIndex=0;
|
||||
var endIndex=displayRows;
|
||||
|
||||
function updatePreview(nRow) {
|
||||
|
||||
if (urls == null) return;
|
||||
|
||||
let img_url = urls.data[data.data[nRow]['url_id']]['url']
|
||||
let img_url = urls[data.data[nRow]['url_id']];
|
||||
|
||||
let left = data.data[nRow]['left'];
|
||||
let right = data.data[nRow]['right'];
|
||||
let top = data.data[nRow]['top'];
|
||||
let bottom = data.data[nRow]['bottom'];
|
||||
|
||||
let width = right - left;
|
||||
let height = bottom - top;
|
||||
|
||||
img_url = img_url.replace('left', left.toString());
|
||||
img_url = img_url.replace('right', right.toString());
|
||||
img_url = img_url.replace('top', top.toString());
|
||||
img_url = img_url.replace('bottom',bottom.toString());
|
||||
img_url = img_url.replace('width', width.toString());
|
||||
img_url = img_url.replace('height', height.toString());
|
||||
|
||||
console.log(img_url);
|
||||
|
||||
|
@ -40,45 +86,11 @@ function setupInterface(data, file) {
|
|||
|
||||
function gotoLocation(evt) {
|
||||
|
||||
if (urls != null) {
|
||||
if (urls == null) return;
|
||||
|
||||
let nRow = parseInt($(evt.target).text());
|
||||
let nRow = parseInt($(evt.target).text());
|
||||
|
||||
updatePreview(nRow)
|
||||
}
|
||||
else {
|
||||
let url_mapping_html =
|
||||
`
|
||||
<br/>
|
||||
<br/>
|
||||
<br/>
|
||||
<input type="file" id="url-mapping-tsv-file" style="visibility: hidden; width: 1px; height: 1px"/>
|
||||
Please
|
||||
<a href="" onclick="$('#url-mapping-tsv-file').click(); return false">upload a url mapping file</a>
|
||||
or<button class="btn btn-link" id="goback">go back to edit mode.</button>
|
||||
`;
|
||||
|
||||
$("#tableregion").html(url_mapping_html);
|
||||
$("#btn-region").empty();
|
||||
$("#region-right").empty();
|
||||
|
||||
$('#goback').on('click',
|
||||
function(evt) {
|
||||
setupInterface(data, file);
|
||||
}
|
||||
);
|
||||
|
||||
$('#url-mapping-tsv-file').change(
|
||||
function(evt) {
|
||||
loadFile(evt,
|
||||
function(results, url_mapping_file) {
|
||||
urls = results;
|
||||
|
||||
setupInterface(data, file);
|
||||
});
|
||||
}
|
||||
);
|
||||
}
|
||||
updatePreview(nRow)
|
||||
}
|
||||
|
||||
function colorCode() {
|
||||
|
@ -426,6 +438,23 @@ function setupInterface(data, file) {
|
|||
dynamicTyping: true
|
||||
});
|
||||
|
||||
let lines = csv.split(/\r\n|\n/);
|
||||
|
||||
csv = [ lines[0] ];
|
||||
let url_id = -1;
|
||||
|
||||
for(var i = 0; i < data.data.length; i++){
|
||||
if (data.data[i]['url_id'] > url_id) {
|
||||
|
||||
url_id = data.data[i]['url_id'];
|
||||
|
||||
csv.push("# " + urls[url_id]);
|
||||
}
|
||||
csv.push(lines[i+1]);
|
||||
}
|
||||
|
||||
csv = csv.join('\n');
|
||||
|
||||
openSaveFileDialog (csv, file.name, null)
|
||||
}
|
||||
|
||||
|
@ -474,8 +503,6 @@ function setupInterface(data, file) {
|
|||
if (!$.contains($('#table')[0], target)) return
|
||||
|
||||
$(target).data('tableInfo').clickAction(target);
|
||||
|
||||
//makeTdEditable(target);
|
||||
});
|
||||
|
||||
updateTable();
|
||||
|
@ -541,9 +568,9 @@ $(document).ready(
|
|||
function(evt) {
|
||||
|
||||
loadFile ( evt,
|
||||
function(results, file) {
|
||||
function(results, file, urls) {
|
||||
|
||||
setupInterface(results, file);
|
||||
setupInterface(results, file, urls);
|
||||
})
|
||||
}
|
||||
);
|
||||
|
|
|
@ -44,14 +44,11 @@ page2tsv PAGE5.xml PAGE.tsv --image-url=http://link-to-corresponding-image-5
|
|||
...
|
||||
```
|
||||
|
||||
A corresponding URL-mapping file can be obtained from:
|
||||
For instance, for the file assets/example.xml:
|
||||
|
||||
```
|
||||
extract-doc-links PAGE.tsv PAGE-urls.tsv
|
||||
page2tsv example.xml example4.tsv --image-url=http://content.staatsbibliothek-berlin.de/zefys/SNP27646518-18800101-0-3-0-0/left,top,width,height/full/0/default.jpg
|
||||
```
|
||||
By loading the annotated TSV as well as the url mapping file into
|
||||
ner.edith, you will be able to jump directly to the original image
|
||||
where the full text has been extracted from.
|
||||
|
||||
---
|
||||
|
||||
|
@ -62,13 +59,3 @@ Create a URL-annotated TSV file from an existing TSV file:
|
|||
```
|
||||
annotate-tsv enp_DE.tsv enp_DE-annotated.tsv
|
||||
```
|
||||
Create a corresponding URL-mapping file:
|
||||
|
||||
```
|
||||
extract-doc-links enp_DE.tsv enp_DE-urls.tsv
|
||||
```
|
||||
|
||||
By loading the annotated TSV as well as the url mapping file into
|
||||
ner.edith, you will be able to jump directly to the original image
|
||||
where the full text has been extracted from.
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue