/**
 * Loads the TSV file selected in a file input.
 *
 * The file is first read as text to collect the URLs found in its comment
 * lines (lines starting with '#'); only then is it parsed with Papa Parse.
 * Running the two steps in sequence guarantees that the URL list is complete
 * by the time `onComplete` is invoked.
 *
 * @param {Event} evt - change event of a file input; `evt.target.files[0]`
 *     is the file that gets loaded.
 * @param {function(Object, File, ?Array<string>)} onComplete - called with
 *     the Papa Parse results, the file, and the collected URLs (one per
 *     comment line that contains a link, in file order) or null when the
 *     file contains no such line.
 */
function loadFile(evt, onComplete) {
    const file = evt.target.files[0];
    // Nothing selected: there is nothing to read or parse.
    if (!file) return;

    const reader = new FileReader();

    reader.onerror = function() {
        console.error('Could not read ' + file.name, reader.error);
    };

    reader.onload = function(event) {
        const link_detector = /(https?:\/\/[^\s]+)/g;
        const lines = event.target.result.split(/\r\n|\n/);
        let urls = null;

        for (const line of lines) {
            if (!line.startsWith('#')) continue;
            const found = line.match(link_detector);
            if (found == null) continue;
            // Exactly one URL per comment line, so that the position in the
            // list stays in step with the comment lines of the file.
            if (urls == null) urls = [];
            urls.push(found[0]);
        }

        // Parse only now, so that `urls` is final when `complete` fires.
        Papa.parse(file, {
            header: true,
            delimiter: '\t',
            // NUL as quote/escape character: quotes in tokens are plain text.
            quoteChar: String.fromCharCode(0),
            escapeChar: String.fromCharCode(0),
            comments: "#",
            skipEmptyLines: true,
            dynamicTyping: true,
            complete: function(results) { onComplete(results, file, urls); }
        });
    };

    reader.readAsText(file);
}
/**
 * Builds the table editor for one loaded file and wires up all of its
 * controls (preview, cell editing, paging, slider, saving).
 *
 * @param {Object} data - parse results; the rows are read from and written
 *     to `data.data`.
 * @param {File} file - the loaded file; `file.name` is displayed and reused
 *     as the name of the saved file.
 * @param {?Array<string>} urls - URLs indexed by the `url_id` column of a
 *     row, or null, in which case the preview is disabled.
 */
function setupInterface(data, file, urls) {
// Number of rows shown in the table at a time.
var displayRows=30
// Rows with startIndex <= index < endIndex are the ones currently rendered.
var startIndex=0;
var endIndex=displayRows;
/**
 * Points the preview image (#preview) and its link (#preview-link) at the
 * image region that belongs to one data row.
 *
 * The URL is looked up through the row's `url_id`; the first occurrence of
 * each of the words left, right, top, bottom, width and height in it is
 * replaced by the corresponding value of the row. Does nothing when no URLs
 * are available, when the row or its URL does not exist, or when the URL is
 * the placeholder "http://empty".
 *
 * @param {number} nRow - index into `data.data`.
 */
function updatePreview(nRow) {
    if (urls == null) return;

    // The requested index can lie outside the data (e.g. an empty file), and
    // a url_id can refer to a URL that was never read from the file.
    const row = data.data[nRow];
    if (row == null) return;

    let img_url = urls[row['url_id']];
    if (img_url == null) return;

    const left = row['left'];
    const right = row['right'];
    const top = row['top'];
    const bottom = row['bottom'];
    const width = right - left;
    const height = bottom - top;

    // String(x) rather than x.toString(): a row without coordinates (null
    // values) must not abort the caller with a TypeError.
    img_url = img_url.replace('left', String(left));
    img_url = img_url.replace('right', String(right));
    img_url = img_url.replace('top', String(top));
    img_url = img_url.replace('bottom', String(bottom));
    img_url = img_url.replace('width', String(width));
    img_url = img_url.replace('height', String(height));

    console.log(img_url);

    if (img_url == "http://empty") return;

    $("#preview").attr("src", img_url);
    $("#preview-link").attr("href", img_url);
}
/**
 * Click handler that shows the preview for the row whose number is the text
 * of the clicked element.
 *
 * @param {Event} evt - click event; the text of `evt.target` is parsed as a
 *     decimal row index.
 */
function gotoLocation(evt) {
    if (urls == null) return;
    const nRow = parseInt($(evt.target).text(), 10);
    // Text that is not a number cannot identify a row.
    if (Number.isNaN(nRow)) return;
    updatePreview(nRow);
}
/**
 * Adds the entity-type CSS class to every cell of #table whose text contains
 * one of the named-entity tags (B-… or I-… prefix).
 */
function colorCode() {
    // Entity type -> CSS class, in the order in which the classes are applied.
    const classByEntity = [
        ['PER', 'ner_per'],
        ['LOC', 'ner_loc'],
        ['ORG', 'ner_org'],
        ['PUB', 'ner_pub'],
        ['CONF', 'ner_conf'],
        ['TODO', 'ner_todo'],
    ];

    classByEntity.forEach(([entity, cssClass]) => {
        ['B', 'I'].forEach((prefix) => {
            $(`#table td:contains('${prefix}-${entity}')`).addClass(cssClass);
        });
    });
}
// Edit state shared by all cell editors: the cell being edited (`elem`), its
// original markup (`data`) and a `finish(td, isOk)` callback that ends the
// edit. Null/undefined while no cell is being edited.
let editingTd;

/**
 * Replaces the content of a table cell with a textarea (#edit-area) so that
 * its text can be edited freely.
 *
 * Finishing with isOk === true writes the textarea value into the cell and
 * into `data.data` at the row/column recorded in the cell's 'tableInfo';
 * otherwise the original cell markup is restored.
 *
 * @param {HTMLElement} td - the cell to edit.
 */
function makeTdEditable(td) {
    editingTd = {
        elem: td,
        data: td.innerHTML,
        finish: function (td, isOk) {
            if (isOk) {
                const newValue = $('#edit-area').val();
                // The value is plain text typed by the user: insert it as
                // text so that characters like "<" or "&" are neither
                // interpreted as markup nor altered.
                $(td).text(newValue);
                const tableInfo = $(td).data('tableInfo');
                data.data[tableInfo.nRow][tableInfo.column] = newValue;
            }
            else {
                $(td).html(editingTd.data);
            }
            editingTd = null;
        }
    };

    // The textarea takes over the size of the cell it replaces.
    const textArea = document.createElement('textarea');
    textArea.style.width = td.clientWidth + 'px';
    textArea.style.height = td.clientHeight + 'px';
    textArea.id = 'edit-area';

    // Read the cell as text, not as markup: .html() would hand back entity
    // escapes ("&amp;" for "&"), which would then be saved into the data.
    $(textArea).val($(td).text());
    $(td).html('');
    $(td).append(textArea);
    textArea.focus();

    // NOTE(review): the markup of the OK/Cancel controls (#edit-ok,
    // #edit-cancel) presumably belongs in this template; it is empty in this
    // copy of the file - confirm against the original.
    const edit_html =
`
`;
    td.insertAdjacentHTML("beforeEnd", edit_html);

    $('#edit-ok').on('click',
        function(evt) {
            editingTd.finish(editingTd.elem, true);
        });

    $('#edit-cancel').on('click',
        function(evt) {
            editingTd.finish(editingTd.elem, false);
        });
}
/**
 * Turns a 'No.' cell into a menu of tokenizer actions. The action is the
 * text of the clicked `.tokenizer-action` element and is applied when the
 * mouse leaves #tokenizer:
 *
 *  - text containing 'merge': append the row's TOKEN to the previous row,
 *    remove the row and renumber the rows that follow in the same sentence;
 *  - text containing 'split': duplicate the row and shift the numbers of the
 *    rows that follow in the same sentence by one;
 *  - text containing 'start-sentence': renumber from this row on, starting
 *    at 1, up to the next row that already has number 1.
 *
 * A row with 'No.' equal to 1 is treated as the start of a sentence. The
 * table is re-rendered after an action was applied.
 *
 * @param {HTMLElement} td - the clicked 'No.' cell.
 */
function makeLineSplitMerge(td) {
    editingTd = {
        elem: td,
        data: td.innerHTML,
        tokenizer_action: null,
        finish: function(td, isOk) {
            // Put the cell back the way it was before the menu replaced it.
            $(td).html(editingTd.data);
            $(td).addClass('editable');

            // No action chosen: nothing to change.
            if (editingTd.tokenizer_action == null) {
                editingTd = null;
                return;
            }

            const tableInfo = $(td).data('tableInfo');

            if (editingTd.tokenizer_action.includes('merge')) {
                // The first row has no predecessor to merge into.
                if (tableInfo.nRow < 1) {
                    editingTd = null;
                    return;
                }

                // Renumber the rest of the sentence so that it continues
                // right after the row that absorbs this one.
                let pos = tableInfo.nRow + 1;
                // Declared locally: a bare assignment would create a global
                // variable (and throw in strict mode).
                let word_pos = data.data[tableInfo.nRow - 1]['No.'] + 1;
                while ((pos < data.data.length) && (data.data[pos]['No.'] > 1)) {
                    data.data[pos]['No.'] = word_pos;
                    pos++;
                    word_pos++;
                }

                data.data[tableInfo.nRow - 1]['TOKEN'] += data.data[tableInfo.nRow]['TOKEN'];
                data.data.splice(tableInfo.nRow, 1);
            }
            else if (editingTd.tokenizer_action.includes('split')) {
                // Insert a deep copy of the row in front of it; the second
                // of the two rows gets the next number.
                data.data.splice(tableInfo.nRow, 0, JSON.parse(JSON.stringify(data.data[tableInfo.nRow])));
                data.data[tableInfo.nRow + 1]['No.'] += 1;

                let pos = tableInfo.nRow + 2;
                while ((pos < data.data.length) && (data.data[pos]['No.'] > 1)) {
                    data.data[pos]['No.']++;
                    pos++;
                }
            }
            else if (editingTd.tokenizer_action.includes('start-sentence')) {
                let pos = tableInfo.nRow;
                let word_pos = 1;
                while ((pos < data.data.length) && (data.data[pos]['No.'] != 1)) {
                    data.data[pos]['No.'] = word_pos;
                    pos++;
                    word_pos++;
                }
            }

            editingTd = null;
            updateTable();
        }
    };

    // NOTE(review): the menu markup (#tokenizer with its .tokenizer-action
    // entries) presumably belongs in this template; it is empty in this copy
    // of the file - confirm against the original.
    const edit_html = `
`;
    $(td).removeClass();
    $(td).html(edit_html);

    $('#tokenizer').mouseleave(
        function(event) {
            editingTd.finish(editingTd.elem, false);
        });

    $('.tokenizer-action').click(
        function(event) {
            editingTd.tokenizer_action = $(event.target).text();
        });
}
/**
 * Replaces the content of a tag cell with a menu of tags. Clicking a
 * `.type_select` element remembers its text as the new value; when the
 * mouse leaves #tagger the remembered value (or the original cell content if
 * nothing was clicked) is written into the cell and into `data.data`, and
 * the table colors are refreshed.
 *
 * @param {HTMLElement} td - the clicked tag cell.
 */
function makeTagEdit(td) {
    const tagMenu = `
B
B-PER
B-LOC
B-ORG
B-PUB
B-CONF
B-TODO
I
I-PER
I-LOC
I-ORG
I-PUB
I-CONF
I-TODO
`;

    // Ends the edit: stores the current value and restores the cell.
    const commitTag = (cell, isOk) => {
        const { nRow, column } = $(cell).data('tableInfo');
        data.data[nRow][column] = editingTd.data;
        $(cell).html(editingTd.data);
        $(cell).addClass('editable');
        editingTd = null;
        colorCode();
    };

    editingTd = { elem: td, data: td.innerHTML, finish: commitTag };

    $(td).removeClass();
    $(td).html(tagMenu);

    $('#tagger').mouseleave(() => {
        editingTd.finish(editingTd.elem, false);
    });

    $('.type_select').click((event) => {
        editingTd.data = $(event.target).text();
    });
}
/**
 * Re-renders the visible part of the table.
 *
 * Empties #table-body and appends one table row for every data row whose
 * index lies in [startIndex, endIndex). Every displayed column becomes a
 * cell that carries a 'tableInfo' record (row index, column name and the
 * function to run when the cell is clicked). Afterwards the tag colors are
 * applied, the preview is pointed at the first visible row and the #docpos
 * slider is updated.
 */
function updateTable() {
// Columns that stay in the data but are not rendered as table cells.
let do_not_display = new Set(['url_id', 'left', 'right', 'top', 'bottom']);
// The cells are about to be rebuilt, so any edit in progress is dropped.
editingTd = null;
// NOTE(review): the markup of this template and of the row/offset-cell
// strings further down appears to be missing from this copy of the file
// (the row string is split across two lines) - confirm against the original.
let editable_html =
`
`;
$('#table-body').empty();
$.each(data.data,
function(nRow, el) {
// Skip every row outside the visible window.
if (nRow < startIndex) return;
if (nRow >= endIndex) return;
var row = $(" |
");
row.append($(' | '));
$.each(el,
function(column, content) {
if (do_not_display.has(column)) return
// Default action for columns without an editor.
var clickAction = function() { console.log('Do something different');}
// The column decides which editor a click on the cell opens.
if (column == 'No.')
clickAction = makeLineSplitMerge
if ((column == 'TOKEN') || (column == 'GND-ID'))
clickAction = makeTdEditable
if ((column == 'NE-TAG') || (column == 'NE-EMB'))
clickAction = makeTagEdit
// The content is inserted as text; tableInfo ties the cell to its data.
row.append(
$(editable_html).
text(content).
data('tableInfo', { 'nRow': nRow, 'column': column , 'clickAction': clickAction })
);
});
$("#table tbody").append(row);
});
colorCode();
$(".offset").on('click', gotoLocation);
updatePreview(startIndex)
// NOTE(review): the value written to the slider is
// data.data.length - startIndex, so comparing the current value with
// startIndex itself looks inconsistent - confirm the intended condition.
if ($("#docpos").val() != startIndex) {
$("#docpos").val(data.data.length - startIndex);
}
}
// Slider state. The slider value is data.data.length - startIndex (see the
// change handler below), so a larger value corresponds to an earlier row.
// NOTE(review): slider_pos, slider_min and slider_max are not referenced
// again in the visible code; presumably they were interpolated into
// range_html, whose markup appears to be missing from this copy of the file.
let slider_pos = data.data.length - startIndex;
let slider_min = displayRows;
let slider_max = data.data.length;
let range_html =
`
`;
$("#region-right").html(range_html)
// Moving the slider moves the visible window; the table is only re-rendered
// when the position actually changed.
$("#docpos").change(
function(evt) {
if (startIndex == data.data.length - this.value) return;
startIndex = data.data.length - this.value;
endIndex = startIndex + displayRows;
updateTable();
});
// slider() is not defined in this file - presumably a jQuery plugin.
$('#docpos').slider();
// NOTE(review): only the column captions of the table markup are left in
// this template, and save_html is empty; the surrounding tags appear to be
// missing from this copy of the file.
let table_html =
`
LOCATION |
POSITION |
TOKEN |
NE-TAG |
NE-EMB |
GND-ID |
`;
let save_html =
``
$("#tableregion").html(table_html)
$("#btn-region").html(save_html)
// Shows the name of the loaded file. NOTE(review): the string literals
// around file.name are incomplete in this copy of the file.
$("#file-region").html('' + file.name + '
');
/**
 * Serializes the table back to TSV and offers it for download under the
 * name of the loaded file.
 *
 * Papa Parse drops the comment lines when reading, so the URL comments are
 * re-inserted here: whenever the `url_id` of a row is larger than any seen
 * before, a "# <url>" line is written in front of that row. The comment is
 * left out when no URL is known for that id.
 *
 * @param {Event} evt - click event of the save button (unused).
 */
function saveFile(evt) {
    const csv =
        Papa.unparse(data,
            {
                header: true,
                delimiter: '\t',
                comments: "#",
                quoteChar: String.fromCharCode(0),
                escapeChar: String.fromCharCode(0),
                skipEmptyLines: true,
                dynamicTyping: true
            });

    // lines[0] is the header line, lines[i + 1] belongs to data row i.
    const lines = csv.split(/\r\n|\n/);
    const output = [ lines[0] ];

    let url_id = -1;
    for (let i = 0; i < data.data.length; i++) {
        const row_url_id = data.data[i]['url_id'];

        if (row_url_id > url_id) {
            url_id = row_url_id;

            // Without a URL list, or for an id outside of it, there is no
            // comment to restore; indexing null would throw and an unknown
            // id would write the text "undefined" into the file.
            const url = (urls == null) ? undefined : urls[url_id];
            if (url != null) {
                output.push("# " + url);
            }
        }

        output.push(lines[i+1]);
    }

    openSaveFileDialog (output.join('\n'), file.name, null)
}
/**
 * Offers `data` to the user as a file download.
 *
 * @param {Blob|*} data - the content; anything that is not already a Blob is
 *     wrapped in one. Falsy content (e.g. an empty string) is ignored.
 * @param {string} filename - suggested file name, 'untitled' if empty.
 * @param {?string} mimetype - MIME type of the Blob that gets created
 *     ('application/octet-stream' if missing) and of the download link.
 */
function openSaveFileDialog (data, filename, mimetype) {
    if (!data) {
        return;
    }

    const blob = (data.constructor === Blob)
        ? data
        : new Blob([data], {type: mimetype || 'application/octet-stream'});

    // Browsers that provide msSaveBlob save the Blob directly.
    if (navigator.msSaveBlob) {
        navigator.msSaveBlob(blob, filename);
        return;
    }

    // Everywhere else: click a temporary download link to an object URL.
    const anchor = document.createElement('a');
    const urlApi = window.URL;

    if (mimetype) {
        anchor.type = mimetype;
    }
    anchor.download = filename || 'untitled';

    const objectURL = urlApi.createObjectURL(blob);
    anchor.href = objectURL;
    anchor.dispatchEvent(new MouseEvent('click'));

    // Release the object URL once the click has been dispatched.
    setTimeout(() => { urlApi.revokeObjectURL(objectURL); });
}
// Save button: write the current table to a file.
$('.saveButton').on('click', saveFile);

// Clicks inside the table: finish the edit in progress (if any) and run the
// action that was stored with the clicked cell.
$('#table').on('click',
    function(event) {
        const target = event.target.closest('.editable');

        if (editingTd) {
            // A click inside the cell that is being edited belongs to its editor.
            if (target == editingTd.elem) return;

            editingTd.finish(editingTd.elem, true);
        }

        // Nothing editable was hit, or the cell is no longer part of the
        // table (finishing an edit can re-render it).
        if (!$.contains($('#table')[0], target)) return;

        $(target).data('tableInfo').clickAction(target);
    });

// Initial rendering.
updateTable();

// Mouse wheel over the table: move the visible window by one row.
$('#tableregion')[0].addEventListener("wheel",
    function(event) {
        if (event.deltaY < 0) {
            if (startIndex <= 0) return;

            startIndex -= 1;
            endIndex -= 1;
        }
        else {
            if (endIndex >= data.data.length) return;

            startIndex += 1;
            endIndex += 1;
        }

        updateTable();
    });

// Page backwards, stopping at the first row.
$('#back').on('click',
    function(evt) {
        if (startIndex >= displayRows) {
            startIndex -= displayRows;
            endIndex -= displayRows;
        }
        else {
            startIndex = 0;
            endIndex = displayRows;
        }

        updateTable();
    }
);

// Page forwards, stopping at the last row.
$('#next').on('click',
    function(evt) {
        if (endIndex + displayRows < data.data.length) {
            endIndex += displayRows;
        }
        else {
            endIndex = data.data.length;
        }

        // With fewer rows than one page, endIndex - displayRows is negative;
        // a negative start would address a row that does not exist.
        startIndex = Math.max(0, endIndex - displayRows);

        updateTable();
    }
);
}
// Once the document is ready: whenever a file is chosen in #tsv-file, load
// it and build the editor interface from the result.
$(document).ready(() => {
    $('#tsv-file').change((evt) => {
        loadFile(evt, (results, file, urls) => {
            setupInterface(results, file, urls);
        });
    });
});