/**
 * Collect the links found on the comment lines (lines starting with '#')
 * of a text.
 *
 * NOTE(review): the first comment line that contains links contributes ALL of
 * its links, every later comment line contributes only its first link. That
 * mirrors the historical behaviour; confirm whether one link per line was
 * the real intent.
 *
 * @param {string} text - full file content.
 * @returns {?string[]} the collected links, or null when there are none.
 */
function extractCommentUrls(text) {
    const linkDetector = /(https?:\/\/[^\s]+)/g;
    let urls = null;

    for (const line of text.split(/\r\n|\n/)) {
        if (!line.startsWith('#')) continue;

        const found = line.match(linkDetector);
        if (found == null) continue;

        if (urls == null) {
            urls = found;
        } else {
            urls.push(found[0]);
        }
    }

    return urls;
}

/**
 * Fill the placeholders 'left', 'right', 'top', 'bottom', 'width' and
 * 'height' of a URL template with the coordinates of a row.
 * Only the first occurrence of each placeholder is replaced, in that order.
 *
 * @param {string} template - URL containing the placeholder words.
 * @param {Object} box - object with 'left', 'right', 'top', 'bottom' values.
 * @returns {string} the URL with the placeholders substituted.
 */
function buildPreviewUrl(template, box) {
    const x0 = box['left'];
    const x1 = box['right'];
    const y0 = box['top'];
    const y1 = box['bottom'];

    const substitutions = [
        ['left', x0],
        ['right', x1],
        ['top', y0],
        ['bottom', y1],
        ['width', x1 - x0],
        ['height', y1 - y0],
    ];

    // String() instead of .toString() so that a null/undefined coordinate
    // does not throw.
    return substitutions.reduce(
        (url, [placeholder, value]) => url.replace(placeholder, String(value)),
        template
    );
}

/**
 * Read the file selected in a file input, extract the links from its comment
 * lines and parse its tab-separated content with Papa Parse.
 *
 * The table is parsed only AFTER the comment lines have been read, so that
 * `urls` is always final by the time `onComplete` runs.
 *
 * @param {Event} evt - change event of the file input.
 * @param {function(Object, File, ?string[])} onComplete - receives the Papa
 *        Parse results, the file and the extracted links (or null).
 */
function loadFile(evt, onComplete) {
    const file = evt.target.files[0];

    // Nothing selected (e.g. the file dialog was cancelled).
    if (!file) return;

    const parseTable = function(urls) {
        Papa.parse(file, {
            header: true,
            delimiter: '\t',
            quoteChar: String.fromCharCode(0),
            escapeChar: String.fromCharCode(0),
            comments: "#",
            skipEmptyLines: true,
            dynamicTyping: true,
            complete: function(results) { onComplete(results, file, urls); }
        });
    };

    const reader = new FileReader();

    reader.onload = function(event) {
        parseTable(extractCommentUrls(event.target.result));
    };

    // Still attempt to parse the table when the text read fails, so that
    // onComplete is reached as before (without links).
    reader.onerror = function() {
        console.error('Could not read comment lines of ' + file.name);
        parseTable(null);
    };

    reader.readAsText(file);
}

/**
 * Build the annotation interface for a parsed file: a paged table with
 * editable cells, a preview image, paging controls and a save button.
 *
 * @param {Object} data - Papa Parse result; rows are in `data.data`.
 * @param {File} file - the loaded file; its name is shown and used on save.
 * @param {?string[]} urls - links from the comment lines, indexed by the
 *        'url_id' column, or null.
 */
function setupInterface(data, file, urls) {

    // NOTE(review): the markup strings in this function (edit_html,
    // editable_html, range_html, table_html, save_html and the row/cell
    // constructors) contain no tags as seen here, although the handlers look
    // up ids/classes such as #edit-ok, #tokenizer, .type_select, #docpos and
    // #table-body. Presumably the markup was lost before review - confirm
    // against the deployed file. The strings are reproduced unchanged.

    const displayRows = 30;
    let startIndex = 0;
    let endIndex = displayRows;

    const NER_TYPES = ['PER', 'LOC', 'ORG', 'PUB', 'CONF', 'TODO'];

    /** Show the preview image that belongs to row `nRow`, if there is one. */
    function updatePreview(nRow) {
        if (urls == null) return;

        const row = data.data[nRow];
        if (row == null) return;

        const template = urls[row['url_id']];
        if (typeof template !== 'string') return;

        const img_url = buildPreviewUrl(template, row);

        console.log(img_url);

        if (img_url == "http://empty") return;

        $("#preview").attr("src", img_url);
        $("#preview-link").attr("href", img_url);
    }

    /** Click handler: preview the row whose number is the clicked text. */
    function gotoLocation(evt) {
        if (urls == null) return;

        const nRow = Number.parseInt($(evt.target).text(), 10);

        updatePreview(nRow);
    }

    /** Add the ner_* class to every table cell containing a B-/I- tag. */
    function colorCode() {
        NER_TYPES.forEach(function(type) {
            const cssClass = 'ner_' + type.toLowerCase();

            ['B', 'I'].forEach(function(prefix) {
                $("#table td:contains('" + prefix + "-" + type + "')").addClass(cssClass);
            });
        });
    }

    // The cell currently being edited (or null/undefined when none is).
    let editingTd;

    /** Turn a cell into a textarea; OK stores the typed text in the data. */
    function makeTdEditable(td) {

        editingTd = {
            elem: td,
            data: td.innerHTML,
            finish: function(td, isOk) {
                if (isOk) {
                    const newValue = $('#edit-area').val();

                    // .text(), not .html(): the typed value is plain text and
                    // must neither be parsed as markup nor entity-mangled.
                    $(td).text(newValue);

                    const tableInfo = $(td).data('tableInfo');

                    data.data[tableInfo.nRow][tableInfo.column] = newValue;
                } else {
                    $(td).html(editingTd.data);
                }
                editingTd = null;
            }
        };

        const textArea = document.createElement('textarea');
        textArea.style.width = td.clientWidth + 'px';
        textArea.style.height = td.clientHeight + 'px';
        textArea.id = 'edit-area';

        // Cells are filled with .text(), so read them back the same way;
        // .html() would hand '&amp;' instead of '&' to the editor.
        $(textArea).val($(td).text());
        $(td).html('');
        $(td).append(textArea);
        textArea.focus();

        const edit_html = `
`;
        td.insertAdjacentHTML("beforeEnd", edit_html);

        $('#edit-ok').on('click', function(evt) {
            if (editingTd) editingTd.finish(editingTd.elem, true);
        });

        $('#edit-cancel').on('click', function(evt) {
            if (editingTd) editingTd.finish(editingTd.elem, false);
        });
    }

    /**
     * Replace a 'No.' cell by the tokenizer menu. Depending on the chosen
     * action the row is merged into the one above, duplicated (split), or
     * made the first word of a sentence; 'No.' values are renumbered.
     */
    function makeLineSplitMerge(td) {

        editingTd = {
            elem: td,
            data: td.innerHTML,
            tokenizer_action: null,
            finish: function(td, isOk) {
                $(td).html(editingTd.data);
                $(td).addClass('editable');

                const action = editingTd.tokenizer_action;

                if (action == null) {
                    editingTd = null;
                    return;
                }

                const rows = data.data;
                const nRow = $(td).data('tableInfo').nRow;

                if (action.includes('merge')) {

                    // The first row has nothing above it to merge into.
                    if (nRow < 1) {
                        editingTd = null;
                        return;
                    }

                    // Renumber the remaining words of the sentence.
                    let word_pos = rows[nRow - 1]['No.'] + 1;
                    for (let pos = nRow + 1; (pos < rows.length) && (rows[pos]['No.'] > 1); pos++) {
                        rows[pos]['No.'] = word_pos;
                        word_pos++;
                    }

                    // dynamicTyping may have turned tokens into numbers;
                    // concatenate them as text instead of adding them.
                    rows[nRow - 1]['TOKEN'] = String(rows[nRow - 1]['TOKEN']) + String(rows[nRow]['TOKEN']);

                    rows.splice(nRow, 1);
                } else if (action.includes('split')) {

                    // Insert a deep copy of the row, then shift the numbers
                    // of the copy and of the rest of the sentence by one.
                    rows.splice(nRow, 0, JSON.parse(JSON.stringify(rows[nRow])));
                    rows[nRow + 1]['No.'] += 1;

                    for (let pos = nRow + 2; (pos < rows.length) && (rows[pos]['No.'] > 1); pos++) {
                        rows[pos]['No.']++;
                    }
                } else if (action.includes('start-sentence')) {

                    // Number from 1 up to the next row that already is 1.
                    let word_pos = 1;
                    for (let pos = nRow; (pos < rows.length) && (rows[pos]['No.'] != 1); pos++) {
                        rows[pos]['No.'] = word_pos;
                        word_pos++;
                    }
                }

                editingTd = null;

                updateTable();
            }
        };

        const edit_html = `
↕  split
⟳ merge-above
start-sentence
`;

        $(td).removeClass();
        $(td).html(edit_html);

        $('#tokenizer').on('mouseleave', function(event) {
            if (editingTd) editingTd.finish(editingTd.elem, false);
        });

        // Only records the choice; finish() is reached through the click
        // bubbling up to the #table handler.
        $('.tokenizer-action').on('click', function(event) {
            editingTd.tokenizer_action = $(event.target).text();
        });
    }

    /** Replace a tag cell by the tag menu; the chosen tag is stored on finish. */
    function makeTagEdit(td) {

        editingTd = {
            elem: td,
            data: td.innerHTML,
            finish: function(td, isOk) {
                const tableInfo = $(td).data('tableInfo');

                data.data[tableInfo.nRow][tableInfo.column] = editingTd.data;

                $(td).html(editingTd.data);
                $(td).addClass('editable');
                editingTd = null;

                colorCode();
            }
        };

        const edit_html = `
O
B
B-PER
B-LOC
B-ORG
B-PUB
B-CONF
B-TODO
I
I-PER
I-LOC
I-ORG
I-PUB
I-CONF
I-TODO
`;

        $(td).removeClass();
        $(td).html(edit_html);

        $('#tagger').on('mouseleave', function(event) {
            if (editingTd) editingTd.finish(editingTd.elem, false);
        });

        // Only records the choice; finish() is reached through the click
        // bubbling up to the #table handler.
        $('.type_select').on('click', function(event) {
            editingTd.data = $(event.target).text();
        });
    }

    /** Re-render rows [startIndex, endIndex) and sync preview and slider. */
    function updateTable() {

        const do_not_display = new Set(['url_id', 'left', 'right', 'top', 'bottom']);

        const columnActions = new Map([
            ['No.', makeLineSplitMerge],
            ['TOKEN', makeTdEditable],
            ['GND-ID', makeTdEditable],
            ['NE-TAG', makeTagEdit],
            ['NE-EMB', makeTagEdit],
        ]);

        const defaultAction = function() { console.log('Do something different'); };

        editingTd = null;

        const editable_html = ` `;

        $('#table-body').empty();

        // Visit only the visible window instead of every row of the file.
        const first = Math.max(0, startIndex);
        const last = Math.min(endIndex, data.data.length);

        for (let nRow = first; nRow < last; nRow++) {
            const el = data.data[nRow];

            const row = $("");

            row.append($(' '));

            Object.keys(el).forEach(function(column) {
                if (do_not_display.has(column)) return;

                const clickAction = columnActions.has(column) ? columnActions.get(column) : defaultAction;

                row.append(
                    $(editable_html).
                        text(el[column]).
                        data('tableInfo', { 'nRow': nRow, 'column': column , 'clickAction': clickAction })
                );
            });

            $("#table tbody").append(row);
        }

        colorCode();

        $(".offset").on('click', gotoLocation);

        updatePreview(startIndex);

        // The slider counts from the end of the document (see the #docpos
        // change handler), so that is the value to compare against.
        const sliderValue = data.data.length - startIndex;
        if (Number($("#docpos").val()) !== sliderValue) {
            $("#docpos").val(sliderValue);
        }
    }

    // NOTE(review): not referenced in the code as seen here; presumably
    // interpolated into the slider markup - kept for that reason.
    let slider_pos = data.data.length - startIndex;
    let slider_min = displayRows;
    let slider_max = data.data.length;

    const range_html = ` `;

    $("#region-right").html(range_html);

    $("#docpos").on('change', function(evt) {
        if (startIndex == data.data.length - this.value) return;

        startIndex = data.data.length - this.value;
        endIndex = startIndex + displayRows;

        updateTable();
    });

    $('#docpos').slider();

    const table_html = `
LOCATION POSITION TOKEN NE-TAG NE-EMB GND-ID


`;

    const save_html = ``;

    $("#tableregion").html(table_html);

    $("#btn-region").html(save_html);

    $("#file-region").html('\n\n' + file.name + '\n\n');

    /**
     * Serialise the rows as TSV, re-insert a "# <link>" comment line before
     * the first row of each new url_id, and offer the result for download.
     */
    function saveFile(evt) {

        const unparsed = Papa.unparse(data, {
            header: true,
            delimiter: '\t',
            comments: "#",
            quoteChar: String.fromCharCode(0),
            escapeChar: String.fromCharCode(0),
            skipEmptyLines: true,
            dynamicTyping: true
        });

        const lines = unparsed.split(/\r\n|\n/);

        const out = [ lines[0] ];
        let url_id = -1;

        data.data.forEach(function(row, i) {
            if (row['url_id'] > url_id) {
                url_id = row['url_id'];

                // No link known for this id (or no links at all): skip the
                // comment line rather than throwing or writing "# undefined".
                if ((urls != null) && (urls[url_id] !== undefined)) {
                    out.push("# " + urls[url_id]);
                }
            }
            out.push(lines[i + 1]);
        });

        openSaveFileDialog(out.join('\n'), file.name, null);
    }

    /**
     * Trigger a browser download of `data` under `filename`.
     *
     * @param {(string|Blob)} data - content; empty content is ignored.
     * @param {string} filename - suggested name, 'untitled' when empty.
     * @param {?string} mimetype - defaults to 'application/octet-stream'.
     */
    function openSaveFileDialog (data, filename, mimetype) {

        if (!data) return;

        const blob = data.constructor !== Blob
            ? new Blob([data], {type: mimetype || 'application/octet-stream'})
            : data;

        if (navigator.msSaveBlob) {
            navigator.msSaveBlob(blob, filename);
            return;
        }

        const lnk = document.createElement('a');
        const url = window.URL;

        if (mimetype) {
            lnk.type = mimetype;
        }

        lnk.download = filename || 'untitled';

        const objectURL = url.createObjectURL(blob);
        lnk.href = objectURL;

        lnk.dispatchEvent(new MouseEvent('click'));

        // Release the object URL once the click has been dispatched.
        setTimeout(url.revokeObjectURL.bind(url, objectURL));
    }

    $('.saveButton').on('click', saveFile);

    $('#table').on('click', function(event) {

        const target = event.target.closest('.editable');

        if (editingTd) {
            if (target == editingTd.elem) return;

            editingTd.finish(editingTd.elem, true);
        }

        // finish() may have re-rendered the table, detaching the target.
        if (!target || !$.contains($('#table')[0], target)) return;

        $(target).data('tableInfo').clickAction(target);
    });

    updateTable();

    // NOTE(review): this listener is added on every call of setupInterface;
    // if #tableregion outlives a reload, older listeners presumably stay
    // attached - confirm.
    $('#tableregion')[0].addEventListener("wheel", function(event) {

        if (event.deltaY < 0) {
            if (startIndex <= 0) return;

            startIndex -= 1;
            endIndex -= 1;
        } else {
            if (endIndex >= data.data.length) return;

            startIndex += 1;
            endIndex += 1;
        }

        updateTable();
    });

    $('#back').on('click', function(evt) {

        if (startIndex >= displayRows) {
            startIndex -= displayRows;
            endIndex -= displayRows;
        } else {
            startIndex = 0;
            endIndex = displayRows;
        }

        updateTable();
    });

    $('#next').on('click', function(evt) {

        if (endIndex + displayRows < data.data.length) {
            endIndex += displayRows;
            startIndex = endIndex - displayRows;
        } else {
            endIndex = data.data.length;

            // Never negative, even when the file has fewer rows than a page.
            startIndex = Math.max(0, endIndex - displayRows);
        }

        updateTable();
    });
}

// Wire up the file input once the page is ready. Skipped when there is no
// `document` (outside a browser) so the helpers above can be loaded alone.
if (typeof document !== 'undefined') {
    $(document).ready(function() {
        $('#tsv-file').on('change', function(evt) {
            loadFile(evt, function(results, file, urls) {
                setupInterface(results, file, urls);
            });
        });
    });
}