mirror of
https://github.com/qurator-spk/neat.git
synced 2025-06-11 20:59:54 +02:00
improve sanitizer for invalid url_ids ('-')
This commit is contained in:
parent
ecf0553567
commit
270a080e36
1 changed file with 20 additions and 2 deletions
22
neat.js
22
neat.js
|
@@ -109,7 +109,7 @@ function setupInterface(data, file, urls) {
|
|||
let counter = 0;
|
||||
let checker =
|
||||
function() {
|
||||
console.log('checker ...', counter);
|
||||
//console.log('checker ...', counter);
|
||||
|
||||
if (counter > 20) return;
|
||||
|
||||
|
@@ -411,7 +411,8 @@ function setupInterface(data, file, urls) {
|
|||
}
|
||||
|
||||
function sanitizeData() {
|
||||
word_pos = 0;
|
||||
let last_url_id = 0;
|
||||
let word_pos = 0;
|
||||
for(let i = 0; i < data.data.length; i++){
|
||||
|
||||
min_left = (parseInt(data.data[i]['left']) < min_left) ? parseInt(data.data[i]['left']) : min_left;
|
||||
|
@@ -449,6 +450,22 @@ function setupInterface(data, file, urls) {
|
|||
data.data[i]['NE-EMB'] = data.data[i]['NE-EMB'].toString().replace(/(\r\n|\n|\r)/gm, "");
|
||||
}
|
||||
|
||||
if (data.meta.fields.includes('url_id')) {
|
||||
if (typeof data.data[i]['url_id'] === 'string' || data.data[i]['url_id'] instanceof String) {
|
||||
|
||||
let num = parseInt(data.data[i]['url_id']);
|
||||
|
||||
if (!isNaN(num)) {
|
||||
last_url_id = num;
|
||||
}
|
||||
|
||||
data.data[i]['url_id'] = last_url_id;
|
||||
}
|
||||
else {
|
||||
last_url_id = data.data[i]['url_id'];
|
||||
}
|
||||
}
|
||||
|
||||
word_pos++;
|
||||
}
|
||||
}
|
||||
|
@@ -974,6 +991,7 @@ function setupInterface(data, file, urls) {
|
|||
let lines = csv.split(/\r\n|\n/);
|
||||
|
||||
csv = [ lines[0] ];
|
||||
|
||||
let url_id = -1;
|
||||
|
||||
for(let i = 0; i < data.data.length; i++){
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue