From 270a080e362804928890ae53f53f957352b8f9e7 Mon Sep 17 00:00:00 2001 From: Kai Labusch Date: Tue, 4 Jul 2023 10:25:27 +0200 Subject: [PATCH] improve sanitizer for invalid url_ids ('-') --- neat.js | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/neat.js b/neat.js index 69ccf30..5f667e6 100644 --- a/neat.js +++ b/neat.js @@ -109,7 +109,7 @@ function setupInterface(data, file, urls) { let counter = 0; let checker = function() { - console.log('checker ...', counter); + //console.log('checker ...', counter); if (counter > 20) return; @@ -411,7 +411,8 @@ function setupInterface(data, file, urls) { } function sanitizeData() { - word_pos = 0; + let last_url_id = 0; + let word_pos = 0; for(let i = 0; i < data.data.length; i++){ min_left = (parseInt(data.data[i]['left']) < min_left) ? parseInt(data.data[i]['left']) : min_left; @@ -449,6 +450,22 @@ function setupInterface(data, file, urls) { data.data[i]['NE-EMB'] = data.data[i]['NE-EMB'].toString().replace(/(\r\n|\n|\r)/gm, ""); } + if (data.meta.fields.includes('url_id')) { + if (typeof data.data[i]['url_id'] === 'string' || data.data[i]['url_id'] instanceof String) { + + let num = parseInt(data.data[i]['url_id']); + + if (!isNaN(num)) { + last_url_id = num; + } + + data.data[i]['url_id'] = last_url_id; + } + else { + last_url_id = data.data[i]['url_id']; + } + } + word_pos++; } } @@ -974,6 +991,7 @@ function setupInterface(data, file, urls) { let lines = csv.split(/\r\n|\n/); csv = [ lines[0] ]; + let url_id = -1; for(let i = 0; i < data.data.length; i++){