diff --git a/README.md b/README.md index 755236b..5694fb4 100644 --- a/README.md +++ b/README.md @@ -76,13 +76,13 @@ No. TOKEN NE-TAG NE-EMB ``` For our purposes we extend this format by adding -* a fifth column for an ``ID`` for the outer ``NE-TAG`` from an authority file (in this case, the [GND](https://www.dnb.de/EN/Professionell/Standardisierung/GND/gnd_node.html) is used) +* a fifth column for an ``ID`` for the outer ``NE-TAG`` from an authority file (in this case [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) is used) * column six for use as a variable ``url_id`` (see [Image Support](https://github.com/qurator-spk/neat/blob/master/README.md#28-image-support) for further details) * finally, columns 7+ are used for storing ``left,right,top,bottom`` pixel coordinates for facsimile snippets Example (full): ```tsv -No. TOKEN NE-TAG NE-EMB GND-ID url_id left,right,top,bottom +No. TOKEN NE-TAG NE-EMB ID url_id left,right,top,bottom # https://example.url/iiif/left,right,top,bottom/full/0/default.jpg 1 Donnerstag O O - 0 174,352,358,390 2 , O O - 0 174,352,358,390 diff --git a/example.tsv b/example.tsv index ad53597..5fba179 100644 --- a/example.tsv +++ b/example.tsv @@ -1,4 +1,4 @@ -No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom +No. TOKEN NE-TAG NE-EMB ID url_id left right top bottom # https://content.staatsbibliothek-berlin.de/zefys/SNP27646518-18800101-0-3-0-0/left,top,width,height/full/0/default.jpg 0 Kampf O O - 0 154 212 400 419 0 , O O - 0 154 212 400 419 @@ -14,7 +14,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 wenn O O - 0 671 701 402 415 0 nicht O O - 0 702 755 399 417 0 Herr O O - 0 155 192 419 437 -0 Gambetta B-PER O 118716263 0 202 277 419 437 +0 Gambetta B-PER O Q295090 0 202 277 419 437 0 als O O - 0 287 311 420 436 0 deus O O - 0 320 357 419 434 0 ex O O - 0 366 385 422 434 @@ -31,7 +31,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 bei O O - 0 309 330 438 453 0 dem O O - 0 339 370 437 453 0 Präſidenten O O - 0 379 468 437 457 -0 Grévy B-PER O 119064693 0 475 524 436 456 +0 Grévy B-PER O Q296083 0 475 524 436 456 0 einen O O - 0 534 572 437 453 0 Ausgleich O O - 0 577 650 437 455 0 herbeigeführt O O - 0 658 755 436 455 @@ -58,15 +58,15 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 dem O O - 0 531 561 474 491 0 Bemühen O O - 0 570 648 474 492 0 , O O - 0 570 648 474 492 -0 Waddington B-PER O 117086630 0 660 756 474 493 +0 Waddington B-PER O Q548696 0 660 756 474 493 0 unb O O - 0 155 185 494 512 -0 Léon B-PER O 117619744 0 200 249 494 512 -0 Say I-PER O - 0 254 288 494 512 +0 Léon B-PER O Q3271322 0 200 249 494 512 +0 Say I-PER O Q3271322 0 254 288 494 512 0 zu O O - 0 308 324 498 512 0 halten O O - 0 343 398 494 512 0 , O O - 0 343 398 494 512 0 dagegen O O - 0 410 477 492 512 -0 Lepère B-PER O 1012607569 0 492 544 493 512 +0 Lepère B-PER O Q670573 0 492 544 493 512 0 zu O O - 0 563 581 497 512 0 entfernen O O - 0 600 678 492 511 0 , O O - 0 600 678 492 511 @@ -84,8 +84,8 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 dominirenden O O - 0 653 756 511 528 0 Einfluß O O - 0 156 216 531 550 0 des O O - 0 240 266 532 548 -0 Palais B-LOC O 4342820-4 0 293 346 530 550 -0 Bourbon I-LOC O - 0 368 437 530 546 +0 Palais B-LOC O Q936633 0 293 346 530 550 +0 Bourbon I-LOC O Q936633 0 368 437 530 546 0 frei O O - 0 462 488 530 549 0 zu O O - 0 511 528 535 550 0 machen O O - 0 552 610 530 549 @@ -94,7 +94,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 Sein O O - 0 644 682 529 546 0 Beſuch O O - 0 706 756 530 548 0 bei O O - 0 159 189 550 567 -0 Grévy B-PER O 119064693 0 195 246 551 569 +0 Grévy B-PER O Q296083 0 195 246 551 569 0 am O O - 0 262 285 554 566 0 Sonntag O O - 0 300 368 550 569 0 Morgen O O - 0 380 442 549 569 @@ -135,7 +135,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 wenn O O - 0 300 339 609 623 0 nicht O O - 0 349 383 605 624 0 Herr O O - 0 393 429 606 624 -0 Gambetta B-PER O 118716263 0 434 509 606 622 +0 Gambetta B-PER O Q295090 0 434 509 606 622 0 ſelbſt O O - 0 519 557 604 624 0 es O O - 0 566 582 607 621 0 hinterher O O - 0 588 656 605 623 @@ -148,15 +148,15 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 , O O - 0 257 330 625 643 0 dem O O - 0 339 370 624 640 0 Schauſpieler O O - 0 380 476 624 643 -0 Coquelin B-PER O 116670673 0 491 559 624 642 +0 Coquelin B-PER O Q142310 0 491 559 624 642 0 dem O O - 0 575 605 624 640 0 „ O O - 0 620 714 623 642 0 Jüngeren O O - 0 620 714 623 642 0 “ O O - 0 620 714 623 642 0 von O O - 0 728 756 626 639 0 der O O - 0 157 181 643 660 -0 Comédie B-ORG O 16295404-9 0 197 262 643 661 -0 françaiſe I-ORG O - 0 277 345 642 661 +0 Comédie B-ORG O Q61460498 0 197 262 643 661 +0 françaiſe I-ORG O Q61460498 0 277 345 642 661 0 anvertraut O O - 0 359 440 644 659 0 hat O O - 0 455 484 644 661 0 . O O - 0 455 484 644 661 @@ -189,7 +189,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 wohlbekannte O O - 0 157 258 700 718 0 kleine O O - 0 271 312 698 715 0 Coupé O O - 0 322 371 699 718 -0 Gambettas B-PER O 119064693 0 382 466 698 716 +0 Gambettas B-PER O Q295090 0 382 466 698 716 0 eine O O - 0 482 510 699 715 0 ganze O O - 0 525 566 702 718 0 Stunde O O - 0 577 633 698 715 @@ -206,7 +206,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 im O O - 0 476 496 718 734 0 Vorhof O O - 0 505 562 717 735 0 des O O - 0 577 602 718 733 -0 Elyſée B-LOC O 4075880-1 0 612 661 717 736 +0 Elyſée B-LOC O Q188190 0 612 661 717 736 0 ſtationiren O O - 0 666 755 703 736 0 geſehen O O - 0 158 211 737 756 0 hatten O O - 0 222 273 737 754 @@ -239,7 +239,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 . O O - 0 651 702 754 770 0 O O - 0 720 756 754 774 0 Herr O O - 0 720 756 754 774 -0 Lepère B-PER O 1012607569 0 156 212 774 793 +0 Lepère B-PER O Q670573 0 156 212 774 793 0 , O O - 0 156 212 774 793 0 der O O - 0 227 250 774 790 0 bereits O O - 0 264 314 774 790 @@ -255,7 +255,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 zu O O - 0 305 322 797 811 0 müſſen O O - 0 332 383 793 811 0 — O O - 0 393 412 801 803 -0 Freycinet B-PER O 118703099 0 421 493 793 811 +0 Freycinet B-PER O Q317957 0 421 493 793 811 0 ſelbſt O O - 0 496 544 792 811 0 hatte O O - 0 554 590 792 810 0 ihm O O - 0 600 629 793 809 @@ -271,10 +271,10 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 geſagt O O - 0 425 475 811 830 0 — O O - 0 484 503 819 822 0 Herr O O - 0 512 548 811 830 -0 Lepre B-PER O 1012607569 0 556 607 811 829 +0 Lepre B-PER O Q670573 0 556 607 811 829 0 erhielt O O - 0 616 664 811 830 0 von O O - 0 674 701 814 826 -0 Gam B-PER O 118716263 0 711 755 811 827 +0 Gam B-PER O Q295090 0 711 755 811 827 0 — I-PER O - 0 711 755 811 827 0 betta I-PER O - 0 156 192 829 846 0 die O O - 0 202 224 830 846 @@ -289,7 +289,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 Gleichzeitig O O - 0 508 592 830 848 0 wurde O O - 0 602 649 829 845 0 Herrn O O - 0 658 703 829 848 -0 Waddington B-PER O 117086630 0 714 756 829 845 +0 Waddington B-PER O Q548696 0 714 756 829 845 0 das O O - 0 230 257 849 865 0 Gegentheil O O - 0 272 354 848 867 0 bedeutet O O - 0 370 437 849 867 @@ -297,7 +297,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 den O O - 0 451 476 849 864 0 Botſchafterpoſten O O - 0 486 617 848 867 0 in O O - 0 633 648 848 864 -0 London B-LOC O 4074335-4 0 658 716 848 864 +0 London B-LOC O Q84 0 658 716 848 864 0 , O O - 0 658 716 848 864 0 der O O - 0 720 756 848 866 0 ihm O O - 0 156 185 866 885 @@ -316,8 +316,8 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 dieſen O O - 0 699 756 866 884 0 Vorgängen O O - 0 159 244 885 905 0 erhielt O O - 0 254 305 886 904 -0 Léon B-PER O 117619744 0 310 350 885 902 -0 Say I-PER O - 0 360 394 886 905 +0 Léon B-PER O Q3271322 0 310 350 885 902 +0 Say I-PER O Q3271322 0 360 394 886 905 0 erſt O O - 0 407 432 886 902 0 in O O - 0 445 460 886 902 0 ſpäter O O - 0 475 519 886 903 @@ -339,13 +339,13 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom 0 fuhr O O - 0 666 690 904 922 0 er O O - 0 692 723 909 920 0 ins O O - 0 733 756 904 919 -0 Elyſée B-LOC O 4075880-1 0 158 207 923 942 +0 Elyſée B-LOC O Q188190 0 158 207 923 942 0 und O O - 0 220 248 924 939 0 legte O O - 0 264 299 924 940 0 ſein O O - 0 313 340 923 940 0 Portefeuille O O - 0 355 445 923 942 0 in O O - 0 461 475 923 939 -0 Grevys B-PER O 119064693 0 490 546 923 942 +0 Grevys B-PER O Q296083 0 490 546 923 942 0 Hände O O - 0 557 606 923 942 0 zurück O O - 0 621 671 923 942 0 . O O - 0 621 671 923 942 diff --git a/neat.js b/neat.js index 03d7a05..aa0ba8e 100644 --- a/neat.js +++ b/neat.js @@ -264,12 +264,12 @@ function setupInterface(data, file, urls) { data.data[i]['No.'] = word_pos; if (data.data[i]['TOKEN'] == null) data.data[i]['TOKEN'] = ''; - if (data.data[i]['GND-ID'] == null) data.data[i]['GND-ID'] = ''; + if (data.data[i]['ID'] == null) data.data[i]['ID'] = ''; if (data.data[i]['NE-TAG'] == null) data.data[i]['NE-TAG'] = ''; if (data.data[i]['NE-EMB'] == null) data.data[i]['NE-EMB'] = ''; data.data[i]['TOKEN'] = data.data[i]['TOKEN'].toString().replace(/(\r\n|\n|\r)/gm, ""); - data.data[i]['GND-ID'] = data.data[i]['GND-ID'].toString().replace(/(\r\n|\n|\r)/gm, ""); + data.data[i]['ID'] = data.data[i]['ID'].toString().replace(/(\r\n|\n|\r)/gm, ""); data.data[i]['NE-TAG'] = data.data[i]['NE-TAG'].toString().replace(/(\r\n|\n|\r)/gm, ""); data.data[i]['NE-EMB'] = data.data[i]['NE-EMB'].toString().replace(/(\r\n|\n|\r)/gm, ""); @@ -309,7 +309,7 @@ function setupInterface(data, file, urls) { new_line['TOKEN'] = ''; new_line['NE-TAG'] = 'O'; new_line['NE-EMB'] = 'O'; - new_line['GND-ID'] = ''; + new_line['ID'] = ''; data.data.splice(nRow, 0, new_line); } @@ -517,12 +517,12 @@ function setupInterface(data, file, urls) { clickAction = makeLineSplitMerge; } - if ((column == 'TOKEN') || (column == 'GND-ID')) { + if ((column == 'TOKEN') || (column == 'ID')) { clickAction = makeTdEditable; listener.simple_combo('enter', function() { $(td).click(); }); - if (column == 'GND-ID') { + if (column == 'ID') { fillAction = function(td,content) { if (String(content).match(/^Q[0-9]+$/g) == null) { @@ -805,7 +805,7 @@ function setupInterface(data, file, urls) {