replace GND-ID with generic ID

pull/52/head
cneud 4 years ago
parent 28e5612a66
commit 91e67bbfcb

@ -76,13 +76,13 @@ No. TOKEN NE-TAG NE-EMB
```
For our purposes we extend this format by adding
* a fifth column for an ``ID`` for the outer ``NE-TAG`` from an authority file (in this case, the [GND](https://www.dnb.de/EN/Professionell/Standardisierung/GND/gnd_node.html) is used)
* a fifth column for an ``ID`` for the outer ``NE-TAG`` from an authority file (in this case [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) is used)
* a sixth column for the variable ``url_id`` (see [Image Support](https://github.com/qurator-spk/neat/blob/master/README.md#28-image-support) for further details)
* finally, columns 7+ for storing the ``left,right,top,bottom`` pixel coordinates of facsimile snippets
Example (full):
```tsv
No. TOKEN NE-TAG NE-EMB GND-ID url_id left,right,top,bottom
No. TOKEN NE-TAG NE-EMB ID url_id left,right,top,bottom
# https://example.url/iiif/left,right,top,bottom/full/0/default.jpg
1 Donnerstag O O - 0 174,352,358,390
2 , O O - 0 174,352,358,390

@ -1,4 +1,4 @@
No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
No. TOKEN NE-TAG NE-EMB ID url_id left right top bottom
# https://content.staatsbibliothek-berlin.de/zefys/SNP27646518-18800101-0-3-0-0/left,top,width,height/full/0/default.jpg
0 Kampf O O - 0 154 212 400 419
0 , O O - 0 154 212 400 419
@ -14,7 +14,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 wenn O O - 0 671 701 402 415
0 nicht O O - 0 702 755 399 417
0 Herr O O - 0 155 192 419 437
0 Gambetta B-PER O 118716263 0 202 277 419 437
0 Gambetta B-PER O Q295090 0 202 277 419 437
0 als O O - 0 287 311 420 436
0 deus O O - 0 320 357 419 434
0 ex O O - 0 366 385 422 434
@ -31,7 +31,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 bei O O - 0 309 330 438 453
0 dem O O - 0 339 370 437 453
0 Präſidenten O O - 0 379 468 437 457
0 Grévy B-PER O 119064693 0 475 524 436 456
0 Grévy B-PER O Q296083 0 475 524 436 456
0 einen O O - 0 534 572 437 453
0 Ausgleich O O - 0 577 650 437 455
0 herbeigeführt O O - 0 658 755 436 455
@ -58,15 +58,15 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 dem O O - 0 531 561 474 491
0 Bemühen O O - 0 570 648 474 492
0 , O O - 0 570 648 474 492
0 Waddington B-PER O 117086630 0 660 756 474 493
0 Waddington B-PER O Q548696 0 660 756 474 493
0 unb O O - 0 155 185 494 512
0 Léon B-PER O 117619744 0 200 249 494 512
0 Say I-PER O - 0 254 288 494 512
0 Léon B-PER O Q3271322 0 200 249 494 512
0 Say I-PER O Q3271322 0 254 288 494 512
0 zu O O - 0 308 324 498 512
0 halten O O - 0 343 398 494 512
0 , O O - 0 343 398 494 512
0 dagegen O O - 0 410 477 492 512
0 Lepère B-PER O 1012607569 0 492 544 493 512
0 Lepère B-PER O Q670573 0 492 544 493 512
0 zu O O - 0 563 581 497 512
0 entfernen O O - 0 600 678 492 511
0 , O O - 0 600 678 492 511
@ -84,8 +84,8 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 dominirenden O O - 0 653 756 511 528
0 Einfluß O O - 0 156 216 531 550
0 des O O - 0 240 266 532 548
0 Palais B-LOC O 4342820-4 0 293 346 530 550
0 Bourbon I-LOC O - 0 368 437 530 546
0 Palais B-LOC O Q936633 0 293 346 530 550
0 Bourbon I-LOC O Q936633 0 368 437 530 546
0 frei O O - 0 462 488 530 549
0 zu O O - 0 511 528 535 550
0 machen O O - 0 552 610 530 549
@ -94,7 +94,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 Sein O O - 0 644 682 529 546
0 Beſuch O O - 0 706 756 530 548
0 bei O O - 0 159 189 550 567
0 Grévy B-PER O 119064693 0 195 246 551 569
0 Grévy B-PER O Q296083 0 195 246 551 569
0 am O O - 0 262 285 554 566
0 Sonntag O O - 0 300 368 550 569
0 Morgen O O - 0 380 442 549 569
@ -135,7 +135,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 wenn O O - 0 300 339 609 623
0 nicht O O - 0 349 383 605 624
0 Herr O O - 0 393 429 606 624
0 Gambetta B-PER O 118716263 0 434 509 606 622
0 Gambetta B-PER O Q295090 0 434 509 606 622
0 ſelbſt O O - 0 519 557 604 624
0 es O O - 0 566 582 607 621
0 hinterher O O - 0 588 656 605 623
@ -148,15 +148,15 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 , O O - 0 257 330 625 643
0 dem O O - 0 339 370 624 640
0 Schauſpieler O O - 0 380 476 624 643
0 Coquelin B-PER O 116670673 0 491 559 624 642
0 Coquelin B-PER O Q142310 0 491 559 624 642
0 dem O O - 0 575 605 624 640
0 „ O O - 0 620 714 623 642
0 Jüngeren O O - 0 620 714 623 642
0 “ O O - 0 620 714 623 642
0 von O O - 0 728 756 626 639
0 der O O - 0 157 181 643 660
0 Comédie B-ORG O 16295404-9 0 197 262 643 661
0 françaiſe I-ORG O - 0 277 345 642 661
0 Comédie B-ORG O Q61460498 0 197 262 643 661
0 françaiſe I-ORG O Q61460498 0 277 345 642 661
0 anvertraut O O - 0 359 440 644 659
0 hat O O - 0 455 484 644 661
0 . O O - 0 455 484 644 661
@ -189,7 +189,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 wohlbekannte O O - 0 157 258 700 718
0 kleine O O - 0 271 312 698 715
0 Coupé O O - 0 322 371 699 718
0 Gambettas B-PER O 119064693 0 382 466 698 716
0 Gambettas B-PER O Q295090 0 382 466 698 716
0 eine O O - 0 482 510 699 715
0 ganze O O - 0 525 566 702 718
0 Stunde O O - 0 577 633 698 715
@ -206,7 +206,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 im O O - 0 476 496 718 734
0 Vorhof O O - 0 505 562 717 735
0 des O O - 0 577 602 718 733
0 Elyſée B-LOC O 4075880-1 0 612 661 717 736
0 Elyſée B-LOC O Q188190 0 612 661 717 736
0 ſtationiren O O - 0 666 755 703 736
0 geſehen O O - 0 158 211 737 756
0 hatten O O - 0 222 273 737 754
@ -239,7 +239,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 . O O - 0 651 702 754 770
0 O O - 0 720 756 754 774
0 Herr O O - 0 720 756 754 774
0 Lepère B-PER O 1012607569 0 156 212 774 793
0 Lepère B-PER O Q670573 0 156 212 774 793
0 , O O - 0 156 212 774 793
0 der O O - 0 227 250 774 790
0 bereits O O - 0 264 314 774 790
@ -255,7 +255,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 zu O O - 0 305 322 797 811
0 müſſen O O - 0 332 383 793 811
0 — O O - 0 393 412 801 803
0 Freycinet B-PER O 118703099 0 421 493 793 811
0 Freycinet B-PER O Q317957 0 421 493 793 811
0 ſelbſt O O - 0 496 544 792 811
0 hatte O O - 0 554 590 792 810
0 ihm O O - 0 600 629 793 809
@ -271,10 +271,10 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 geſagt O O - 0 425 475 811 830
0 — O O - 0 484 503 819 822
0 Herr O O - 0 512 548 811 830
0 Lepre B-PER O 1012607569 0 556 607 811 829
0 Lepre B-PER O Q670573 0 556 607 811 829
0 erhielt O O - 0 616 664 811 830
0 von O O - 0 674 701 814 826
0 Gam B-PER O 118716263 0 711 755 811 827
0 Gam B-PER O Q295090 0 711 755 811 827
0 — I-PER O - 0 711 755 811 827
0 betta I-PER O - 0 156 192 829 846
0 die O O - 0 202 224 830 846
@ -289,7 +289,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 Gleichzeitig O O - 0 508 592 830 848
0 wurde O O - 0 602 649 829 845
0 Herrn O O - 0 658 703 829 848
0 Waddington B-PER O 117086630 0 714 756 829 845
0 Waddington B-PER O Q548696 0 714 756 829 845
0 das O O - 0 230 257 849 865
0 Gegentheil O O - 0 272 354 848 867
0 bedeutet O O - 0 370 437 849 867
@ -297,7 +297,7 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 den O O - 0 451 476 849 864
0 Botſchafterpoſten O O - 0 486 617 848 867
0 in O O - 0 633 648 848 864
0 London B-LOC O 4074335-4 0 658 716 848 864
0 London B-LOC O Q84 0 658 716 848 864
0 , O O - 0 658 716 848 864
0 der O O - 0 720 756 848 866
0 ihm O O - 0 156 185 866 885
@ -316,8 +316,8 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 dieſen O O - 0 699 756 866 884
0 Vorgängen O O - 0 159 244 885 905
0 erhielt O O - 0 254 305 886 904
0 Léon B-PER O 117619744 0 310 350 885 902
0 Say I-PER O - 0 360 394 886 905
0 Léon B-PER O Q3271322 0 310 350 885 902
0 Say I-PER O Q3271322 0 360 394 886 905
0 erſt O O - 0 407 432 886 902
0 in O O - 0 445 460 886 902
0 ſpäter O O - 0 475 519 886 903
@ -339,13 +339,13 @@ No. TOKEN NE-TAG NE-EMB GND-ID url_id left right top bottom
0 fuhr O O - 0 666 690 904 922
0 er O O - 0 692 723 909 920
0 ins O O - 0 733 756 904 919
0 Elyſée B-LOC O 4075880-1 0 158 207 923 942
0 Elyſée B-LOC O Q188190 0 158 207 923 942
0 und O O - 0 220 248 924 939
0 legte O O - 0 264 299 924 940
0 ſein O O - 0 313 340 923 940
0 Portefeuille O O - 0 355 445 923 942
0 in O O - 0 461 475 923 939
0 Grevys B-PER O 119064693 0 490 546 923 942
0 Grevys B-PER O Q296083 0 490 546 923 942
0 Hände O O - 0 557 606 923 942
0 zurück O O - 0 621 671 923 942
0 . O O - 0 621 671 923 942

Can't render this file because it has a wrong number of fields in line 2.

@ -264,12 +264,12 @@ function setupInterface(data, file, urls) {
data.data[i]['No.'] = word_pos;
if (data.data[i]['TOKEN'] == null) data.data[i]['TOKEN'] = '';
if (data.data[i]['GND-ID'] == null) data.data[i]['GND-ID'] = '';
if (data.data[i]['ID'] == null) data.data[i]['ID'] = '';
if (data.data[i]['NE-TAG'] == null) data.data[i]['NE-TAG'] = '';
if (data.data[i]['NE-EMB'] == null) data.data[i]['NE-EMB'] = '';
data.data[i]['TOKEN'] = data.data[i]['TOKEN'].toString().replace(/(\r\n|\n|\r)/gm, "");
data.data[i]['GND-ID'] = data.data[i]['GND-ID'].toString().replace(/(\r\n|\n|\r)/gm, "");
data.data[i]['ID'] = data.data[i]['ID'].toString().replace(/(\r\n|\n|\r)/gm, "");
data.data[i]['NE-TAG'] = data.data[i]['NE-TAG'].toString().replace(/(\r\n|\n|\r)/gm, "");
data.data[i]['NE-EMB'] = data.data[i]['NE-EMB'].toString().replace(/(\r\n|\n|\r)/gm, "");
@ -309,7 +309,7 @@ function setupInterface(data, file, urls) {
new_line['TOKEN'] = '';
new_line['NE-TAG'] = 'O';
new_line['NE-EMB'] = 'O';
new_line['GND-ID'] = '';
new_line['ID'] = '';
data.data.splice(nRow, 0, new_line);
}
@ -517,12 +517,12 @@ function setupInterface(data, file, urls) {
clickAction = makeLineSplitMerge;
}
if ((column == 'TOKEN') || (column == 'GND-ID')) {
if ((column == 'TOKEN') || (column == 'ID')) {
clickAction = makeTdEditable;
listener.simple_combo('enter', function() { $(td).click(); });
if (column == 'GND-ID') {
if (column == 'ID') {
fillAction =
function(td,content) {
if (String(content).match(/^Q[0-9]+$/g) == null) {
@ -805,7 +805,7 @@ function setupInterface(data, file, urls) {
<th>TOKEN</th>
<th>NE-TAG</th>
<th>NE-EMB</th>
<th>GND-ID<button class="btn btn-link" id="next" tabindex="-1">>></button></th>
<th>ID<button class="btn btn-link" id="next" tabindex="-1">>></button></th>
</tr>
</thead>
<tbody id="table-body"></tbody>

Loading…
Cancel
Save