From 92391747a76d0991a4cd0a09618804c2644bc59b Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 14 Aug 2020 14:38:02 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B9=20Remove=20obsolete=20xsd/=20direc?= =?UTF-8?q?tory?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- xsd/pagecontent.2017-07-15.xsd | 2137 ------------------------- xsd/pagecontent.2018-07-15.xsd | 2496 ----------------------------- xsd/pagecontent.2019-07-15.xsd | 2674 -------------------------------- 3 files changed, 7307 deletions(-) delete mode 100644 xsd/pagecontent.2017-07-15.xsd delete mode 100644 xsd/pagecontent.2018-07-15.xsd delete mode 100644 xsd/pagecontent.2019-07-15.xsd diff --git a/xsd/pagecontent.2017-07-15.xsd b/xsd/pagecontent.2017-07-15.xsd deleted file mode 100644 index b4b2266..0000000 --- a/xsd/pagecontent.2017-07-15.xsd +++ /dev/null @@ -1,2137 +0,0 @@ - - - - - - Page Content - Ground Truth and Storage - - - - - - - - - - - - - - - The timestamp has to be in UTC (Coordinated - Universal Time) and not local time. - - - - - - - The timestamp has to be in UTC (Coordinated - Universal Time) and not local time. - - - - - - - - - - External reference of any kind - - - - - - - - Alternative document page images (e.g. - black-and-white) - - - - - - - - - - - - - - - - Unassigned regions are considered to be in the - (virtual) default layer which is to be treated - as below any other layers. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - For generic use - - - - - Page type - - - - - - The primary language used in the page (lower-level definitions override the page-level definition) - - - - - - - The secondary language used in the page (lower-level definitions override the page-level definition) - - - - - - - The primary script used in the page (lower-level definitions override the page-level definition) - - - - - - - The secondary script used in the page (lower-level definitions override the page-level definition) - - - - - - - The direction in which text in a region should be - read (within lines) (lower-level definitions override the page-level definition) - - - - - - Inner-block order of text lines (in addition to “readingDirection” which is the inner-text line order of words and characters) (lower-level definitions override the page-level definition) - - - - - - - Pure text is represented as a text region. This includes - drop capitals, but practically ornate text may be - considered as a graphic. - - - - - - - - - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The nature of the text in the region - - - - - - - The degree of space in points between the lines of - text (line spacing) - - - - - - - The direction in which text in a region should be - read (within lines) - - - - - - Inner-block order of text lines (in addition to “readingDirection” which is the inner-text line order of words and characters) - - - - - The angle the baseline of text withing a region has to be rotated (relative to the rectangle encapsulating the region) in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - Defines whether a region of text is indented or not - - - - - - Text align - - - - - - The primary language used in the region - - - - - - - The secondary language used in the region - - - - - - - The primary script used in the region - - - - - - - The secondary script used in the region - - - - - - - - - - - Point list with format "x1,y1 x2,y2 ..." - - - - - - - - - - Multiple connected points that mark the baseline - of the glyphs - - - - - - - - - - - - - - - - Overrides primaryLanguage attribute of parent text - region - - - - - - - The primary script used in the text line - - - - - - - The secondary script used in the text line - - - - - - - The direction in which text in a text line should be read - - - - - - - Overrides the production attribute of the parent - text region - - - - - - For generic use - - - - - - - - - - - - - - - - - - - - Overrides primaryLanguage attribute of parent line - and/or text region - - - - - - - The primary script used in the word - - - - - - - The secondary script used in the word - - - - - - - The direction in which characters in a word should be read - - - - - - - Overrides the production attribute of the parent - text line and/or text region. - - - - - - For generic use - - - - - - - - - - - Container for graphemes, grapheme groups and - non-printing characters - - - - - - - - - - - - - - - - - - The script used for the glyph - - - - - - - Overrides the production attribute of the parent - word / text line / text region. - - - - - - For generic use - - - - - - - - - - Text in a "simple" form (ASCII or extended ASCII - as mostly used for typing). I.e. no use of - special characters for ligatures (should be - stored as two separate characters) etc. - - - - - - - Correct encoding of the original, always using - the corresponding Unicode code point. I.e. - ligatures have to be represented as one - character etc. - - - - - - - Used for sort order in case multiple TextEquivs are defined. The text content with the lowest index should be interpreted as the main text content. - - - - - - - - - - OCR confidence value (between 0 and 1) - - - - - - - - - - - Type of text content (is it free text or a number, for instance) -This is only a descriptive attribute, the text type is not checked during XML validation - - - - - Refinement for dataType attribute. Can be a regular expression, for instance. - - - - - - - - - An image is considered to be more intricate and complex - than a graphic. These can be photos or drawings. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The colour bit depth required for the region - - - - - - - The background colour of the region - - - - - - - Specifies whether the region also contains - text - - - - - - - - - - A line drawing is a single colour illustration without - solid areas. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The pen (foreground) colour of the region - - - - - - - The background colour of the region - - - - - - - Specifies whether the region also contains - text - - - - - - - - - - Regions containing simple graphics, such as a company - logo, should be marked as graphic regions. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The type of graphic in the region - - - - - - - An approximation of the number of colours - used in the region - - - - - - - Specifies whether the region also contains - text. - - - - - - - - - - Tabular data in any form is represented with a table - region. Rows and columns may or may not have separator - lines; these lines are not separator regions. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The number of rows present in the table - - - - - - - The number of columns present in the table - - - - - - - The colour of the lines used in the region - - - - - - - The background colour of the region - - - - - - - Specifies the presence of line separators - - - - - - - Specifies whether the region also contains - text - - - - - - - - - - Regions containing charts or graphs of any type, should - be marked as chart regions. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The type of chart in the region - - - - - - - An approximation of the number of colours - used in the region - - - - - - - The background colour of the region - - - - - - - Specifies whether the region also contains - text - - - - - - - - - - Separators are lines that lie between columns and - paragraphs and can be used to logically separate - different articles from each other. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The colour of the separator - - - - - - - - - - Regions containing equations and mathematical symbols - should be marked as maths regions. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The background colour of the region - - - - - - - - - - Regions containing chemical formulas. - - - - - - - - The angle the rectangle encapsulating a - region has to be rotated in clockwise - direction in order to correct the present - skew (negative values indicate - anti-clockwise rotation). Range: - -179.999,180 - - - - - - - - The background colour of the region - - - - - - - - - - - Regions containing musical notations. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The background colour of the region - - - - - - - - - - Regions containing advertisements. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - - The background colour of the region - - - - - - - - - - Noise regions are regions where no real data lies, only - false data created by artifacts on the document or - scanner noise. - - - - - - - - - - To be used if the region type cannot be ascertained. - - - - - - - - - - Determines the effective area on the paper of a printed page. Its size is equal for all pages of a book (exceptions: titlepage, multipage pictures). -It contains all living elements (except marginals) like body type, footnotes, headings, running titles. -It does not contain pagenumber (if not part of running title), marginals, signature mark, preview words. - - - - - - - - - - Definition of the reading order within the page. To express a reading order between elements they have to be included in an OrderedGroup. Groups may contain further groups. - - - - - - - - - - Numbered region - - - - Position (order number) of this item within the current hierarchy level. - - - - - - - - Indexed group containing ordered elements - - - - - - - - - - - - - - - - - - Optional link to a parent region of nested regions. The parent region doubles as reading order group. Only the nested regions should be allowed as group members. - - - - - Position (order number) of this item within the - current hierarchy level. - - - - - - - - - Is this group a continuation of another group (from - previous column or page, for example)? - - - - - - - - - - - Indexed group containing unordered elements - - - - - - - - - - - - - - - - Optional link to a parent region of nested regions. The parent region doubles as reading order group. Only the nested regions should be allowed as group members. - - - - - Position (order number) of this item within the - current hierarchy level. - - - - - - - - Is this group a continuation of another group (from previous column or page, for example)? - - - - - - - - - - - - Numbered group (contains ordered elements) - - - - - - - - - - - - - - - - - Optional link to a parent region of nested regions. The parent region doubles as reading order group. Only the nested regions should be allowed as group members. - - - - - - Is this group a continuation of another group (from previous column or page, for example)? - - - - - - - - - Numbered group (contains unordered elements) - - - - - - - - - - - - - - - - Optional link to a parent region of nested regions. The parent region doubles as reading order group. Only the nested regions should be allowed as group members. - - - - - - Is this group a continuation of another group (from previous column or page, for example)? - - - - - - - Border of the actual page (if the scanned image contains parts not belonging to the page). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - iso15924 2016-07-14 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Can be used to express the z-index of overlapping - regions. An element with a greater z-index is always in - front of another element with lower z-index. - - - - - - - - - - - - - - - - - - - - - - - - - - Point list with format "x1,y1 x2,y2 ..." - - - - - - - - - - Container for one-to-one relations between layout - objects (for example: DropCap - paragraph, caption - - image) - - - - - - - - - - - One-to-one relation between to layout object. Use 'link' - for loose relations and 'join' for strong relations - (where something is fragmented for instance). - - Examples for 'link': caption - image floating - - paragraph paragraph - paragraph (when a pragraph is - split across columns and the last word of the first - paragraph DOES NOT continue in the second paragraph) - drop-cap - paragraph (when the drop-cap is a whole word) - - Examples for 'join': word - word (separated word at the - end of a line) drop-cap - paragraph (when the drop-cap - is not a whole word) paragraph - paragraph (when a - pragraph is split across columns and the last word of - the first paragraph DOES continue in the second - paragraph) - - - - - - - - - - - - - - - - For generic use - - - - - - Text production type - - - - - - - - - - - - - - - Monospace (fixed-pitch, non-proportional) or - proportional font - - - - - - For instance: Arial, Times New Roman. Add more - information if necessary (e.g. blackletter, - antiqua). - - - - - - - Serif or sans-serif typeface - - - - - - - - The size of the characters in points - - - - - - The x-height or corpus size refers to the distance between the baseline and the mean line of lower-case letters in a typeface. The unit is assumed to be pixels. - - - - - - The degree of space (in points) between the - characters in a string of text - - - - - - - Text colour in RGB encoded format (red value) + (256 x green value) + (65536 x blue value) - - - - - Background colour - - - - - Background colour in RGB encoded format (red value) + (256 x green value) + (65536 x blue value) - - - - - - Specifies whether the colour of the text appears - reversed against a background colour - - - - - - - - - - - - - - - - - - - - - - Roles the region takes (e.g. in context of a - parent region) - - - - - - - - - - - - - - - - - - - - - - - - - - - - For generic use - - - - - - Is this region a continuation of another region (in previous column or page, for example)? - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Examples: "123.456", "+1234.456", "-1234.456", "-.456", "-456" - - - - Examples: "123.456", "+1234.456", "-1.2344e56", "-.45E-6", "INF", "-INF", "NaN" - - - - Examples: "123456", "+00000012", "-1", "-456" - - - - Examples: "true", "false", "1", "0" - - - - Examples: "2001-10-26", "2001-10-26+02:00", "2001-10-26Z", "2001-10-26+00:00", "-2001-10-26", "-20000-04-01" - - - - Examples: "21:32:52", "21:32:52+02:00", "19:32:52Z", "19:32:52+00:00", "21:32:52.12679" - - - - Examples: "2001-10-26T21:32:52", "2001-10-26T21:32:52+02:00", "2001-10-26T19:32:52Z", "2001-10-26T19:32:52+00:00", "-2001-10-26T21:32:52", "2001-10-26T21:32:52.12679" - - - - Generic text string - - - - An XSD type that is not listed or a custom type (use dataTypeDetails attribute) - - - - - - - - - - Container for graphemes, grapheme groups and - non-printing characters - - - - - - - - - - - - Base type for graphemes, grapheme groups and non-printing characters - - - - - - - - - Order index of grapheme, group, or non-printing character within the parent container (graphemes or glyph or grapheme group) - - - - - - - - - - - Type of character represented by the grapheme/group/non-printing character element - - - - - - - - - - - For generic use - - - For generic use - - - - - Represents a sub-element of a glyph. Smallest graphical unit that can be assigned a Unicode code point - - - - - - - - - - - - - A glyph component without visual representation but with Unicode code point. Non-visual / non-printing / control character. Part of grapheme container (of glyph) or grapheme sub group. - - - - - - - - - - - - - - - - - - - - - Container for user-defined attributes - - - - - - - - - Structured custom data defined by name, type and value. - - - - - - - - - - - - - - - - - - - - Cell position in table starting with row 0 - - - - Cell position in table starting with column 0 - - - - Number of rows the cell spans (optional; default is 1) - - - - Number of columns the cell spans (optional; default is 1) - - - - - - - - Data for a region that takes on the role of a table cell within a parent table region - - - - \ No newline at end of file diff --git a/xsd/pagecontent.2018-07-15.xsd b/xsd/pagecontent.2018-07-15.xsd deleted file mode 100644 index c6b7e93..0000000 --- a/xsd/pagecontent.2018-07-15.xsd +++ /dev/null @@ -1,2496 +0,0 @@ - - - - - - Page Content - Ground Truth and Storage - - - - - - - - - - - - - - - The timestamp has to be in UTC (Coordinated - Universal Time) and not local time. - - - - - - - The timestamp has to be in UTC (Coordinated - Universal Time) and not local time. - - - - - - - - - - - - External reference of any kind - - - - - - - Semantic labels / tags - - - - - - Type of metadata (e.g. author) - - - - - - - - - - - - - - - E.g. imagePhotometricInterpretation - - - - - - E.g. RGB - - - - - - - - - - A semantic label / tag - - - - - - - - Reference to external model / ontology / schema - - - - - - E.g. an RDF resource identifier (to be used as subject or object of an RDF triple) - - - - - Prefix for all labels (e.g. first part of an URI) - - - - - - - - - Semantic label - - - - - The label / tag (e.g. 'person'). Can be an RDF resource identifier (e.g. object of an RDF triple). - - - - - - - Additional information on the label (e.g. 'YYYY-mm-dd' for a date label). Can be used as predicate of an RDF triple. - - - - - - - - - - - - Alternative document page images (e.g. - black-and-white) - - - - - - - - - - - - - - - - Unassigned regions are considered to be in the - (virtual) default layer which is to be treated - as below any other layers. - - - - - - - - - Semantic labels / tags - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Contains the image file name including the file extension. - - - - - - Specifies the width of the image. - - - - - Specifies the height of the image. - - - - - Specifies the image resolution in width. - - - - - Specifies the image resolution in height. - - - - - - Specifies the unit of the resolution information - referring to a standardised unit of measurement (pixels per inch, pixels per centimeter or other). - - - - - - - - - - - - - - For generic use - - - - - Page type - - - - - - The primary language used in the page (lower-level definitions override the page-level definition) - - - - - - - The secondary language used in the page (lower-level definitions override the page-level definition) - - - - - - - The primary script used in the page (lower-level definitions override the page-level definition) - - - - - - - The secondary script used in the page (lower-level definitions override the page-level definition) - - - - - - - The direction in which text in a region should be - read (within lines) (lower-level definitions override the page-level definition) - - - - - - Inner-block order of text lines (in addition to “readingDirection” which is the inner-text line order of words and characters) (lower-level definitions override the page-level definition) - - - - - Confidence value for whole page (between 0 and 1) - - - - - - - - Pure text is represented as a text region. This includes - drop capitals, but practically ornate text may be - considered as a graphic. - - - - - - - - - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The nature of the text in the region - - - - - - - The degree of space in points between the lines of - text (line spacing) - - - - - - - The direction in which text in a region should be - read (within lines) - - - - - - Inner-block order of text lines (in addition to “readingDirection” which is the inner-text line order of words and characters) - - - - - The angle the baseline of text withing a region has to be rotated (relative to the rectangle encapsulating the region) in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - Defines whether a region of text is indented or not - - - - - - Text align - - - - - - The primary language used in the region - - - - - - - The secondary language used in the region - - - - - - - The primary script used in the region - - - - - - - The secondary script used in the region - - - - - - - - - - - Point list with format "x1,y1 x2,y2 ..." - - - - - Confidence value (between 0 and 1) - - - - - - - - - Alternative text line images (e.g. - black-and-white) - - - - - - - - Multiple connected points that mark the baseline - of the glyphs - - - - - - - - - - - - - Semantic labels / tags - - - - - - - - Overrides primaryLanguage attribute of parent text - region - - - - - - - The primary script used in the text line - - - - - - - The secondary script used in the text line - - - - - - - The direction in which text in a text line should be read - - - - - - - Overrides the production attribute of the parent - text region - - - - - - For generic use - - - - - - - Position (order number) of this text line within the - parent text region. - - - - - - - - - - - Alternative word images (e.g. - black-and-white) - - - - - - - - - - - - - - Semantic labels / tags - - - - - - - - Overrides primaryLanguage attribute of parent line - and/or text region - - - - - - - The primary script used in the word - - - - - - - The secondary script used in the word - - - - - - - The direction in which characters in a word should be read - - - - - - - Overrides the production attribute of the parent - text line and/or text region. - - - - - - For generic use - - - - - - - - - - Alternative glyph images (e.g. - black-and-white) - - - - - - - - Container for graphemes, grapheme groups and - non-printing characters - - - - - - - - - - - Semantic labels / tags - - - - - - - - - - - - The script used for the glyph - - - - - - - Overrides the production attribute of the parent - word / text line / text region. - - - - - - For generic use - - - - - - - - - - Text in a "simple" form (ASCII or extended ASCII - as mostly used for typing). I.e. no use of - special characters for ligatures (should be - stored as two separate characters) etc. - - - - - - - Correct encoding of the original, always using - the corresponding Unicode code point. I.e. - ligatures have to be represented as one - character etc. - - - - - - - Used for sort order in case multiple TextEquivs are defined. The text content with the lowest index should be interpreted as the main text content. - - - - - - - - - - OCR confidence value (between 0 and 1) - - - - - Type of text content (is it free text or a number, for instance) -This is only a descriptive attribute, the text type is not checked during XML validation - - - - - Refinement for dataType attribute. Can be a regular expression, for instance. - - - - - - - - - An image is considered to be more intricate and complex - than a graphic. These can be photos or drawings. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The colour bit depth required for the region - - - - - - - The background colour of the region - - - - - - - Specifies whether the region also contains - text - - - - - - - - - - A line drawing is a single colour illustration without - solid areas. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The pen (foreground) colour of the region - - - - - - - The background colour of the region - - - - - - - Specifies whether the region also contains - text - - - - - - - - - - Regions containing simple graphics, such as a company - logo, should be marked as graphic regions. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The type of graphic in the region - - - - - - - An approximation of the number of colours - used in the region - - - - - - - Specifies whether the region also contains - text. - - - - - - - - - - Tabular data in any form is represented with a table - region. Rows and columns may or may not have separator - lines; these lines are not separator regions. - - - - - - - - Table grid (visible or virtual grid lines) - - - - - - The angle the rectangle encapsulating a - region has to be rotated in clockwise - direction in order to correct the present - skew (negative values indicate - anti-clockwise rotation). Range: - -179.999,180 - - - - - - - The number of rows present in the table - - - - - - - The number of columns present in the table - - - - - - - The colour of the lines used in the region - - - - - - - The background colour of the region - - - - - - - Specifies the presence of line separators - - - - - - - Specifies whether the region also contains - text - - - - - - - - - Matrix of grid points defining the table grid on the page - - - - - One row in the grid point matrix. Points with x,y coordinates. (note: for a table with n table rows there should be n+1 grid rows) - - - - - - Points with x,y coordinates. - - - - - The grid row index - - - - - - - - - - Regions containing charts or graphs of any type, should - be marked as chart regions. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The type of chart in the region - - - - - - - An approximation of the number of colours - used in the region - - - - - - - The background colour of the region - - - - - - - Specifies whether the region also contains - text - - - - - - - - - - Separators are lines that lie between columns and - paragraphs and can be used to logically separate - different articles from each other. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The colour of the separator - - - - - - - - - - Regions containing equations and mathematical symbols - should be marked as maths regions. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The background colour of the region - - - - - - - - - - Regions containing chemical formulas. - - - - - - - - The angle the rectangle encapsulating a - region has to be rotated in clockwise - direction in order to correct the present - skew (negative values indicate - anti-clockwise rotation). Range: - -179.999,180 - - - - - - - - The background colour of the region - - - - - - - - - - - Regions containing maps. - - - - - - - - The angle the rectangle encapsulating a - region has to be rotated in clockwise - direction in order to correct the present - skew (negative values indicate - anti-clockwise rotation). Range: - -179.999,180 - - - - - - - - - - Regions containing musical notations. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - The background colour of the region - - - - - - - - - - Regions containing advertisements. - - - - - - - The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). -Range: -179.999,180 - - - - - - - The background colour of the region - - - - - - - - - - Noise regions are regions where no real data lies, only - false data created by artifacts on the document or - scanner noise. - - - - - - - - - - To be used if the region type cannot be ascertained. - - - - - - - - - - Regions containing content that is not covered by the default types (text, graphic, image, line drawing, chart, table, separator, maths, map, music, chem, advert, noise, unknown) - - - - - - - - Information on the type of content represented by this region - - - - - - - - - - Determines the effective area on the paper of a printed page. Its size is equal for all pages of a book (exceptions: titlepage, multipage pictures). -It contains all living elements (except marginals) like body type, footnotes, headings, running titles. -It does not contain pagenumber (if not part of running title), marginals, signature mark, preview words. - - - - - - - - - - Definition of the reading order within the page. To express a reading order between elements they have to be included in an OrderedGroup. Groups may contain further groups. - - - - - - - - - - Confidence value (between 0 and 1) - - - - - - - - Numbered region - - - - Position (order number) of this item within the current hierarchy level. - - - - - - - - Indexed group containing ordered elements - - - - - - - - Semantic labels / tags - - - - - - - - - - - - - - Optional link to a parent region of nested regions. The parent region doubles as reading order group. Only the nested regions should be allowed as group members. - - - - - Position (order number) of this item within the - current hierarchy level. - - - - - - - - - Is this group a continuation of another group (from - previous column or page, for example)? - - - - - - - - - - - Indexed group containing unordered elements - - - - - - - - Semantic labels / tags - - - - - - - - - - - - - Optional link to a parent region of nested regions. The parent region doubles as reading order group. Only the nested regions should be allowed as group members. - - - - - Position (order number) of this item within the - current hierarchy level. - - - - - - - - Is this group a continuation of another group (from previous column or page, for example)? - - - - - - - - - - - - Numbered group (contains ordered elements) - - - - - - - - Semantic labels / tags - - - - - - - - - - - - - - Optional link to a parent region of nested regions. The parent region doubles as reading order group. Only the nested regions should be allowed as group members. - - - - - - Is this group a continuation of another group (from previous column or page, for example)? - - - - - - - - - Numbered group (contains unordered elements) - - - - - - - - Semantic labels / tags - - - - - - - - - - - - - Optional link to a parent region of nested regions. The parent region doubles as reading order group. Only the nested regions should be allowed as group members. - - - - - - Is this group a continuation of another group (from previous column or page, for example)? - - - - - - - Border of the actual page (if the scanned image contains parts not belonging to the page). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ISO 639.x 2016-07-14 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - iso15924 2016-07-14 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Can be used to express the z-index of overlapping - regions. An element with a greater z-index is always in - front of another element with lower z-index. - - - - - - - - - - - - - - - - - - - - - - - Confidence value (between 0 and 1) - - - - - - - Point list with format "x1,y1 x2,y2 ..." - - - - - - - - - - Container for one-to-one relations between layout - objects (for example: DropCap - paragraph, caption - - image) - - - - - - - - - - - One-to-one relation between to layout object. Use 'link' - for loose relations and 'join' for strong relations - (where something is fragmented for instance). - - Examples for 'link': caption - image floating - - paragraph paragraph - paragraph (when a paragraph is - split across columns and the last word of the first - paragraph DOES NOT continue in the second paragraph) - drop-cap - paragraph (when the drop-cap is a whole word) - - Examples for 'join': word - word (separated word at the - end of a line) drop-cap - paragraph (when the drop-cap - is not a whole word) paragraph - paragraph (when a - pragraph is split across columns and the last word of - the first paragraph DOES continue in the second - paragraph) - - - - - - - Semantic labels / tags - - - - - - - - - - - - - - - - - For generic use - - - - - - Text production type - - - - - - - - - - - - - - - Monospace (fixed-pitch, non-proportional) or - proportional font - - - - - - For instance: Arial, Times New Roman. Add more - information if necessary (e.g. blackletter, - antiqua). - - - - - - - Serif or sans-serif typeface - - - - - - - - The size of the characters in points - - - - - - The x-height or corpus size refers to the distance between the baseline and the mean line of lower-case letters in a typeface. The unit is assumed to be pixels. - - - - - - The degree of space (in points) between the - characters in a string of text - - - - - - - Text colour in RGB encoded format (red value) + (256 x green value) + (65536 x blue value) - - - - - Background colour - - - - - Background colour in RGB encoded format (red value) + (256 x green value) + (65536 x blue value) - - - - - - Specifies whether the colour of the text appears - reversed against a background colour - - - - - - - - - - - - - - - - - - - Alternative region images (e.g. - black-and-white) - - - - - - - - - Semantic labels / tags - - - - - - Roles the region takes (e.g. in context of a - parent region) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - For generic use - - - - - - Is this region a continuation of another region (in previous column or page, for example)? - - - - - - - - - Confidence value (between 0 and 1) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Examples: "123.456", "+1234.456", "-1234.456", "-.456", "-456" - - - - Examples: "123.456", "+1234.456", "-1.2344e56", "-.45E-6", "INF", "-INF", "NaN" - - - - Examples: "123456", "+00000012", "-1", "-456" - - - - Examples: "true", "false", "1", "0" - - - - Examples: "2001-10-26", "2001-10-26+02:00", "2001-10-26Z", "2001-10-26+00:00", "-2001-10-26", "-20000-04-01" - - - - Examples: "21:32:52", "21:32:52+02:00", "19:32:52Z", "19:32:52+00:00", "21:32:52.12679" - - - - Examples: "2001-10-26T21:32:52", "2001-10-26T21:32:52+02:00", "2001-10-26T19:32:52Z", "2001-10-26T19:32:52+00:00", "-2001-10-26T21:32:52", "2001-10-26T21:32:52.12679" - - - - Generic text string - - - - An XSD type that is not listed or a custom type (use dataTypeDetails attribute) - - - - - - - - - - Container for graphemes, grapheme groups and - non-printing characters - - - - - - - - - - - - Base type for graphemes, grapheme groups and non-printing characters - - - - - - - - - Order index of grapheme, group, or non-printing character within the parent container (graphemes or glyph or grapheme group) - - - - - - - - - - - Type of character represented by the grapheme/group/non-printing character element - - - - - - - - - - - For generic use - - - For generic use - - - - - Represents a sub-element of a glyph. Smallest graphical unit that can be assigned a Unicode code point - - - - - - - - - - - - - A glyph component without visual representation but with Unicode code point. Non-visual / non-printing / control character. Part of grapheme container (of glyph) or grapheme sub group. - - - - - - - - - - - - - - - - - - - - - Container for user-defined attributes - - - - - - - - - Structured custom data defined by name, type and value. - - - - - - - - - - - - - - - - - - - - Cell position in table starting with row 0 - - - - Cell position in table starting with column 0 - - - - Number of rows the cell spans (optional; default is 1) - - - - Number of columns the cell spans (optional; default is 1) - - - - - Is the cell a column or row header? - - - - - - - - - - Data for a region that takes on the role of a table cell within a parent table region - - - - diff --git a/xsd/pagecontent.2019-07-15.xsd b/xsd/pagecontent.2019-07-15.xsd deleted file mode 100644 index 40c6f24..0000000 --- a/xsd/pagecontent.2019-07-15.xsd +++ /dev/null @@ -1,2674 +0,0 @@ - - - - - - - Page Content - Ground Truth and Storage - - - - - - - - - - - - - - - - The timestamp has to be in UTC (Coordinated - Universal Time) and not local time. - - - - - - - The timestamp has to be in UTC - (Coordinated Universal Time) - and not local time. - - - - - - - - - - - - - External reference of any kind - - - - - - - - Semantic labels / tags - - - - - - - Type of metadata (e.g. author) - - - - - - - - - - - - - - - E.g. imagePhotometricInterpretation - - - - - - E.g. RGB - - - - - - - - - - A semantic label / tag - - - - - - - - Reference to external model / ontology / schema - - - - - - - E.g. an RDF resource identifier - (to be used as subject or object of an RDF triple) - - - - - - - Prefix for all labels (e.g. first part of an URI) - - - - - - - - Semantic label - - - - - The label / tag (e.g. 'person'). - Can be an RDF resource identifier - (e.g. object of an RDF triple). - - - - - - - Additional information on the label - (e.g. 'YYYY-mm-dd' for a date label). - Can be used as predicate of an RDF triple. - - - - - - - - - - - - Alternative document page images - (e.g. black-and-white). - - - - - - - - - - Order of blocks within the page. - - - - - - Unassigned regions are considered to be in the - (virtual) default layer which is to be treated - as below any other layers. - - - - - - - - Default text style - - - - - - - Semantic labels / tags - - - - - - - - - - - - - - - - - - - - - - - - Contains the image file name including the file extension. - - - - - - Specifies the width of the image. - - - - - Specifies the height of the image. - - - - - Specifies the image resolution in width. - - - - - Specifies the image resolution in height. - - - - - - Specifies the unit of the resolution information - referring to a standardised unit of measurement - (pixels per inch, pixels per centimeter or other). - - - - - - - - - - - - - For generic use - - - - - - The angle the rectangle encapsulating the page - (or its Border) has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - (The rotated image can be further referenced - via “AlternativeImage”.) - Range: -179.999,180 - - - - - - - The type of the page within the document - (e.g. cover page). - - - - - - - The primary language used in the page - (lower-level definitions override the page-level definition). - - - - - - - The secondary language used in the page - (lower-level definitions override the page-level definition). - - - - - - - The primary script used in the page - (lower-level definitions override the page-level definition). - - - - - - - The secondary script used in the page - (lower-level definitions override the page-level definition). - - - - - - - The direction in which text within lines - should be read (order of words and characters), - in addition to “textLineOrder” - (lower-level definitions override the page-level definition). - - - - - - - The order of text lines within a block, - in addition to “readingDirection” - (lower-level definitions override the page-level definition). - - - - - - Confidence value for whole page (between 0 and 1) - - - - - - - Pure text is represented as a text region. This includes - drop capitals, but practically ornate text may be - considered as a graphic. - - - - - - - - - - - - - The angle the rectangle encapsulating the region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - (The rotated image can be further referenced - via “AlternativeImage”.) - Range: -179.999,180 - - - - - - - The nature of the text in the region - - - - - - - The degree of space in points between the lines of - text (line spacing) - - - - - - - The direction in which text within lines - should be read (order of words and characters), - in addition to “textLineOrder”. - - - - - - - The order of text lines within the block, - in addition to “readingDirection”. - - - - - - - The angle the baseline of text within the region - has to be rotated (relative to the rectangle - encapsulating the region) in clockwise direction - in order to correct the present skew, - in addition to “orientation” - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - - - - - - - Defines whether a region of text is indented or not - - - - - - Text align - - - - - - The primary language used in the region - - - - - - - The secondary language used in the region - - - - - - - The primary script used in the region - - - - - - - The secondary script used in the region - - - - - - - - - - - Point list with format "x1,y1 x2,y2 ..." - - - - - Confidence value (between 0 and 1) - - - - - - - - - Alternative text line images (e.g. - black-and-white) - - - - - - - - Multiple connected points that mark the baseline - of the glyphs - - - - - - - - - - - - - - Semantic labels / tags - - - - - - - - Overrides primaryLanguage attribute of parent text - region - - - - - - - The primary script used in the text line - - - - - - - The secondary script used in the text line - - - - - - - The direction in which text within the line - should be read (order of words and characters). - - - - - - - Overrides the production attribute of the parent - text region - - - - - - For generic use - - - - - - - Position (order number) of this text line within the - parent text region. - - - - - - - - - - Alternative word images (e.g. - black-and-white) - - - - - - - - - - - - - - - - Semantic labels / tags - - - - - - - - Overrides primaryLanguage attribute of parent line - and/or text region - - - - - - - The primary script used in the word - - - - - - - The secondary script used in the word - - - - - - - The direction in which text within the word - should be read (order of characters). - - - - - - - Overrides the production attribute of the parent - text line and/or text region. - - - - - - For generic use - - - - - - - - - - Alternative glyph images (e.g. - black-and-white) - - - - - - - - Container for graphemes, grapheme groups and - non-printing characters - - - - - - - - - - - - Semantic labels / tags - - - - - - - - - - The script used for the glyph - - - - - - - Overrides the production attribute of the parent - word / text line / text region. - - - - - - For generic use - - - - - - - - - - Text in a "simple" form (ASCII or extended ASCII - as mostly used for typing). I.e. no use of - special characters for ligatures (should be - stored as two separate characters) etc. - - - - - - - Correct encoding of the original, always using - the corresponding Unicode code point. I.e. - ligatures have to be represented as one - character etc. - - - - - - - - Used for sort order in case multiple TextEquivs are defined. - The text content with the lowest index should be interpreted - as the main text content. - - - - - - - - - - - OCR confidence value (between 0 and 1) - - - - - - Type of text content (is it free text or a number, for instance). - This is only a descriptive attribute, the text type - is not checked during XML validation. - - - - - - - Refinement for dataType attribute. Can be a regular expression, for instance. - - - - - - - - - - An image is considered to be more intricate and complex - than a graphic. These can be photos or drawings. - - - - - - - - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - - - - - - - The colour bit depth required for the region - - - - - - - The background colour of the region - - - - - - - Specifies whether the region also contains - text - - - - - - - - - - A line drawing is a single colour illustration without - solid areas. - - - - - - - - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - - - - - - - The pen (foreground) colour of the region - - - - - - - The background colour of the region - - - - - - - Specifies whether the region also contains - text - - - - - - - - - - Regions containing simple graphics, such as a company - logo, should be marked as graphic regions. - - - - - - - - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - - - - - - - The type of graphic in the region - - - - - - - An approximation of the number of colours - used in the region - - - - - - - Specifies whether the region also contains - text. - - - - - - - - - - Tabular data in any form is represented with a table - region. Rows and columns may or may not have separator - lines; these lines are not separator regions. - - - - - - - - Table grid (visible or virtual grid lines) - - - - - - - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - - - - - - - The number of rows present in the table - - - - - - - The number of columns present in the table - - - - - - - The colour of the lines used in the region - - - - - - - The background colour of the region - - - - - - - Specifies the presence of line separators - - - - - - - Specifies whether the region also contains - text - - - - - - - - - - Matrix of grid points defining the table grid on the page. - - - - - - - One row in the grid point matrix. - Points with x,y coordinates. - (note: for a table with n table rows there should be n+1 grid rows) - - - - - - - - Points with x,y coordinates. - - - - - The grid row index - - - - - - - - - Regions containing charts or graphs of any type, should - be marked as chart regions. - - - - - - - - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - - - - - - - The type of chart in the region - - - - - - - An approximation of the number of colours - used in the region - - - - - - - The background colour of the region - - - - - - - Specifies whether the region also contains - text - - - - - - - - - - Separators are lines that lie between columns and - paragraphs and can be used to logically separate - different articles from each other. - - - - - - - - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - - - - - - - The colour of the separator - - - - - - - - - - Regions containing equations and mathematical symbols - should be marked as maths regions. - - - - - - - - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - - - - - - - The background colour of the region - - - - - - - - - - Regions containing chemical formulas. - - - - - - - - The angle the rectangle encapsulating a - region has to be rotated in clockwise - direction in order to correct the present - skew (negative values indicate - anti-clockwise rotation). Range: - -179.999,180 - - - - - - - The background colour of the region - - - - - - - - - - Regions containing maps. - - - - - - - - The angle the rectangle encapsulating a - region has to be rotated in clockwise - direction in order to correct the present - skew (negative values indicate - anti-clockwise rotation). Range: - -179.999,180 - - - - - - - - - - Regions containing musical notations. - - - - - - - - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - - - - - - - The background colour of the region - - - - - - - - - - Regions containing advertisements. - - - - - - - - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - - - - - - - The background colour of the region - - - - - - - - - - Noise regions are regions where no real data lies, only - false data created by artifacts on the document or - scanner noise. - - - - - - - - - - To be used if the region type cannot be ascertained. - - - - - - - - - - Regions containing content that is not covered - by the default types (text, graphic, image, - line drawing, chart, table, separator, maths, - map, music, chem, advert, noise, unknown). - - - - - - - - Information on the type of content represented by this region - - - - - - - - - - Determines the effective area on the paper of a printed page. - Its size is equal for all pages of a book - (exceptions: titlepage, multipage pictures). - It contains all living elements (except marginals) - like body type, footnotes, headings, running titles. - It does not contain pagenumber (if not part of running title), - marginals, signature mark, preview words. - - - - - - - - - - Definition of the reading order within the page. - To express a reading order between elements - they have to be included in an OrderedGroup. - Groups may contain further groups. - - - - - - - - - Confidence value (between 0 and 1) - - - - - - Numbered region - - - - Position (order number) of this item within the current hierarchy level. - - - - - - - - Indexed group containing ordered elements - - - - - - - Semantic labels / tags - - - - - - - - - - - - - Optional link to a parent region of nested regions. - The parent region doubles as reading order group. - Only the nested regions should be allowed as group members. - - - - - - - Position (order number) of this item within the - current hierarchy level. - - - - - - - - - Is this group a continuation of another group (from - previous column or page, for example)? - - - - - - For generic use - - - - - - - - Indexed group containing unordered elements - - - - - - - - Semantic labels / tags - - - - - - - - - - - - - Optional link to a parent region of nested regions. - The parent region doubles as reading order group. - Only the nested regions should be allowed as group members. - - - - - - - Position (order number) of this item within the - current hierarchy level. - - - - - - - - - Is this group a continuation of another group - (from previous column or page, for example)? - - - - - - For generic use - - - - - - - - - - - Numbered group (contains ordered elements) - - - - - - - - Semantic labels / tags - - - - - - - - - - - - - Optional link to a parent region of nested regions. - The parent region doubles as reading order group. - Only the nested regions should be allowed as group members. - - - - - - - - - Is this group a continuation of another group - (from previous column or page, for example)? - - - - - - For generic use - - - - - - - - Numbered group (contains unordered elements) - - - - - - - - Semantic labels / tags - - - - - - - - - - - - - Optional link to a parent region of nested regions. - The parent region doubles as reading order group. - Only the nested regions should be allowed as group members. - - - - - - - - - Is this group a continuation of another group - (from previous column or page, for example)? - - - - - - For generic use - - - - - - - - Border of the actual page (if the scanned image - contains parts not belonging to the page). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ISO 639.x 2016-07-14 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - iso15924 2016-07-14 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Can be used to express the z-index of overlapping - regions. An element with a greater z-index is always in - front of another element with lower z-index. - - - - - - - - - - - - - - - - - - - - - - Confidence value (between 0 and 1) - - - - - - - Point list with format "x1,y1 x2,y2 ..." - - - - - - - - - - Container for one-to-one relations between layout - objects (for example: DropCap - paragraph, caption - - image). - - - - - - - - - - - One-to-one relation between to layout object. Use 'link' - for loose relations and 'join' for strong relations - (where something is fragmented for instance). - - Examples for 'link': caption - image floating - - paragraph paragraph - paragraph (when a paragraph is - split across columns and the last word of the first - paragraph DOES NOT continue in the second paragraph) - drop-cap - paragraph (when the drop-cap is a whole word) - - Examples for 'join': word - word (separated word at the - end of a line) drop-cap - paragraph (when the drop-cap - is not a whole word) paragraph - paragraph (when a - pragraph is split across columns and the last word of - the first paragraph DOES continue in the second - paragraph) - - - - - - Semantic labels / tags - - - - - - - - - - - - - - - - - - - For generic use - - - - - - - - Text production type - - - - - - - - - - - - - - - Monospace (fixed-pitch, non-proportional) or - proportional font. - - - - - - For instance: Arial, Times New Roman. - Add more information if necessary - (e.g. blackletter, antiqua). - - - - - - - Serif or sans-serif typeface. - - - - - - - - The size of the characters in points. - - - - - - - The x-height or corpus size refers to the distance - between the baseline and the mean line of - lower-case letters in a typeface. - The unit is assumed to be pixels. - - - - - - - The degree of space (in points) between - the characters in a string of text. - - - - - - - - Text colour in RGB encoded format - (red value) + (256 x green value) + (65536 x blue value). - - - - - - Background colour - - - - - - Background colour in RGB encoded format - (red value) + (256 x green value) + (65536 x blue value). - - - - - - - Specifies whether the colour of the text appears - reversed against a background colour. - - - - - - - - - Line style details if "underlined" is TRUE - - - - - - - - - - - - - - - - - Alternative region images - (e.g. black-and-white). - - - - - - - - - Semantic labels / tags - - - - - - Roles the region takes - (e.g. in context of a parent region). - - - - - - - - - - - - - - - - - - - - - - - - For generic use - - - - - - - Is this region a continuation of another region - (in previous column or page, for example)? - - - - - - - - - - - Confidence value (between 0 and 1) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Examples: - "123.456", "+1234.456", - "-1234.456", "-.456", "-456" - - - - - - - Examples: - "123.456", "+1234.456", "-1.2344e56", - "-.45E-6", "INF", "-INF", "NaN" - - - - - - - Examples: - "123456", "+00000012", "-1", "-456" - - - - - - - Examples: "true", "false", "1", "0" - - - - - - - Examples: - "2001-10-26", "2001-10-26+02:00", - "2001-10-26Z", "2001-10-26+00:00", - "-2001-10-26", "-20000-04-01" - - - - - - - Examples: - "21:32:52", "21:32:52+02:00", "19:32:52Z", - "19:32:52+00:00", "21:32:52.12679" - - - - - - - Examples: - "2001-10-26T21:32:52", "2001-10-26T21:32:52+02:00", - "2001-10-26T19:32:52Z", "2001-10-26T19:32:52+00:00", - "-2001-10-26T21:32:52", "2001-10-26T21:32:52.12679" - - - - - - Generic text string - - - - - - An XSD type that is not listed or a custom type - (use dataTypeDetails attribute). - - - - - - - - - - - - Container for graphemes, grapheme groups and - non-printing characters. - - - - - - - - - - - - Base type for graphemes, grapheme groups and non-printing characters. - - - - - - - - - - Order index of grapheme, group, or non-printing character - within the parent container (graphemes or glyph or grapheme group). - - - - - - - - - - - - - Type of character represented by the - grapheme, group, or non-printing character element. - - - - - - - - - - - - For generic use - - - - - For generic use - - - - - - - Represents a sub-element of a glyph. - Smallest graphical unit that can be - assigned a Unicode code point. - - - - - - - - - - - - - - A glyph component without visual representation - but with Unicode code point. - Non-visual / non-printing / control character. - Part of grapheme container (of glyph) or grapheme sub group. - - - - - - - - - - - - - - - - - - - - - Container for user-defined attributes - - - - - - - - - Structured custom data defined by name, type and value. - - - - - - - - - - - - - - - - - - - - Cell position in table starting with row 0 - - - - - Cell position in table starting with column 0 - - - - - Number of rows the cell spans (optional; default is 1) - - - - - Number of columns the cell spans (optional; default is 1) - - - - - - Is the cell a column or row header? - - - - - - - - - - Data for a region that takes on the role - of a table cell within a parent table region. - - - - - - - - - - - - -