diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5f96ee7e3377d01c9eb188520bed08d099f23469..1f9a3072d826769442f390d1cc718db9d640c604 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -276,6 +276,7 @@ get-collation-results: - 'curl --output jobs.json "https://gitlab.gwdg.de/api/v4/projects/$COLLATEX_REPO_ID/pipelines/$PIPELINE_ID/jobs?private_token=$GITLAB_TOKEN"' - RELEVANT_JOB_ID=$(jq ".[]|select(.stage==\"summarize\")" < jobs.json | jq .id) - 'curl --output collation-results.zip "https://gitlab.gwdg.de/api/v4/projects/$COLLATEX_REPO_ID/jobs/$RELEVANT_JOB_ID/artifacts?private_token=$GITLAB_TOKEN"' + - rm -rf backend/exist-app/data/collation-results - unzip -u -d backend/exist-app/data/collation-results collation-results.zip - cd backend - git add exist-app/data/collation-results/* && git commit -m "update collation results" && git push diff --git a/CHANGELOG.md b/CHANGELOG.md index e96cdfd859a2322c12bbae395d5a322732e004ff..840935996c76753768c652236c3aa7001fbe85bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [5.11.0] - 2021-04-27 + +### Added + +- the variants have been added to the AnnotationAPI. 
+ ## [5.10.0] - 2021-04-22 ### Changed diff --git a/exist-app/build.properties b/exist-app/build.properties index 64537ba59d850e01f546d92943a647038d0ed827..62f71104d5e62adf7f1eb95879b7d2fec943d32c 100644 --- a/exist-app/build.properties +++ b/exist-app/build.properties @@ -1,5 +1,5 @@ project.name=https://ahikar-test.sub.uni-goettingen.de/ -project.version=5.10.0 +project.version=5.11.0 project.title=Ahiqar project.abbrev=ahikar-test project.processorversion=5.2.0 diff --git a/exist-app/modules/AnnotationAPI/annotations-variants.xqm b/exist-app/modules/AnnotationAPI/annotations-variants.xqm new file mode 100644 index 0000000000000000000000000000000000000000..edafce0410f0c943b026dc64ad26ae9c0e666b18 --- /dev/null +++ b/exist-app/modules/AnnotationAPI/annotations-variants.xqm @@ -0,0 +1,296 @@ +xquery version "3.1"; + +(:~ + : This module is responsible for retrieving the variants of a given text on a + : given page. For this, it leverages the tokenization of the text that takes + : place during commons:get-page-fragment. 
+ :
+ : The output of the main function, vars:get-variants, is a sequence of maps of
+ : the following format ("value" holds one map per other witness):
+ :
+ : map {
+ :     "body" : map {
+ :         "x-content-type" : "Variant",
+ :         "value" : [ map {
+ :             "entry" : "${text-of-respective-witness}",
+ :             "witness" : "${sigil-of-respective-witness}"
+ :         }],
+ :         "format" : "text/plain",
+ :         "type" : "TextualBody"
+ :     },
+ :     "target" : map {
+ :         "format" : "text/xml",
+ :         "language" : "syc",
+ :         "id" : "http://ahikar.sub.uni-goettingen.de/ns/annotations/${teixml-uri}/${token-id}"
+ :     },
+ :     "type" : "Annotation",
+ :     "id" : "http://ahikar.sub.uni-goettingen.de/ns/annotations/${teixml-uri}/annotation-variants-${token-id}"
+ : }
+ :
+ :)
+
+module namespace vars="http://ahikar.sub.uni-goettingen.de/ns/annotations/variants";
+
+declare namespace tei="http://www.tei-c.org/ns/1.0";
+
+import module namespace commons="http://ahikar.sub.uni-goettingen.de/ns/commons" at "../commons.xqm";
+import module namespace functx = "http://www.functx.com";
+
+declare variable $vars:ns := "http://ahikar.sub.uni-goettingen.de/ns/annotations";
+
+(:~
+ : Main entry point. Creates one variant annotation for each map delivered by
+ : vars:get-variants-on-page-as-maps.
+ :
+ : @param $teixml-uri The current TEI document's base URI, e.g. "12345"
+ : @param $page The page as given in tei:pb/@n
+ : @return A sequence of annotation maps as described in the module header
+ :)
+declare function vars:get-variants($teixml-uri as xs:string,
+    $page as xs:string)
+as map(*)* {
+    let $variants := vars:get-variants-on-page-as-maps($teixml-uri, $page)
+    (: The target language only depends on the document, so it is looked up
+       once per page instead of once per token. As before, no lookup takes
+       place when there are no variants at all. :)
+    let $language :=
+        if (exists($variants)) then
+            vars:get-target-language($teixml-uri)
+        else
+            ()
+
+    for $map in $variants
+    let $id := map:get($map, "current") => map:get("id")
+    return
+        map {
+            "id": $vars:ns || "/" || $teixml-uri || "/annotation-variants-" || $id,
+            "type": "Annotation",
+            "body": vars:get-body-object($map),
+            "target": vars:make-target-object($teixml-uri, $id, $language)
+        }
+};
+
+(:~
+ : Creates the body object of a variant annotation.
+ :
+ : @param $map A map with the keys "current" and "variants" as created by vars:make-map-for-token
+ : @return The annotation's body object
+ :)
+declare function vars:get-body-object($map as map(*))
+as map(*) {
+    map {
+        "type": "TextualBody",
+        "value": vars:make-annotation-value($map),
+        "format": "text/plain",
+        "x-content-type": "Variant"
+    }
+};
+
+(:~
+ : Creates one map per variant. A witness whose "entry" is not a string (i.e.
+ : the empty array set by vars:make-map-for-token) is reported as "omisit".
+ : The witness identifier is replaced by its sigil as listed in
+ : $commons:idno-to-sigils-map.
+ :
+ : @param $map A map with the keys "current" and "variants" as created by vars:make-map-for-token
+ : @return One map with the keys "entry" and "witness" per variant
+ :)
+declare function vars:make-annotation-value($map as map(*))
+as map(*)+ {
+    for $variant in map:get($map, "variants")
+    let $entry := map:get($variant, "entry")
+    let $witness := map:get($variant, "witness")
+    return
+        map {
+            "entry":
+                if ($entry instance of xs:string) then
+                    $entry
+                else
+                    "omisit",
+            "witness": map:get($commons:idno-to-sigils-map, $witness)
+        }
+};
+
+(:~
+ : Creates the target object of a variant annotation.
+ :
+ : @param $map The map of the current token (not evaluated, kept for compatibility)
+ : @param $teixml-uri The current TEI document's base URI, e.g. "12345"
+ : @param $id The ID of the current token
+ : @return The annotation's target object
+ :)
+declare function vars:get-target-information($map as map(*),
+    $teixml-uri as xs:string,
+    $id as xs:string)
+as map(*) {
+    vars:make-target-object($teixml-uri, $id, vars:get-target-language($teixml-uri))
+};
+
+(:~
+ : Assembles the target object from values that have already been determined.
+ :)
+declare %private function vars:make-target-object($teixml-uri as xs:string,
+    $id as xs:string,
+    $language as xs:string)
+as map(*) {
+    map {
+        "id": $vars:ns || "/" || $teixml-uri || "/" || $id,
+        "format": "text/xml",
+        "language": $language
+    }
+};
+
+(:~
+ : Determines the language of a document: the @xml:lang of the first tei:text
+ : that contains at least one text node with a word character.
+ :
+ : NOTE(review): if several tei:text elements with @xml:lang have content
+ : the first one in document order wins; previously this case raised a
+ : cardinality error. Confirm that this is the language wanted for such
+ : documents.
+ :
+ : @param $teixml-uri The current TEI document's base URI, e.g. "12345"
+ : @return The language code; a type error is raised if no tei:text qualifies
+ :)
+declare function vars:get-target-language($teixml-uri as xs:string)
+as xs:string {
+    let $doc := commons:open-tei-xml($teixml-uri)
+    let $languages :=
+        (: fn:matches only accepts a single string, so each text node is
+           tested separately :)
+        $doc//tei:text[@xml:lang]
+            [some $text in descendant::text() satisfies matches($text, "[\w]")]
+            /@xml:lang/string()
+    return
+        head($languages)
+};
+
+(:~
+ : Returns a map holding the current token as well as all its resp. variants per
+ : page. The exact value of the current token depends on the manuscript we are
+ : currently looking at.
+ :
+ : If there are no collation results for the manuscript or no tokens on the
+ : page, the empty sequence is returned.
+ :
+ : @param $teixml-uri The current TEI document's base URI, e.g. "12345"
+ : @param $page The page as given in tei:pb/@n
+ : @return A map with information about the current token and its variants
+ :)
+declare function vars:get-variants-on-page-as-maps($teixml-uri as xs:string,
+    $page as xs:string)
+as map(*)* {
+    let $ms-id := vars:get-ms-id-from-idno($teixml-uri)
+    let $relevant-files-for-ms-id := vars:get-relevant-files($ms-id)
+    let $tokens := vars:get-token-ids-on-page($teixml-uri, $page)
+    return
+        if (empty($relevant-files-for-ms-id) or empty($tokens)) then
+            ()
+        else
+            (: the MS identifier position is the same in all relevant files, so we simply
+               choose the first for looking it up :)
+            let $ms-id-position := vars:determine-id-position($ms-id, $relevant-files-for-ms-id[1])
+            let $files-relevant-for-page := vars:get-files-relevant-for-page($relevant-files-for-ms-id, $ms-id-position, $tokens)
+
+            for $file in $files-relevant-for-page
+            let $table := map:get($file, "table")
+            let $no-of-sequence := array:size($table)
+            let $indices-relevant-for-page := vars:get-indices-relevant-for-page($table, $no-of-sequence, $ms-id-position, $tokens)
+            let $non-ms-id-positions := vars:get-non-ms-id-positions-in-array($file, $ms-id-position)
+
+            for $iii in $indices-relevant-for-page
+            return
+                vars:make-map-for-token($file, $table, $iii, $ms-id-position, $non-ms-id-positions)
+};
+
+
+(:~
+ : Each token (= relevant word) is encoded in a tei:w which has an @xml:id
+ : attribute.
+ :
+ : @param $teixml-uri The base URI of the document, e.g. "12345"
+ : @param $page The current page as provided in tei:pb/@n
+ : @return The IDs of all tokens on the page; empty if the page has no tei:w
+ :)
+declare function vars:get-token-ids-on-page($teixml-uri as xs:string,
+    $page as xs:string)
+as xs:string* {
+    let $page-chunks := commons:get-transcription-and-transliteration-per-page($teixml-uri, $page)
+    return
+        $page-chunks//tei:w/@xml:id/string()
+};
+
+(:~
+ : Creates the MS identifier of a document with commons:make-id-from-idno.
+ :
+ : @param $teixml-uri The base URI of the document, e.g. "12345"
+ : @return The MS identifier
+ :)
+declare function vars:get-ms-id-from-idno($teixml-uri as xs:string)
+as xs:string {
+    let $TEI := commons:open-tei-xml($teixml-uri)//tei:TEI
+    return
+        commons:make-id-from-idno($TEI)
+};
+
+(:~
+ : In the CollateX results, we first have an array of witnesses before the result
+ : of the collation is given in the JSON file. The position of a MS identifier
+ : in this array is reflected in the collation result; The witness on position 2
+ : will always have its tokens in the second position of the result.
+ :
+ : @param $ms-id The MS identifier as given in the array of witnesses
+ : @param $json The collation result as JSON
+ : @return The position of the current MS identifier in the witnesses array
+ :)
+declare function vars:determine-id-position($ms-id as xs:string,
+    $json as map(*))
+as xs:integer {
+    let $witnesses := map:get($json, "witnesses")
+    return
+        index-of($witnesses?*, $ms-id)
+};
+
+(:~
+ : Returns the parsed collation results whose file name contains the given MS
+ : identifier (without slashes) and "json".
+ :
+ : NOTE(review): this is a substring test. An identifier that is part of
+ : another one (e.g. "162" and "Sachau_162") also selects the other
+ : manuscript's files -- TODO confirm that the file naming scheme rules this
+ : out.
+ :
+ : @param $ms-id The MS identifier as created by vars:get-ms-id-from-idno
+ : @return One map per collation result; empty if there is none for this MS
+ :)
+declare function vars:get-relevant-files($ms-id as xs:string)
+as item()* {
+    (: the identifier is compared literally and not used as a regular expression :)
+    let $file-name-part := replace($ms-id, "/", "")
+    for $doc in collection("/db/apps/ahikar/data/collation-results")
+    let $uri := base-uri($doc)
+    where contains($uri, $file-name-part) and contains($uri, "json")
+    return
+        util:binary-doc($uri)
+        => util:base64-decode()
+        => parse-json()
+};
+
+(:~
+ : Returns the ID of the first token of a witness' cell in the collation table
+ : or the empty sequence if the cell doesn't hold any token.
+ :)
+declare %private function vars:get-first-token-id($witness-entry as array(*))
+as xs:string? {
+    if (array:size($witness-entry) gt 0) then
+        $witness-entry?(1) => map:get("id")
+    else
+        ()
+};
+
+(:~
+ : Selects the collation results in which a cell of the current witness starts
+ : with the first or the last token of the page. Each file is returned at most
+ : once, even if it holds both the first and the last token; otherwise every
+ : annotation of the page would be created twice.
+ :
+ : @param $relevant-files-for-ms-id The parsed collation results of the current MS
+ : @param $ms-id-position The position of the current MS in the witnesses array
+ : @param $tokens The IDs of all tokens on the current page
+ : @return The collation results relevant for the page
+ :)
+declare function vars:get-files-relevant-for-page($relevant-files-for-ms-id as map(*)+,
+    $ms-id-position as xs:integer,
+    $tokens as xs:string+)
+as map(*)* {
+    let $boundary-tokens := ($tokens[1], $tokens[last()])
+    for $file in $relevant-files-for-ms-id
+    let $table := map:get($file, "table")
+    where
+        some $iii in 1 to array:size($table) satisfies
+            vars:get-first-token-id($table?($iii)?($ms-id-position)) = $boundary-tokens
+    return
+        $file
+};
+
+(:~
+ : Returns the identifier of the witness at a given position of the witnesses array.
+ :
+ : @param $file A parsed collation result
+ : @param $witness-position The position within the witnesses array
+ : @return The witness' identifier
+ :)
+declare function vars:get-witness($file as map(*),
+    $witness-position as xs:integer)
+as xs:string {
+    map:get($file, "witnesses")
+    => array:get($witness-position)
+};
+
+(:~
+ : Returns all positions of the witnesses array except the given one.
+ :
+ : @param $file A parsed collation result
+ : @param $witness-position The position of the current MS in the witnesses array
+ : @return The positions of all other witnesses
+ :)
+declare function vars:get-non-ms-id-positions-in-array($file as map(*),
+    $witness-position as xs:integer)
+as xs:integer+ {
+    let $no-of-witnesses := map:get($file, "witnesses") => array:size()
+    return
+        (1 to $no-of-witnesses)[. ne $witness-position]
+};
+
+(:~
+ : Returns the cell of a witness in a given row of the collation table.
+ :
+ : @param $table The CollateX results
+ : @param $entry-no The row of the table
+ : @param $witness-position The position of the witness in the witnesses array
+ : @return The tokens of the witness in this row
+ :)
+declare function vars:get-witness-entry($table as array(*),
+    $entry-no as xs:integer,
+    $witness-position as xs:integer)
+as array(*) {
+    let $sequence-entry := $table?($entry-no)
+    return
+        $sequence-entry?($witness-position)
+};
+
+(:~
+ : Returns a sequence of index positions that are relevant for the current witness.
+ : If a witness doesn't have an entry in the collation result or if the entry is
+ : not on the current page, no index position is returned. Only the first token
+ : of an entry is compared with the tokens of the page.
+ :
+ : @param $table The CollateX results
+ : @param $no-of-sequences The number of result sequences in $table
+ : @param $ms-id-position An integer determining which position within the result entry is relevant for the current witness
+ : @param $tokens A list of tokens relevant for the current page
+ : @return The relevant index positions within $table
+ :)
+declare function vars:get-indices-relevant-for-page($table as array(*),
+    $no-of-sequences as xs:integer,
+    $ms-id-position as xs:integer,
+    $tokens as xs:string+)
+as xs:integer* {
+    for $iii in 1 to $no-of-sequences
+    where vars:get-first-token-id($table?($iii)?($ms-id-position)) = $tokens
+    return
+        $iii
+};
+
+(:~
+ : Creates a map for one entry of the collation table in which the token of the
+ : current witness and the tokens of all other witnesses are listed.
+ :
+ : NOTE(review): only the LAST token of the current witness' cell is
+ : considered (the original code iterated over the cell's size, not over
+ : 1 to size). This is kept because the return type and the tests rely on a
+ : single map -- confirm whether a map per token was intended.
+ :
+ : @param $file The parsed collation result $table belongs to
+ : @param $table The CollateX results
+ : @param $entry-pos The row of $table to be processed; the current witness' cell must not be empty
+ : @param $ms-id-position An integer determining which position within the result entry is relevant for the current witness
+ : @param $non-ms-id-positions The positions of all other witnesses
+ : @return A map with the keys "current" and "variants"
+ :)
+declare function vars:make-map-for-token($file as map(*),
+    $table as array(*),
+    $entry-pos as xs:integer,
+    $ms-id-position as xs:integer,
+    $non-ms-id-positions as xs:integer*)
+as map(*) {
+    let $sequence-entry := $table?($entry-pos)
+    let $ms-id-entry := $sequence-entry?($ms-id-position)
+    let $token-no := array:size($ms-id-entry)
+    return
+        map {
+            "current": array:get($ms-id-entry, $token-no),
+            "variants":
+                for $jjj in $non-ms-id-positions
+                let $witness-entry := $sequence-entry?($jjj)
+                return
+                    map {
+                        "witness": vars:get-witness($file, $jjj),
+                        "entry":
+                            (: a witness without a token at this position gets an empty array :)
+                            if (array:size($witness-entry) ge $token-no) then
+                                array:get($witness-entry, $token-no) => map:get("t")
+                            else
+                                []
+                    }
+        }
+};
diff --git a/exist-app/modules/AnnotationAPI/annotations.xqm b/exist-app/modules/AnnotationAPI/annotations.xqm index 26e5877f6c63292743daf7a29f48ada2ad92b32d..45912a95bec7eb99e0eca42c3d71cede6e7cd7d5 100644 --- a/exist-app/modules/AnnotationAPI/annotations.xqm +++ b/exist-app/modules/AnnotationAPI/annotations.xqm @@ -18,6 +18,7 @@ declare namespace tgmd="http://textgrid.info/namespaces/metadata/core/2010"; import module namespace commons="http://ahikar.sub.uni-goettingen.de/ns/commons" at "../commons.xqm"; import module namespace functx = "http://www.functx.com"; +import module namespace vars="http://ahikar.sub.uni-goettingen.de/ns/annotations/variants" at "annotations-variants.xqm"; declare variable $anno:ns := "http://ahikar.sub.uni-goettingen.de/ns/annotations"; @@ -263,8 +264,8 @@ as map() { anno:find-in-map($anno:uris, $document) => anno:get-all-xml-uris-for-submap() let $annotations := for $xml in $xmls return - for $page in 
anno:get-pages-in-TEI($xml)return - anno:get-annotations($xml, $page) + for $page in anno:get-pages-in-TEI($xml) return + anno:make-complete-annotations($xml, $page) return map { @@ -374,7 +375,7 @@ as map() { "next": $nextPageURL, "prev": $prevPageURL, "startIndex": anno:determine-start-index-for-page($document, $page), - "items": anno:get-annotations($xml, $page) + "items": anno:make-complete-annotations($xml, $page) } } }; @@ -392,15 +393,7 @@ as map() { declare function anno:get-annotations($teixml-uri as xs:string, $page as xs:string) as map()* { - let $xml-doc := commons:open-tei-xml($teixml-uri) - let $langs := $xml-doc//tei:text[@xml:lang[. = ("syc", "ara", "karshuni")]]/@xml:lang/string() - let $pageChunks := - if ($langs = "karshuni") then - (anno:get-page-fragment($teixml-uri, $page, "transcription"), - anno:get-page-fragment($teixml-uri, $page, "transliteration")) - else - anno:get-page-fragment($teixml-uri, $page, "transcription") - + let $pageChunks := commons:get-transcription-and-transliteration-per-page($teixml-uri, $page) let $annotation-elements := for $chunk in $pageChunks return @@ -418,23 +411,6 @@ as map()* { } }; - -(:~ - : Returns a single page from a TEI resource, i.e. all content from the given $page - : up to the next page break. - : - : @param $documentURI The resource's URI. Attention: This refers to the TEI file itself! - : @param $page The page to be returned as tei:pb/@n/string() - :) -declare function anno:get-page-fragment($documentURI as xs:string, - $page as xs:string, - $text-type as xs:string) -as element(tei:TEI)? { - let $nodeURI := commons:get-document($documentURI, "data")/base-uri() - return - commons:get-page-fragment($nodeURI, $page, $text-type) -}; - (:~ : Returns the Body Object for an annotation. : @@ -918,3 +894,22 @@ as xs:integer { return sum($noOfAnnotationsPerElement) }; + +(:~ + : Gets a complete list of all relevant annotations per page. + : + : @param $teixml-uri The current document's URI, e.g. 
"12345"
+ : @param $page The current page
+ : @return A sequence of maps containing the annotations
+ :)
+declare function anno:make-complete-annotations($teixml-uri as xs:string,
+    $page as xs:string)
+as map(*)* {
+    (: the result is the sequence of the regular annotations followed by the
+    variant annotations :)
+    anno:get-annotations($teixml-uri, $page),
+    (: the sample data has to be excluded here since it doesn't have any variants
+    and no CollateX output. Sample data is recognized by the value stored
+    for "syriac" in $anno:lang-aggs starting with "sample". :)
+    if (starts-with(map:get($anno:lang-aggs, "syriac"), "sample")) then
+        ()
+    else
+        vars:get-variants($teixml-uri, $page)
+};
\ No newline at end of file diff --git a/exist-app/modules/commons.xqm b/exist-app/modules/commons.xqm index 6f9b3aeb9f40c59eb5d814b6c58bc2f228287ce6..11acf5ca8a55d45b30a6209558e5c3b9bcdf76e1 100644 --- a/exist-app/modules/commons.xqm +++ b/exist-app/modules/commons.xqm @@ -22,6 +22,49 @@ declare variable $commons:appHome := "/db/apps/ahikar"; declare variable $commons:ns := "http://ahikar.sub.uni-goettingen.de/ns/commons";
+(:~
+ : Maps manuscript identifiers to the sigils that are displayed for a witness.
+ :
+ : NOTE(review): the key "Cambrigde_Add_3497" looks like a misspelling of
+ : "Cambridge". Confirm that it matches the identifier the lookups are made
+ : with before changing it.
+ :)
+declare variable $commons:idno-to-sigils-map :=
+    map {
+        "Borg_ar_201": "Borg. ar. 201",
+        "Add_2020": "C",
+        "Sachau_162": "S",
+        "syr_611": "K",
+        "syr_612": "I",
+        "syr_434": "B",
+        "Add_7200": "L",
+        "Brit_Mus_Add_7209": "Brit. Add. 7209",
+        "Brit_Libr_Or_9321": "Brit. Or. 9321",
+        "Cambrigde_Add_3497": "Cam. Add. 3497",
+        "Camb_Add_2886": "Cam. Add. 2886",
+        "Cod_Arab_236": "Cod. Arab. 236",
+        "Paris_Arabe_3637": "Paris. ar. 3637",
+        "Sachau_290_Sachau_339": "Sach. 339",
+        "DFM_00614": "DFM 614",
+        "GCAA_00486": "GCAA 486",
+        "Mingana_syr_133": "Ming. syr. 133",
+        "Mingana_ar_christ_93_84": "Ming. ar. 93",
+        "Mingana_Syr_258": "Ming. syr. 258",
+        "433": "M",
+        "Ms_orient_A_2652": "Gotha 2652",
+        "430": "D",
+        "Or_1292b": "Leiden Or. 1292",
+        "Paris_Arabe_3656": "Paris. ar. 3656",
+        "syr_422": "N",
+        "Sado_no_9": "P",
+        "Salhani": "Salhani",
+        "Sbath_25": "Sbath 25",
+        "Ar_7/229": "A",
+        "Manuscrit_4122": "T",
+        "Or_2313": "O",
+        "Syr_80": "H",
+        "162": "J",
+        "Sachau_336": "U",
+        "Vat_ar_74_Scandar_40": "Vat. ar. 74",
+        "Vat_ar_2054": "Vat. ar. 2054",
+        "Vat_sir_159": "Vat. syr. 159",
+        "Vat_sir_199": "Vat. syr. 199",
+        "Vat_sir_424": "Vat. syr. 424"
+    };
+
 declare variable $commons:responseHeader200 := <rest:response> <http:response xmlns:http="http://expath.org/ns/http-client" status="200"> @@ -75,6 +118,35 @@ as xs:string* { () };
+(:~
+ : Returns the page fragments of all text types that are relevant for a page:
+ : transcription and transliteration if one of the document's tei:text
+ : elements has the language "karshuni", the transcription only in all other
+ : cases.
+ :
+ : @param $teixml-uri The resource's URI, e.g. "12345"
+ : @param $page The page to be returned as tei:pb/@n/string()
+ : @return One page fragment per relevant text type
+ :)
+declare function commons:get-transcription-and-transliteration-per-page($teixml-uri as xs:string,
+    $page as xs:string)
+as element(tei:TEI)+ {
+    let $xml-doc := commons:open-tei-xml($teixml-uri)
+    (: only the three languages listed here are taken into account :)
+    let $langs := $xml-doc//tei:text[@xml:lang[. = ("syc", "ara", "karshuni")]]/@xml:lang/string()
+    return
+        if ($langs = "karshuni") then
+            (commons:get-page-fragment-from-uri($teixml-uri, $page, "transcription"),
+            commons:get-page-fragment-from-uri($teixml-uri, $page, "transliteration"))
+        else
+            commons:get-page-fragment-from-uri($teixml-uri, $page, "transcription")
+};
+
+(:~
+ : Returns a single page from a TEI resource, i.e. all content from the given $page
+ : up to the next page break.
+ :
+ : @param $documentURI The resource's URI
+ : @param $page The page to be returned as tei:pb/@n/string()
+ : @param $text-type The text type to be returned; the callers in this module pass "transcription" or "transliteration"
+ : @return The result of commons:get-page-fragment for the resource's base URI
+ :)
+declare function commons:get-page-fragment-from-uri($documentURI as xs:string,
+    $page as xs:string,
+    $text-type as xs:string)
+as element(tei:TEI)? {
+    (: commons:get-page-fragment expects the base URI of the stored document :)
+    let $nodeURI := commons:get-document($documentURI, "data")/base-uri()
+    return
+        commons:get-page-fragment($nodeURI, $page, $text-type)
+};
+
 (:~ : Returns a given page from a requested TEI document and from the requested text type. 
: In some cases the requested text type isn't available or doesn't have any text, so that @@ -91,8 +163,9 @@ declare function commons:get-page-fragment($tei-xml-base-uri as xs:string, as element() { if (local:has-text-content($tei-xml-base-uri, $page, $text-type)) then let $node := doc($tei-xml-base-uri)/tei:TEI + => tokenize:main() => commons:add-IDs() - => tokenize:main(), + , $start-node := $node//tei:text[@type = $text-type]//tei:pb[@n = $page], $end-node := commons:get-end-node($start-node), $wrap-in-first-common-ancestor-only := false(), @@ -235,7 +308,7 @@ declare %private function local:create-textgrid-session-id() { declare function commons:compress-to-zip($collection-uri as xs:string) as xs:string* { - if (commons:does-zip-need-update()) then +(: if (commons:does-zip-need-update()) then:) let $valid-uris := for $doc in collection($collection-uri) return if (contains(base-uri($doc), "sample")) then @@ -245,11 +318,11 @@ as xs:string* { let $zip := compression:zip($valid-uris, false()) return ( - commons:make-last-zip-created(), +(: commons:make-last-zip-created(),:) xmldb:store-as-binary("/db/data", "ahikar-json.zip", $zip) ) - else - () +(: else:) +(: ():) }; declare function commons:does-zip-need-update() diff --git a/exist-app/modules/tei2json.xqm b/exist-app/modules/tei2json.xqm index a656d7445f433d0b6deddeefe37d4284a904aaf4..3757d0443d72232d5eb02af3cc826736871eae75 100644 --- a/exist-app/modules/tei2json.xqm +++ b/exist-app/modules/tei2json.xqm @@ -106,18 +106,23 @@ declare variable $tei2json:lines-of-transmission := declare function tei2json:main() as xs:string+ { - tei2json:create-json-collection-if-not-available(), + tei2json:remove-old-jsons(), + tei2json:create-json-collection(), tei2json:tokenize-teis() => tei2json:make-jsons-per-section-and-transmission-line() }; +declare function tei2json:remove-old-jsons() +as item()? 
{ + if (xmldb:collection-available($commons:tg-collection || "/json")) then + xmldb:remove($commons:tg-collection || "/json") + else + () +}; -declare function tei2json:create-json-collection-if-not-available() +declare function tei2json:create-json-collection() as xs:string { - if (xmldb:collection-available($commons:json)) then - $commons:json - else - xmldb:create-collection($commons:tg-collection, "json") + xmldb:create-collection($commons:tg-collection, "json") }; diff --git a/exist-app/modules/testtrigger.xqm b/exist-app/modules/testtrigger.xqm index 1de99e1d3f12de1c3bb1d2d81127d32210ad8630..876318aed23c589f32faf31b92714b60b20183bc 100644 --- a/exist-app/modules/testtrigger.xqm +++ b/exist-app/modules/testtrigger.xqm @@ -15,6 +15,7 @@ import module namespace rest="http://exquery.org/ns/restxq"; import module namespace test="http://exist-db.org/xquery/xqsuite" at "resource:org/exist/xquery/lib/xqsuite/xqsuite.xql"; import module namespace at="http://ahikar.sub.uni-goettingen.de/ns/annotations/tests" at "../tests/annotation-tests.xqm"; +import module namespace av="http://ahikar.sub.uni-goettingen.de/ns/annotations/variants/tests" at "../tests/annotation-variants-tests.xqm"; import module namespace art="http://ahikar.sub.uni-goettingen.de/ns/annotations/rest/tests" at "../tests/annotation-rest-tests.xqm"; import module namespace ct="http://ahikar.sub.uni-goettingen.de/ns/commons-tests" at "../tests/commons-tests.xqm"; import module namespace tct="http://ahikar.sub.uni-goettingen.de/ns/tapi/collection/tests" at "../tests/tapi-collection-tests.xqm"; @@ -79,6 +80,7 @@ as element()+ { test:suite(util:list-functions("http://ahikar.sub.uni-goettingen.de/ns/tei2html-tests")), test:suite(util:list-functions("http://ahikar.sub.uni-goettingen.de/ns/tei2html-textprocessing-tests")), test:suite(util:list-functions("http://ahikar.sub.uni-goettingen.de/ns/annotations/tests")), + 
test:suite(util:list-functions("http://ahikar.sub.uni-goettingen.de/ns/annotations/variants/tests")), test:suite(util:list-functions("http://ahikar.sub.uni-goettingen.de/ns/tapi/txt/normalization/tests")), test:suite(util:list-functions("http://ahikar.sub.uni-goettingen.de/ns/annotations/rest/tests")), test:suite(util:list-functions("http://ahikar.sub.uni-goettingen.de/ns/tapi/images/tests")), @@ -126,6 +128,7 @@ as xs:string? { case "http://ahikar.sub.uni-goettingen.de/ns/tei2html-textprocessing-tests" return "TEI2HTML text processing" case "http://ahikar.sub.uni-goettingen.de/ns/tapi/txt/normalization/tests" return "TXT normalization" case "http://ahikar.sub.uni-goettingen.de/ns/annotations/tests" return "AnnotationAPI" + case "http://ahikar.sub.uni-goettingen.de/ns/annotations/variants/tests" return "Annotation variants" case "http://ahikar.sub.uni-goettingen.de/ns/annotations/rest/tests" return "AnnotationAPI REST" case "http://ahikar.sub.uni-goettingen.de/ns/tapi/images/tests" return "Image Sections" case "http://ahikar.sub.uni-goettingen.de/ns/tokenize/tests" return "Tokenize" diff --git a/exist-app/tests/annotation-tests.xqm b/exist-app/tests/annotation-tests.xqm index 9c19518907d3ba994098557bb4b29fa5a726e99e..acd87dbcd3946fcc0b11cc98c5823ec689060500 100644 --- a/exist-app/tests/annotation-tests.xqm +++ b/exist-app/tests/annotation-tests.xqm @@ -137,15 +137,6 @@ as map() { anno:make-annotationCollection-for-manifest($collection, $document, $page, $server) }; -declare - %test:args("sample_teixml", "82a", "transcription") %test:assertXPath("$result//* = 'حقًا'") - %test:args("sample_teixml", "82a", "transliteration") %test:assertXPath("$result//* = 'الحاسوب'") -function at:get-page-fragment($documentURI as xs:string, - $page as xs:string, - $text-type as xs:string) -as element(tei:TEI) { - anno:get-page-fragment($documentURI, $page, $text-type) -}; declare %test:args("sample_main_edition") %test:assertEquals("476") @@ -292,7 +283,7 @@ as map() { declare 
%test:args("sample_teixml", "84a") - %test:assertXPath("$result = 'http://ahikar.sub.uni-goettingen.de/ns/annotations/sample_teixml/annotation-N4.4.2.4.4.354.2'") + %test:assertXPath("$result = 'http://ahikar.sub.uni-goettingen.de/ns/annotations/sample_teixml/annotation-N1.2.1.2.3.100.3'") function at:get-annotations-detailed-id($teixml-uri as xs:string, $page as xs:string) as xs:string { diff --git a/exist-app/tests/annotation-variants-tests.xqm b/exist-app/tests/annotation-variants-tests.xqm new file mode 100644 index 0000000000000000000000000000000000000000..500dc930ab5c6fdead99522f9ef90b0ff9231280 --- /dev/null +++ b/exist-app/tests/annotation-variants-tests.xqm @@ -0,0 +1,165 @@
+xquery version "3.1";
+
+(:~
+ : Tests for the variants module of the AnnotationAPI.
+ :)
+
+module namespace t="http://ahikar.sub.uni-goettingen.de/ns/annotations/variants/tests";
+
+import module namespace test="http://exist-db.org/xquery/xqsuite" at "resource:org/exist/xquery/lib/xqsuite/xqsuite.xql";
+import module namespace vars="http://ahikar.sub.uni-goettingen.de/ns/annotations/variants" at "../modules/AnnotationAPI/annotations-variants.xqm";
+
+(: a map of the format that vars:make-map-for-token creates :)
+declare variable $t:sample-map :=
+    map {
+        "variants": (map {
+            "entry": "ܐܘ",
+            "witness": "430"
+        },map {
+            "entry": "ܕܐܚܛܛ",
+            "witness": "syr_422"
+        },map {
+            "entry": [],
+            "witness": "syr_611"
+        },map {
+            "entry": [],
+            "witness": "syr_612"
+        }),
+        "current": map {
+            "t": "ܐܡܪ",
+            "id": "syr_434_N4.4.2.6.4.8.1.3_1"
+        }
+    };
+
+(: a parsed collation result :)
+declare variable $t:sample-file :=
+    util:binary-doc("/db/apps/ahikar/data/collation-results/ara-karshuni_Sbath_25_Vat_sir_424_Vat_sir_199_parables_result.json")
+    => util:base64-decode()
+    => parse-json();
+
+declare
+    %test:args("sample_teixml", "82a") %test:assertXPath("count($result) = 349")
+function t:get-token-ids-on-page($teixml-uri as xs:string,
+    $page as xs:string)
+as xs:string+ {
+    vars:get-token-ids-on-page($teixml-uri, $page)
+};
+
+
+declare
+    %test:args("sample_teixml") %test:assertEquals("Add_2020")
+function t:get-ms-id-from-idno($teixml-uri as xs:string)
+as xs:string {
+    vars:get-ms-id-from-idno($teixml-uri)
+};
+
+declare
+    %test:args("Sachau_336") %test:assertXPath("count($result) = 5")
+function t:get-relevant-files($ms-id as xs:string)
+as item()+ {
+    vars:get-relevant-files($ms-id)
+};
+
+declare
+    %test:args("Ar_7/229") %test:assertEquals("4")
+function t:determine-id-position($ms-id as xs:string)
+as xs:integer {
+    let $json := vars:get-relevant-files($ms-id)
+    return
+        vars:determine-id-position($ms-id, $json[1])
+};
+
+declare
+    %test:assertEquals("Variant")
+function t:get-body-object() {
+    vars:get-body-object($t:sample-map)
+    => map:get("x-content-type")
+};
+
+declare
+    %test:assertXPath("count($result) = 2")
+function t:get-files-relevant-for-page() {
+    let $relevant-files-for-ms-id := vars:get-relevant-files("Sbath_25")
+    let $ms-id-position := 1
+    let $tokens := ("Sbath_25_N4.4.2.4.4.2552.1_1", "Sbath_25_N4.4.2.4.4.500.1_1")
+    return
+        vars:get-files-relevant-for-page($relevant-files-for-ms-id, $ms-id-position, $tokens)
+};
+
+declare
+    %test:assertEquals("1")
+function t:get-indices-relevant-for-page() {
+    let $table :=
+        $t:sample-file
+        => map:get("table")
+    let $no-of-sequences := array:size($table)
+    let $ms-id-position := 1
+    let $tokens := ("Sbath_25_N4.4.2.4.4.2276.1_1", "Sbath_25_N4.4.2.4.4.2276.2.1_1", "Sbath_25_N4.4.2.4.4.2276.3_1", "Sbath_25_N4.4.2.4.4.2276.3_2", "Sbath_25_N4.4.2.4.4.2276.3_3")
+    return
+        vars:get-indices-relevant-for-page($table, $no-of-sequences, $ms-id-position, $tokens)
+};
+
+declare
+    %test:assertXPath("count($result) = 2 and $result = (2, 3)")
+function t:get-non-ms-id-positions-in-array() {
+    vars:get-non-ms-id-positions-in-array($t:sample-file, 1)
+};
+
+(: NOTE(review): despite its name this test checks vars:make-annotation-value;
+   vars:get-target-information itself is not covered by this module. :)
+declare
+    %test:assertXPath("$result = ('ܕܐܚܛܛ', 'N')")
+function t:get-target-information() {
+    let $second-variant := vars:make-annotation-value($t:sample-map)[2]
+    return
+        (
+            map:get($second-variant, "entry"),
+            map:get($second-variant, "witness")
+        )
+};
+
+declare
+    %test:assertEquals("ara")
+function t:get-target-language() {
+    vars:get-target-language("kant_sample_teixml")
+};
+
+declare
+    %test:assertEquals("Sbath_25")
+function t:get-witness() {
+    vars:get-witness($t:sample-file, 1)
+};
+
+declare
+    %test:assertEquals("5")
+function t:get-witness-entry() {
+    let $table :=
+        $t:sample-file
+        => map:get("table")
+    (: both parameters are declared as xs:integer, so integers and not
+       strings have to be passed :)
+    let $entry-no := 1
+    let $witness-position := 1
+    return
+        vars:get-witness-entry($table, $entry-no, $witness-position)
+        => array:size()
+};
+
+declare
+    %test:assertEquals("D")
+function t:make-annotation-value() {
+    vars:make-annotation-value($t:sample-map)[1]
+    => map:get("witness")
+};
+
+declare
+    %test:assertXPath("$result = ('Sbath_25_N4.4.2.4.4.2276.3_3', 'Vat_sir_424', 'Vat_sir_199')")
+function t:make-map-for-token() {
+    let $table := map:get($t:sample-file, "table")
+    let $entry-pos := 1
+    let $ms-id-position := 1
+    let $non-ms-id-positions := (2, 3)
+    (: the map is created once and inspected afterwards :)
+    let $token-map := vars:make-map-for-token($t:sample-file, $table, $entry-pos, $ms-id-position, $non-ms-id-positions)
+    let $variants := map:get($token-map, "variants")
+    return
+        (
+            map:get($token-map, "current") => map:get("id"),
+            $variants[1] => map:get("witness"),
+            $variants[2] => map:get("witness")
+        )
+};
\ No newline at end of file diff --git a/exist-app/tests/commons-tests.xqm b/exist-app/tests/commons-tests.xqm index 6cc032d36bfd2be582c90be7b50588b0731bf386..e614fe1529ae868d0df72f6c8bf946e2d2c67a74 100644 --- a/exist-app/tests/commons-tests.xqm +++ b/exist-app/tests/commons-tests.xqm @@ -87,6 +87,25 @@ as document-node() { commons:get-metadata-file($uri) }; +declare + %test:args("sample_teixml", "82a", "transcription") %test:assertXPath("$result//* = 'حقًا'") + %test:args("sample_teixml", "82a", "transliteration") %test:assertXPath("$result//* = 'الحاسوب'") +function ct:get-page-fragment($documentURI as xs:string, + $page as xs:string, + $text-type as xs:string) +as element(tei:TEI) { + 
commons:get-page-fragment-from-uri($documentURI, $page, $text-type) +}; + +declare + %test:args("sample_teixml", "82a") %test:assertXPath("count($result) = 2") + %test:args("sample_syriac_teixml", "86r") %test:assertXPath("count($result) = 1") +function ct:get-transcription-and-transliteration-per-page($documentURI as xs:string, + $page as xs:string) +as element(tei:TEI)+ { + commons:get-transcription-and-transliteration-per-page($documentURI, $page) +}; + declare %test:assertXPath("$result = 'Add_2020'") %test:assertXPath("$result = 'Sachau_290_Sachau_339'") diff --git a/exist-app/tests/tei2json-tests.xqm b/exist-app/tests/tei2json-tests.xqm index 9498cbf5e8fbe65a662a2722c2055407eedf05a3..07d259cec41c4df8fa4c3ee6ec44b35f095ca667 100644 --- a/exist-app/tests/tei2json-tests.xqm +++ b/exist-app/tests/tei2json-tests.xqm @@ -22,8 +22,8 @@ function t:main() { declare %test:assertTrue -function t:create-json-collection-if-not-available() { - let $create-collection := tei2json:create-json-collection-if-not-available() +function t:create-json-collection() { + let $create-collection := tei2json:create-json-collection() return xmldb:collection-available("/db/data/textgrid/json") }; diff --git a/exist-app/tests/tokenize-tests.xqm b/exist-app/tests/tokenize-tests.xqm index 05588d407f91488c11052f2ab21766c5194e1f7d..1f7c438b3f4faaacf49c57a894ecf6ee59994b8b 100644 --- a/exist-app/tests/tokenize-tests.xqm +++ b/exist-app/tests/tokenize-tests.xqm @@ -20,7 +20,6 @@ function t:main() { deep-equal(tokenize:main($src-TEI), $target-TEI) }; - declare %test:assertXPath("$result/local-name() = 'ab' and count($result//text()) = 2 ") function t:add-ids-single-node() @@ -103,42 +102,6 @@ as element(tei:TEI) { </TEI> }; -declare function local:get-tei-header-1() -as element(tei:TEI) { - <TEI xmlns="http://www.tei-c.org/ns/1.0"> - <teiHeader> - <fileDesc> - <sourceDesc> - <msDesc> - <msIdentifier> - <institution>University of Cambridge - Cambridge University Library</institution> - <idno>Sachau 
290 (=Sachau 339)</idno> - </msIdentifier> - </msDesc> - </sourceDesc> - </fileDesc> - </teiHeader> - </TEI> -}; - -declare function local:get-tei-header-2() -as element(tei:TEI) { - <TEI xmlns="http://www.tei-c.org/ns/1.0"> - <teiHeader> - <fileDesc> - <sourceDesc> - <msDesc> - <msIdentifier> - <institution>University of Cambridge - Cambridge University Library</institution> - <idno>Mingana ar. christ. 93[84]</idno> - </msIdentifier> - </msDesc> - </sourceDesc> - </fileDesc> - </teiHeader> - </TEI> -}; - declare function local:get-sample-result() { <TEI xmlns="http://www.tei-c.org/ns/1.0"> <teiHeader>