Gitlab Community Edition Instance

Skip to content
Snippets Groups Projects

Feature/#56 section wise collation

Merged: Michelle Weidling requested to merge feature/#56-section-wise-collation into develop
2 files
+ 12
− 13
Compare changes
  • Side-by-side
  • Inline
Files
2
@@ -5,9 +5,6 @@ xquery version "3.1";
: the TEI files and uses them as an input for the plain text creation. These
: serve as a basis for the collation with CollateX in the project's CI/CD
: pipelines.
:
: The chunks are defined in the file "xpaths_for_collation.xml" which has the
: TextGrid URI textgrid:3rzkb.
:)
module namespace coll="http://ahikar.sub.uni-goettingen.de/ns/collate";
@@ -19,14 +16,13 @@ import module namespace fragment="https://wiki.tei-c.org/index.php?title=Milesto
declare variable $coll:textgrid := "/db/apps/sade/textgrid";
declare variable $coll:data := $coll:textgrid || "/data";
declare variable $coll:xpaths := doc($coll:data || "/3rzkb.xml");
declare variable $coll:txt := $coll:textgrid || "/txt";
declare function coll:main()
as xs:string+ {
coll:create-txt-collection-if-not-available(),
for $text in coll:get-transcriptions-and-transliterations()[descendant::tei:milestone] return
for $text in coll:get-transcriptions-and-transliterations() return
let $relevant-text := coll:get-relevant-text($text)
let $file-name := coll:make-file-name($text)
return
@@ -55,13 +51,17 @@ as xs:boolean {
false()
};
(:~
: Builds the file name for a given text by joining three parts with "-":
: the language prefix, the title created from the metadata and the suffix
: returned by coll:make-file-name-suffix.
:
: An example for the file name is
: syriac-Brit_Lib_Add_7200-3r131-transcription.txt
:
: NOTE(review): judging from the example, the suffix presumably consists of
: the URI, the text type and the file extension; confirm against
: coll:make-file-name-suffix.
:
: @param $text the tei:text element the file name is created for
: @return the assembled file name
:)
declare function coll:make-file-name($text as element(tei:text))
as xs:string {
let $lang-prefix := coll:get-language-prefix($text)
let $title-from-metadata := coll:create-metadata-title-for-file-name($text)
let $file-name-plus-text-type := coll:make-file-name-suffix($text)
let $uri-plus-text-type := coll:make-file-name-suffix($text)
return
$lang-prefix || "-" || $title-from-metadata || "-" || $file-name-plus-text-type
$lang-prefix || "-" || $title-from-metadata || "-" || $uri-plus-text-type
};
declare function coll:get-language-prefix($text as element(tei:text))
@@ -77,8 +77,7 @@ as xs:string? {
(: although the transliteration may have another language than
the transcription, the latter remains decisive for the prefix :)
case "transliteration" return
(: transliterations are always encoded before transcriptions :)
switch ($text/following-sibling::tei:text[@type = "transcription"]/@xml:lang)
switch ($text/root()//tei:text[@type = "transcription"]/@xml:lang)
case "ara" return "arabic"
case "karshuni" return "karshuni"
case "syc" return "syriac"
@@ -188,7 +187,7 @@ as xs:string {
for $text in $texts return
coll:prepare-plain-text-creation($text)
return
coll:format-string($prepared-texts)
coll:format-and-normalize-string($prepared-texts)
};
(:~
@@ -223,7 +222,7 @@ as xs:string {
$text
};
declare function coll:format-string($strings as xs:string+)
declare function coll:format-and-normalize-string($strings as xs:string+)
as xs:string {
string-join($strings, " ")
=> replace(" @", "")
Loading