Gitlab Community Edition Instance

Skip to content
Snippets Groups Projects
get_dK80.r 3.72 KiB
Newer Older
Kristian Ullrich's avatar
Kristian Ullrich committed
#author: Kristian K Ullrich
#date: July 2017
#email: ullrich@evolbio.mpg.de

#chr1 - IRA_AFG
#[X]: Mmd_FRA
#[Y]: Mmd_GER
#[Z]: Mmd_IRA
#[O]: Mmm_AFG

popX <- "Mmd_FRA"
popY <- "Mmd_GER"
popZ <- "Mmd_IRA"
popO <- "Mmm_AFG"

TMP_DIR <- "/tmp"

popX.pos <- 2
popY.pos <- 3
popZ.pos <- 4
popO.pos <- 5

SEQ_FILE <- "http://wwwuser.gwdg.de/~evolbio/evolgen/wildmouse/introgression/mpileup_pop_mv/fasta/consensus/CAS_FRA_GER_IRA_AFG_SPRE.mpileup.q0Q10.chr1.bcfcall.mv.remIndels.recode.refmajorsample.ref.consensus.fasta"
chr <- "chr1"
OUT_FILE <- paste0(TMP_DIR,"/",popX,"_",popY,".",popZ,".",popO,".",chr,".tsv")

WSIZE <- 25000
WJUMP <- 25000

DISTMODEL <- "K80"

#####

library(Biostrings)
library(ape)
source("https://gitlab.gwdg.de/evolgen/tree/master/scripts/sliding_window_steps_generator.r")
Kristian Ullrich's avatar
Kristian Ullrich committed
options(scipen=22)

calc_delta_consensus_four<-function(seq,popX.pos,popY.pos,popZ.pos,popO.pos,TMP,distMODEL="K80"){
    MISSING<-NA
    TREELENGTH<-NA
    TREE<-NA
    TREETOPOLOGY<-NA
    dK80.XZO<-NA
    dK80.YZO<-NA
    OUT<-list(MISSING,dK80.XZO,dK80.YZO,TREELENGTH,TREE,TREETOPOLOGY)
    names(OUT)<-c("MISSING","dK80.XZO","dK80.YZO","TREELENGTH","TREE","TREETOPOLOGY")
    tmp.seq<-seq[c(popX.pos,popY.pos,popZ.pos,popO.pos)]
    names(tmp.seq)<-c("X","Y","Z","O")
    tmp.seq.XX<-gregexpr("X",tmp.seq[1])
    tmp.seq.XY<-gregexpr("X",tmp.seq[2])
    tmp.seq.XZ<-gregexpr("X",tmp.seq[3])
    tmp.seq.XO<-gregexpr("X",tmp.seq[4])
    tmp.seq.NX<-gregexpr("N",tmp.seq[1])
    tmp.seq.NY<-gregexpr("N",tmp.seq[2])
    tmp.seq.NZ<-gregexpr("N",tmp.seq[3])
    tmp.seq.NO<-gregexpr("N",tmp.seq[4])
    remove.pos<-unique(c(tmp.seq.XX[[1]],tmp.seq.XY[[1]],tmp.seq.XZ[[1]],tmp.seq.XO[[1]],tmp.seq.NX[[1]],tmp.seq.NY[[1]],tmp.seq.NZ[[1]],tmp.seq.NO[[1]]))
    remove.pos<-remove.pos[remove.pos!="-1"]
    if(length(remove.pos)!=0){
        tmp.seq.rem<-BStringSet(apply(as.matrix(tmp.seq)[,-remove.pos,drop=FALSE],1,function(x) paste(x,collapse="")))
    }
    if(length(remove.pos)==0){
        tmp.seq.rem<-tmp.seq
    }
    OUT$MISSING<-length(remove.pos)
    writeXStringSet(tmp.seq.rem,file=paste0(TMP,"/tmp.seq.rem.fasta"))
    tmp.dna<-read.dna(paste0(TMP,"/tmp.seq.rem.fasta"),format="fasta")
    if(length(tmp.dna)==0){
        return(OUT)
    }
    if(all(is.na(base.freq(tmp.dna)))){
        return(OUT)
    }
    tmp.dist.dna.MODEL<-dist.dna(tmp.dna,as.matrix=T,model=distMODEL)
    if(any(is.na(tmp.dist.dna.MODEL))){
        return(OUT)
    }
    dXY<-tmp.dist.dna.MODEL[1,2]
    dXZ<-tmp.dist.dna.MODEL[1,3]
    dXO<-tmp.dist.dna.MODEL[1,4]
    dYZ<-tmp.dist.dna.MODEL[2,3]
    dYO<-tmp.dist.dna.MODEL[2,4]
    dZO<-tmp.dist.dna.MODEL[3,4]
    dK80.XZO<-dXO-dXZ
    dK80.YZO<-dYO-dYZ
    OUT$dK80.XZO<-dK80.XZO
    OUT$dK80.YZO<-dK80.YZO
    tmp.dna.nj<-midpoint(nj(tmp.dist.dna.MODEL))
    OUT$TREELENGTH<-sum(tmp.dna.nj$edge.length)
    OUT$TREE<-write.tree(tmp.dna.nj)
    tmp.dna.nj$edge.length<-NULL
    OUT$TREETOPOLOGY<-write.tree(tmp.dna.nj)
    return(OUT)
}

#####MAIN#####

sink(OUT_FILE)

cat("#[X]: ")
cat(popX)
cat("\n")
cat("#[Y]: ")
cat(popY)
cat("\n")
cat("#[Z]: ")
cat(popZ)
cat("\n")
cat("#[O]: ")
cat(popO)
cat("\n")
cat("CHR\tSTART\tEND\t")
cat("MISSING\tdK80.XZO\tdK80.YZO\tTREELENGTH\tTREE\tTREETOPOLOGY\n")

tmp.chr.seq<-readBStringSet(SEQ_FILE)
tmp.sw<-sliding_window_steps_generator(window=WSIZE,jump=WJUMP,start.by=1,end.by=unique(width(tmp.chr.seq)))
pb<-txtProgressBar(min=1,max=dim(tmp.sw)[2],style=3)
for(i in 1:dim(tmp.sw)[2]){
    tmp.out<-subseq(tmp.chr.seq,tmp.sw[1,i],tmp.sw[2,i])
    fourout<-calc_delta_consensus_four(tmp.out,popX.pos,popY.pos,popZ.pos,popO.pos,TMP_DIR,DISTMODEL)
    cat(chr)
    cat("\t")
    cat(tmp.sw[1,i])
    cat("\t")
    cat(tmp.sw[2,i])
    cat("\t")
    cat(unlist(fourout),sep="\t")
    cat("\n")
}
close(pb)

sink(NULL)

#####

quit()