evolgen
introgression

Repository

#example for population Mmm_AFG:

#used BAM files:

#AFG1_396.bam
#AFG2_413.bam
#AFG3_416.bam
#AFG4_424.bam
#AFG5_435.bam
#AFG6_444.bam

# Write a per-sample coverage track "<bam>.bga" for every BAM file in the
# current directory using genomeCoverageBed in -bga (bedGraph) mode.
REFERENCE=mm10.fasta

# The original referenced the undefined variable $REFFERENCE, which expanded
# to nothing and left -g without an argument.
# NOTE(review): bedtools documents -g as not needed with -ibam; confirm
# whether the option can simply be dropped here.
for file in *.bam; do
    genomeCoverageBed -ibam "$file" -bga -g "$REFERENCE" > "${file}.bga"
done
# Per-sample coverage tracks (one .bga file per BAM) to be combined.
INPUT1=AFG1_396.bam.bga
INPUT2=AFG2_413.bam.bga
INPUT3=AFG3_416.bam.bga
INPUT4=AFG4_424.bam.bga
INPUT5=AFG5_435.bam.bga
INPUT6=AFG6_444.bam.bga

OUTPUT=Mmm_AFG.combined.bga

# Join the six tracks with unionBedGraphs, then collapse the per-sample
# columns (field 4 through the last field) into a single summed value,
# emitting tab-separated: chrom, start, end, summed coverage.
unionBedGraphs -i "$INPUT1" "$INPUT2" "$INPUT3" "$INPUT4" "$INPUT5" "$INPUT6" \
    | awk -v OFS='\t' '{
        total = 0
        for (col = 4; col <= NF; col++) total += $col
        print $1, $2, $3, total
    }' > "$OUTPUT"
# Build the low-coverage mask for the combined population track.
INPUT=Mmm_AFG.combined.bga
OUTPUT=Mmm_AFG.combined.bga.stcov5

# Intermediate files. Note that "${INPUT}.stcov5" is the same path as $OUTPUT,
# so the filtered intervals are overwritten by the final step below.
LOWCOV="${INPUT}.stcov5"
MERGED="${INPUT}.stcov5.merge"

# 1) keep intervals whose 4th column (coverage) is below 5
awk '$4 < 5' "$INPUT" > "$LOWCOV"
# 2) merge the retained intervals with bedtools
bedtools merge -i "$LOWCOV" > "$MERGED"
# 3) re-emit the merged intervals with a constant 4 in the 4th column
awk -v OFS='\t' '{ print $1, $2, $3, 4 }' "$MERGED" > "$OUTPUT"
# Build the low-coverage mask for a single sample track (here AFG1_396).
INPUT=AFG1_396.bam.bga
OUTPUT=AFG1_396.bam.bga.stcov5

# Intermediate files. Note that "${INPUT}.stcov5" is the same path as $OUTPUT,
# so the filtered intervals are overwritten by the final step below.
LOWCOV="${INPUT}.stcov5"
MERGED="${INPUT}.stcov5.merge"

# 1) keep intervals whose 4th column (coverage) is below 5
awk '$4 < 5' "$INPUT" > "$LOWCOV"
# 2) merge the retained intervals with bedtools
bedtools merge -i "$LOWCOV" > "$MERGED"
# 3) re-emit the merged intervals with a constant 4 in the 4th column
awk -v OFS='\t' '{ print $1, $2, $3, 4 }' "$MERGED" > "$OUTPUT"
#example for population Mmm_AFG:

#used BAM files:

#AFG1_396.bam
#AFG2_413.bam
#AFG3_416.bam
#AFG4_424.bam
#AFG5_435.bam
#AFG6_444.bam

# Multi-sample variant calling for one population and one chromosome:
# samtools mpileup over the listed BAM files, piped into bcftools call,
# then bgzip compression and tabix indexing of the resulting VCF.
REFERENCE=mm10.fasta

# Ploidy definition handed to `bcftools call --ploidy-file`; example content:
PLOIDY=ploidy.txt
#chrX    1       171031299       M       1
#chrY    1       91744698        M       1
#chrY    1       91744698        F       0
#chrM    1       16299   F       1
#chrM    1       16299   M       1
#*       *       *       M       2
#*       *       *       F       2

# Sample list handed to `bcftools call -S`; example content:
SAMPLES=AFG_samples.txt
#396     M
#413     M
#416     M
#424     M
#435     F
#444     M

OUTPUT=mpileup.q0Q10.chr1.Mmm_AFG.bcfcall.mv.vcf

INPUT1=AFG1_396.bam
INPUT2=AFG2_413.bam
INPUT3=AFG3_416.bam
INPUT4=AFG4_424.bam
INPUT5=AFG5_435.bam
INPUT6=AFG6_444.bam

BAMLIST=Mmm_AFG.list

# Write the BAM list in one truncating redirect. The original appended with
# '>>', so re-running the script left duplicate entries in $BAMLIST.
printf '%s\n' \
    "$INPUT1" "$INPUT2" "$INPUT3" "$INPUT4" "$INPUT5" "$INPUT6" > "$BAMLIST"

#example for chromosome 1

samtools mpileup -q 0 -Q 10 -A -d 99999 -t DP,AD,ADF,ADR -r chr1 \
        -uf "$REFERENCE" -b "$BAMLIST" \
    | bcftools call -O v -f GQ -m -v --ploidy-file "$PLOIDY" -S "$SAMPLES" \
    > "$OUTPUT"
bgzip "$OUTPUT"
# State the VCF preset explicitly instead of relying on tabix's default.
tabix -p vcf "${OUTPUT}.gz"
#example for population Mmm_AFG:

#VCF IDs:

#396
#413
#416
#424
#435
#444

# Restrict the population VCF to the listed individuals, drop indels and keep
# only sites selected by --non-ref-ac-any 1; vcftools writes the recoded VCF
# using $OUTPUT as the output prefix.
POPIDS=AFG.vcf.ids

# Write the ID list in one truncating redirect. The original appended with
# '>>', so re-running the script left duplicate IDs in $POPIDS.
printf '%s\n' 396 413 416 424 435 444 > "$POPIDS"

GZVCF=mpileup.q0Q10.chr1.Mmm_AFG.bcfcall.mv.vcf.gz
OUTPUT=Mmm_AFG.mpileup.q0Q10.chr1.bcfcall.mv.remIndels

vcftools --gzvcf "$GZVCF" \
    --remove-indels \
    --recode --recode-INFO-all \
    --non-ref-ac-any 1 \
    --keep "$POPIDS" \
    --out "$OUTPUT"
#example for population Mmm_AFG:

#VCF IDs:

#396
#413
#416
#424
#435
#444

# Run the mvcf2consensus sub-command of vcfparser.py on the indel-free
# population VCF for chr1, passing the six sample IDs and the consensus
# identifier Mmm_AFG.mv.
INPUT=Mmm_AFG.mpileup.q0Q10.chr1.bcfcall.mv.remIndels.recode.vcf
OUTPUT=Mmm_AFG.mpileup.q0Q10.chr1.bcfcall.mv.remIndels.recode.consensus

python vcfparser.py mvcf2consensus \
    -ivcf "$INPUT" \
    -o "$OUTPUT" \
    -cdp 11 \
    -chr chr1 \
    -samples 396,413,416,424,435,444 \
    -id Mmm_AFG.mv
#example for population Mmm_AFG:

# Run the vcf2fasta sub-command of vcfparser.py on the consensus VCF for chr1,
# supplying the reference FASTA and the population coverage mask (-ibga)
# together with -cov2N 4.
REFERENCE=mm10.fasta
INPUT=Mmm_AFG.mpileup.q0Q10.chr1.bcfcall.mv.remIndels.recode.consensus.vcf
OUTPUT=Mmm_AFG.mpileup.q0Q10.chr1.bcfcall.mv.remIndels.recode.consensus.chr1
MASKFILE=Mmm_AFG.combined.bga.stcov5

python vcfparser.py vcf2fasta \
    -ivcf "$INPUT" \
    -o "$OUTPUT" \
    -R "$REFERENCE" \
    -samples Mmm_AFG.mv \
    -chr chr1 \
    -ibga "$MASKFILE" \
    -cov2N 4
#example for the quartet [X]: FRA; [Y]: GER; [Z]: IRA; [O]: AFG

#change the bottom part of the script 'get_dK80.r' for each chromosome and quartet

#here you can find the example for chromosome 1

# Quartet definition: population labels for [X], [Y], [Z] and [O].
popX <- "Mmd_FRA"
popY <- "Mmd_GER"
popZ <- "Mmd_IRA"
popO <- "Mmm_AFG"

# Position of each population.
# NOTE(review): presumably the index of the sequence within SEQ_FILE; confirm
# against get_dK80.r.
popX.pos <- 2
popY.pos <- 3
popZ.pos <- 4
popO.pos <- 5

# Chromosome handled by this run and the matching consensus FASTA.
chr <- "chr1"
SEQ_FILE <- "http://wwwuser.gwdg.de/~evolbio/evolgen/wildmouse/introgression/mpileup_pop_mv/fasta/consensus/CAS_FRA_GER_IRA_AFG_SPRE.mpileup.q0Q10.chr1.bcfcall.mv.remIndels.recode.refmajorsample.ref.consensus.fasta"

# Result table path: <TMP_DIR>/<X>_<Y>.<Z>.<O>.<chr>.tsv
TMP_DIR <- "/tmp"
OUT_FILE <- sprintf(
  "%s/%s_%s.%s.%s.%s.tsv",
  TMP_DIR, popX, popY, popZ, popO, chr
)

# Window size and jump.
WSIZE <- 25000
WJUMP <- 25000

# Distance model name.
DISTMODEL <- "K80"