From edafc670880803433b7f2255058bf9696699b581 Mon Sep 17 00:00:00 2001
From: jlboat <jboatw2@clemson.edu>
Date: Sun, 27 Jun 2021 14:37:37 -0400
Subject: [PATCH] Added bash scripts from sbatch, converted py2 to py3, edited
 design files

---
 bash/run_ase_align_and_count_testData.bash    | 220 ++++++++++++++++++
 bash/run_ase_bayesian.bash                    |  57 +++++
 ...n_ase_create_design_file_4_prior_calc.bash |   9 +
 ...genotype_specific_references_testData.bash |  51 ++++
 ...ase_merge_priors_2_comparate_testData.bash |  32 +++
 bash/run_ase_prior_calculation_testData.bash  |  22 ++
 bash/run_ase_summarize_counts_testData.bash   |  46 ++++
 example_in/comparate_design_file.csv          |   1 -
 example_in/df_BASE_galaxy_W55_noEmptyFq.csv   |   1 -
 example_in/df_list_G2_VCF_split.csv           |   1 -
 hpc/ase_scripts/BWASplitSAM_07mai.py          |  38 +--
 hpc/ase_scripts/sam_compare_w_feature.py      |  10 +-
 ...e_cnts_table_1cond_and_output_APN_06amm.py |   2 +-
 .../NBmodel_stan2_slurm_02amm.py              |   2 +-
 14 files changed, 463 insertions(+), 29 deletions(-)
 create mode 100644 bash/run_ase_align_and_count_testData.bash
 create mode 100644 bash/run_ase_bayesian.bash
 create mode 100644 bash/run_ase_create_design_file_4_prior_calc.bash
 create mode 100644 bash/run_ase_genotype_specific_references_testData.bash
 create mode 100644 bash/run_ase_merge_priors_2_comparate_testData.bash
 create mode 100644 bash/run_ase_prior_calculation_testData.bash
 create mode 100644 bash/run_ase_summarize_counts_testData.bash

diff --git a/bash/run_ase_align_and_count_testData.bash b/bash/run_ase_align_and_count_testData.bash
new file mode 100644
index 0000000..c33b358
--- /dev/null
+++ b/bash/run_ase_align_and_count_testData.bash
@@ -0,0 +1,220 @@
+#!/usr/bin/env bash
+
+## Set / Create Directories and Variables
+#Everything is relative to current directory
+SCRIPTS=hpc/ase_scripts
+REF=example_out/reference/updated_genomes_vcfConsensus
+ORIG=example_in/reads
+
+BEDFILE=example_in/reformatted_BASE_testdata_bedfile.bed
+
+## Create output directory for alignments to updated genome
+OUTALN=example_out/aln_upd_genome
+    if [ ! -e $OUTALN ]; then mkdir -p $OUTALN; fi
+PSAM=example_out/aln_upd_genome_bwa_parse
+    if [ ! -e $PSAM ]; then mkdir -p $PSAM; fi
+
+## Create output directory for ase counts from sam compare
+SAMC=example_out/ase_counts_updated
+    if [ ! -e $SAMC ]; then mkdir -p $SAMC; fi
+
+## Create output directory for upd aln summaries and checks
+CHKALN=example_out/check_aln
+    if [ ! -e $CHKALN ]; then mkdir -p $CHKALN; fi
+
+## Create output directory for Sam Compare check
+CHKSC=example_out/check_samcomp
+    if [ ! -e $CHKSC ]; then mkdir -p $CHKSC; fi
+
+# G1,G2,sampleID,fqName,fqExtension,readLength,techRep
+DESIGN_FILE=example_in/df_BASE_galaxy_W55_noEmptyFq.csv
+DESIGN=$(sed -n "${SLURM_ARRAY_TASK_ID}p" $DESIGN_FILE)
+IFS=',' read -ra ARRAY <<< "$DESIGN"
+
+G1=${ARRAY[0]}
+G2=${ARRAY[1]}
+READLEN=${ARRAY[5]}
+FQ=${ARRAY[3]}
+EXT=${ARRAY[4]}
+
+## Create temp dir
+ROZ=roz_${FQ}
+    mkdir -p ${ROZ}
+
+## set READ and calculate ave readlength
+READ=${ORIG}/${FQ}${EXT}
+echo " read is $READ 
+"
+
+AVE_RL=$(awk '{if(NR%4==2 && length) {count++; bases += length} } END {print bases/count}' ${READ} | awk '{ printf "%.0f\\n", $1 }')
+
+######  Create modified BED file to use in SAM compare - this bed has features 1st with start and end positions in genome
+## use awk to reorder columms in bed file
+SBED=example_out/snp_feature_first.bed
+
+echo "Reformatting $BEDFILE"
+awk -v OFS='\t' '{print $4,$2,$3,$1}' $BEDFILE > $SBED
+
+
+###### (1) Align Reads to Updated Genomes - first to G2 ref then to G1 reference - and Parse sam file
+FQLINEFN=$(wc -l $READ)
+FQLINE=$(echo $FQLINEFN | cut -d" " -f1)
+NUMREAD=$(( FQLINE / 4 )) 
+FN=$(echo $FQLINEFN | cut -d" " -f2)
+    
+## count number of starting reads - same for G1 and G2 refs
+echo $NUMREAD | awk -v fq=${FQ} -v gq=pre_aln_read_count '{print "filename" "," gq "\n" fq "," $0}' > $CHKALN/pre_aln_reads_${FQ}.csv
+
+
+
+for FOO in G1 G2
+do
+    if [[ $FOO == 'G2' ]]
+    then
+        BREF=$REF/${G2}_snp_upd_genome_BWA
+
+        echo "Aligning G2 to reference $BREF"
+		bwa mem -t 8 -M $BREF $READ > $OUTALN/${G2}_${FQ}_upd.sam
+
+        echo -e "Start BWASplitSam on: $OUTALN/${G2}_${FQ}_upd.sam"
+        $SCRIPTS/BWASplitSAM_07mai.py -s $OUTALN/${G2}_${FQ}_upd.sam --outdir $ROZ -fq1 $READ
+ 
+        ## cat together mapped and opposite
+        cat $ROZ/${G2}_${FQ}_upd_mapped.sam $ROZ/${G2}_${FQ}_upd_oposite.sam > $PSAM/${G2}_${FQ}_upd_uniq.sam
+
+    elif [[ $FOO == 'G1' ]]
+    then
+        BREF=$REF/${G1}_snp_upd_genome_BWA
+
+        echo "Aligning G1 to reference $BREF"
+        bwa mem -t 8 -M $BREF $READ > $OUTALN/${G1}_${FQ}_upd.sam
+
+        echo -e "Start BWASplitSam on: $OUTALN/${G1}_${FQ}_upd.sam"
+        $SCRIPTS/BWASplitSAM_07mai.py -s $OUTALN/${G1}_${FQ}_upd.sam --outdir $ROZ -fq1 $READ
+
+        ## cat together mapped and opposite
+        cat $ROZ/${G1}_${FQ}_upd_mapped.sam $ROZ/${G1}_${FQ}_upd_oposite.sam > $PSAM/${G1}_${FQ}_upd_uniq.sam
+    fi
+done
+
+### move alignment summary csv files
+mv ${ROZ}/${G1}_${FQ}_upd_summary.csv ${CHKALN}/${G1}_${FQ}_upd_summary.csv
+mv ${ROZ}/${G2}_${FQ}_upd_summary.csv ${CHKALN}/${G2}_${FQ}_upd_summary.csv
+
+### for every FQ file run, should have 2 sam files (NOTE default is SE)
+echo "
+    checking for 2 sam files"
+    
+python $SCRIPTS/check_sam_present_04amm.py \
+    -fq $FQ \
+    -alnType SE \
+    -samPath $PSAM \
+    -G1 $G1 \
+    -G2 $G2 \
+    -o $CHKALN/check_2_sam_files_${FQ}.txt
+
+## run python script to count reads into aln and in each SAM file
+echo "
+    checking for missing reads in sam files"
+
+python $SCRIPTS/check_for_lost_reads_05amm.py \
+    -a1 $CHKALN/${G1}_${FQ}_upd_summary.csv \
+    -a2 $CHKALN/${G2}_${FQ}_upd_summary.csv \
+    -numread $CHKALN/pre_aln_reads_${FQ}.csv \
+    -fq $FQ \
+    -o $CHKALN/check_start_reads_vs_aln_reads_${FQ}.csv
+
+
+## Insert bedtools intersect script + any checks  (reads in aln sam output)
+###### (2) Bedtools Intersect:   Here we will call the shell script to reformat the sam file so that the have feature names instead of CHR names
+## In parsed SAM, 
+
+for SAMFILE in $PSAM/*_${FQ}_upd_uniq.sam
+do
+    MYSAMFILE2=$(basename $SAMFILE)
+
+    AWKTMP=$PSAM/${MYSAMFILE2/_uniq.sam/_uniq_AWK.txt}
+    NEWSAM=$PSAM/${MYSAMFILE2/_uniq.sam/_uniq_FEATURE.sam}    
+
+    #Create a bed file to write the  starting position of every read and an end postion (end = start + readlength)
+    awk -v readLen=${READLEN} -v OFS='\t' '{print $3,$4,$4+readLen}' $SAMFILE > $AWKTMP
+
+    BED4=${PSAM}/${MYSAMFILE2/_uniq.sam/_uniq_int_all.bed}
+    BED3=$PSAM/${MYSAMFILE2/_uniq.sam/_uniq_int.bed}
+    SUM=${PSAM}/${MYSAMFILE2/_uniq.sam/_drop_summary.txt}
+
+    #Run bedtools intersect with -loj between the reads and the features. 
+    #We will have one result for each region 
+    # pipe to awk to remove rows where a read does NOT overlap with a feature
+    bedtools intersect -a $AWKTMP -b $SBED -loj > ${BED4}
+
+    awk -v OFS='\t' '$4 !="."' ${BED4} > $BED3
+    
+    ## create file with counts for before and after dropping
+    awk -v a=0 -v b=${G2}_${FQ} -v OFS=',' 'BEGIN{print "fqName", "number_overlapping_rows", "total_number_rows"delimited}; { if ($4 !=".") a++} END { print b, a, NR}' ${BED4} > ${SUM}
+
+    #With awk substitute column 3 of sam file with column 7 (Feature name) of bed file (using chrom and pos as keys).
+    ##omit reads with no feature assigned
+    awk -v OFS='\t' 'FNR==NR{a[$1,$2]=$7; next} {$3=a[$3,$4]}1' ${BED3} $SAMFILE | awk -F'\t'  '$3!=""' > $NEWSAM 
+
+    echo initial sam file $SAMFILE 
+    echo awk outfile $AWKTMP
+    echo bed intersect outfile $BED3
+    echo new sam file "$NEWSAM"
+
+done 
+
+       
+
+## Grab sam files and bed files
+	## sam1 (samA) = G1 and sam2 (samB) = G2
+SAM1=$PSAM/${G1}_${FQ}_upd_uniq_FEATURE.sam
+SAM2=$PSAM/${G2}_${FQ}_upd_uniq_FEATURE.sam
+BED1=$PSAM/${G1}_${FQ}_upd_uniq_int.bed
+BED2=$PSAM/${G2}_${FQ}_upd_uniq_int.bed
+
+awk 'NR==FNR{c[$3]++;next};c[$7] == 0' $SAM1 $BED1 > $CHKSC/check_sam_bed_${G1}_${FQ}.txt
+awk 'NR==FNR{c[$3]++;next};c[$7] == 0' $SAM2 $BED2 > $CHKSC/check_sam_bed_${G2}_${FQ}.txt
+
+###### (3) Run Sam Compare
+
+READ1=${ORIG}/${FQ}${EXT}
+
+echo -e "READ1: '${READ1}"
+echo -e "SAM1: '${SAM1}'"
+echo -e "SAM2: '${SAM2}'"
+echo -e "BED: '${SBED}'"
+
+echo -e "starting sam compare for $FQ "
+## NOTE using average read length!!
+python $SCRIPTS/sam_compare_w_feature.py \
+    -n \
+    -l ${AVE_RL} \
+    -f $BEDFILE \
+    -q $READ1 \
+    -A $SAM1 \
+    -B $SAM2 \
+    -c $SAMC/ase_counts_${FQ}.csv \
+    -t $SAMC/ase_totals_${FQ}.txt \
+    --log $CHKSC/ase_log_${FQ}.log
+
+### mv drop summary to check_aln dir
+echo "
+    Moving drop read summary files to check_aln dir"
+mv ${PSAM}/*_${FQ}_upd_drop_summary.txt ${CHKALN}/
+
+echo -e "run sam compare check for $FQ "
+# Check to make sure counts in csv summary file is within range of minimum unique reads from respective sam files and 
+# the summation of the unique reads of both sam files
+python $SCRIPTS/check_samcomp_for_lost_reads_03amm.py \
+    -b1 $CHKALN/${G1}_${FQ}_upd_drop_summary.txt \
+    -b2 $CHKALN/${G2}_${FQ}_upd_drop_summary.txt \
+    -G1 $G1 \
+    -G2 $G2 \
+    -s $SAMC/ase_totals_${FQ}.txt \
+    -fq $FQ \
+    -o $CHKSC/check_samcomp_${FQ}_aln_2_upd.csv
+
+rm -r $ROZ
+
+
diff --git a/bash/run_ase_bayesian.bash b/bash/run_ase_bayesian.bash
new file mode 100644
index 0000000..3c566b8
--- /dev/null
+++ b/bash/run_ase_bayesian.bash
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+
+## 2 conditions - M vs V
+
+## Set / Create Directories and Variables
+SCRIPTS=hpc/stan2_scripts
+
+## Create temp output directory for initial datafile with modified headers before entering Bayesian
+ROZ=example_out/temp_directory
+    mkdir -p $ROZ
+
+## directory for model testing
+BAYES=example_out/bayesian_in
+
+BAYESOUT=example_out/bayesian_out
+    mkdir -p $BAYESOUT
+
+## Set design file with G1, G2, c1, c2, and input filename columns
+DESIGN_FILE=example_in/comparate_design_file.csv
+
+DESIGN=$(sed -n "${SLURM_ARRAY_TASK_ID}p" $DESIGN_FILE)
+IFS=',' read -ra ARRAY <<< "$DESIGN"
+
+COMP_1=${ARRAY[0]}
+COMP_2=${ARRAY[1]}
+COMPID=${ARRAY[2]}
+
+echo "comparate 1 is $COMP_1"
+
+## Set number of comparates to be analyzed
+	## M vs F for each line = 2
+COMPNUM=2
+
+## set number of iterations = 100,000
+ITER=100000
+
+## set burn in of 10,000
+WARMUP=10000
+
+######  Run python script calling environmental bayesian model (stan2, 2 conditions)
+python3 $SCRIPTS/NBmodel_stan2_slurm_02amm.py \
+    -comparate_1 $COMP_1 \
+    -comparate_2 $COMP_2 \
+    -c1_g1 M \
+    -c1_g2 M \
+    -c2_g1 V \
+    -c2_g2 V \
+    -compID $COMPID \
+    -datafile $BAYES \
+    -datafile2 $BAYESOUT/ \
+    -cond $COMPNUM \
+    -workdir . \
+    -routput $BAYESOUT \
+    -subpath $SCRIPTS/NBModel_stan2_flex_prior.R \
+    -iterations $ITER \
+    -warmup $WARMUP \
+    -o $BAYESOUT
diff --git a/bash/run_ase_create_design_file_4_prior_calc.bash b/bash/run_ase_create_design_file_4_prior_calc.bash
new file mode 100644
index 0000000..33951d9
--- /dev/null
+++ b/bash/run_ase_create_design_file_4_prior_calc.bash
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+### create design file for calculating priors from df_ase_samcomp_summed.dsv 
+
+## Set / Create Directories and Variables
+DF=example_out
+DESIGN=example_in/df_ase_samcomp_summed.csv
+
+awk 'BEGIN{FS=","; OFS=","} { print $1, $2, $4}' ${DESIGN} | sort -u > ${DF}/df_priors.csv
diff --git a/bash/run_ase_genotype_specific_references_testData.bash b/bash/run_ase_genotype_specific_references_testData.bash
new file mode 100644
index 0000000..f18e569
--- /dev/null
+++ b/bash/run_ase_genotype_specific_references_testData.bash
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+
+## set directories
+## All directories are relative to the main BayesASE folder
+VCFIN=example_in/vcf_by_genotype
+VCFOUT=example_out/vcf_by_genotype
+UREF=example_out/reference/updated_genomes_vcfConsensus
+mkdir -p $VCFOUT
+mkdir -p $UREF
+
+FASTA=example_in/reference/dmel_r551_chromosome_2R_X.fasta
+
+# line,old
+# W55,Winters_55
+# W1118,w1118_w118
+DESIGN_FILE=example_in/df_list_G2_VCF_split.csv
+DESIGN=$(sed -n "${SLURM_ARRAY_TASK_ID}p" $DESIGN_FILE)
+IFS=',' read -ra ARRAY <<< "$DESIGN"
+
+G2=${ARRAY[0]}
+
+#### (1) UPDATE SNPs
+### Index starting VCF files
+
+
+    ## remove and replace if gz file already exists (so we can overwrite)
+    if [[ -f ${VCFOUT}/${G2}_snp_testGenes_sorted.vcf.gz ]] ; then
+        rm ${VCFOUT}/${G2}_snp_testGenes_sorted.vcf.gz
+    fi
+
+    # sort split VCF files
+    bcftools sort -O v ${VCFIN}/${G2}_snp_testGenes.vcf -o ${VCFOUT}/${G2}_snp_testGenes_sorted.vcf
+
+    echo "indexing individual SNP vcf files: ${G2} "
+    bgzip ${VCFOUT}/${G2}_snp_testGenes_sorted.vcf
+    tabix ${VCFOUT}/${G2}_snp_testGenes_sorted.vcf.gz
+
+    echo "updating SNPs: ${G2} "
+    cat ${FASTA} | vcf-consensus ${VCFOUT}/${G2}_snp_testGenes_sorted.vcf.gz > ${UREF}/${G2}_snp_upd_genome.fasta
+
+
+    #### (2) Build indexes from the references
+    echo `date`": building BWA index for ${G2} UPD genome "
+    ##BWA-mem references  -p is the prefix for the output database, -a is the indexing algorithm ('bwtsw' is for ref>2G, 'is' for ref<2G).
+    bwa index -p ${UREF}/${G2}_snp_upd_genome_BWA -a bwtsw ${UREF}/${G2}_snp_upd_genome.fasta
+
+    echo `date`": building samtools index for ${G2} UPD genome"
+    samtools faidx ${UREF}/${G2}_snp_upd_genome.fasta
+
+	
+	
diff --git a/bash/run_ase_merge_priors_2_comparate_testData.bash b/bash/run_ase_merge_priors_2_comparate_testData.bash
new file mode 100644
index 0000000..29a1ae9
--- /dev/null
+++ b/bash/run_ase_merge_priors_2_comparate_testData.bash
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+
+## merge priors to comparate
+## merge comparates together
+
+
+SCRIPTS=hpc/ase_scripts
+
+## user must provide following design file for merging comparates:
+DESIGN2=example_in/df_merge_comparates_4_bayesian.csv
+
+DESIGN=example_out/df_priors.csv
+PRIORS=example_out/priors_fromData
+FILT=example_out/ase_counts_summarized
+BAYESIN=example_out/bayesian_in
+    mkdir -p $BAYESIN
+
+    echo "running merge priors to comparate
+    "
+    python3 ${SCRIPTS}/merge_priors_to_comparate_03amm.py \
+        --output ${BAYESIN} \
+        --comp ${FILT} \
+        --prior ${PRIORS} \
+        --design ${DESIGN}
+
+
+    python3 ${SCRIPTS}/merge_comparates_and_gen_headers_for_bayesian_02amm.py \
+    --output ${BAYESIN} \
+    --comp ${BAYESIN} \
+    --design ${DESIGN2}
+
+
diff --git a/bash/run_ase_prior_calculation_testData.bash b/bash/run_ase_prior_calculation_testData.bash
new file mode 100644
index 0000000..1bde190
--- /dev/null
+++ b/bash/run_ase_prior_calculation_testData.bash
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+### (1) calculate priors from ASE count tables (generated from simulated reads, DNA reads, or RNA data reads)
+
+## Set / Create Directories and Variables
+SCRIPTS=hpc/ase_scripts
+
+## this example calculates from data
+FILT=example_out/ase_counts_summarized
+
+OUTPUT=example_out/priors_fromData
+mkdir -p $OUTPUT
+
+DESIGN=example_out/df_priors.csv
+
+##### (1) calculate priors from ASE counts - data
+
+python3 $SCRIPTS/calculate_priors_from_ase_count_tables_03amm.py \
+    --output $OUTPUT \
+    --design $DESIGN \
+    --input $FILT \
+    --debug
diff --git a/bash/run_ase_summarize_counts_testData.bash b/bash/run_ase_summarize_counts_testData.bash
new file mode 100644
index 0000000..f901cfa
--- /dev/null
+++ b/bash/run_ase_summarize_counts_testData.bash
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+## combine ase count tables
+## summarize data
+
+
+SCRIPTS=hpc/ase_scripts
+SAMC=example_out/ase_counts_updated
+
+SSUM=example_out/ase_counts_summed_techReps
+    mkdir -p $SSUM
+FILT=example_out/ase_counts_summarized
+    mkdir -p $FILT
+
+BEDFILE=example_out/snp_feature_first.bed
+DESIGN=example_in/df_BASE_galaxy_W55_combinedFq.csv
+
+
+    mkdir -p ${SSUM}
+
+    SIM=False
+    APN=1
+
+    python3 ${SCRIPTS}/combine_cnt_tables_13amm.py \
+        -design ${DESIGN} \
+        -sim ${SIM} \
+        --bed ${BEDFILE} \
+        --path ${SAMC} \
+        --designdir example_in \
+        --out ${SSUM}
+
+
+    ## design2 is created in above script
+    DESIGN2=example_in/df_ase_samcomp_summed.csv
+
+    mkdir -p ${FILT}
+
+    python3 ${SCRIPTS}/summarize_sam_compare_cnts_table_1cond_and_output_APN_06amm.py \
+        --output ${FILT} \
+        --design ${DESIGN2} \
+        --parent1 G1 \
+        --parent2 G2 \
+        --sampleCol sample \
+        --sampleIDCol sampleID \
+        --sam-compare-dir ${SSUM} \
+        --apn ${APN} \
diff --git a/example_in/comparate_design_file.csv b/example_in/comparate_design_file.csv
index fd29fb6..0439f19 100755
--- a/example_in/comparate_design_file.csv
+++ b/example_in/comparate_design_file.csv
@@ -1,2 +1 @@
-Comparate_1,Comparate_2,compID
 W55_M,W55_V,W55_M_V
diff --git a/example_in/df_BASE_galaxy_W55_noEmptyFq.csv b/example_in/df_BASE_galaxy_W55_noEmptyFq.csv
index f29c561..bb98727 100755
--- a/example_in/df_BASE_galaxy_W55_noEmptyFq.csv
+++ b/example_in/df_BASE_galaxy_W55_noEmptyFq.csv
@@ -1,4 +1,3 @@
-G1,G2,sampleID,fqName,fqExtension,readLength,techRep
 W1118,W55,W55_M_1,W55_M_1_1,.fastq,96,1
 W1118,W55,W55_M_1,W55_M_1_2,.fastq,96,2
 W1118,W55,W55_M_1,W55_M_1_3,.fastq,96,3
diff --git a/example_in/df_list_G2_VCF_split.csv b/example_in/df_list_G2_VCF_split.csv
index 3d51daa..bd9531f 100644
--- a/example_in/df_list_G2_VCF_split.csv
+++ b/example_in/df_list_G2_VCF_split.csv
@@ -1,3 +1,2 @@
-line,old
 W55,Winters_55
 W1118,w1118_w118
diff --git a/hpc/ase_scripts/BWASplitSAM_07mai.py b/hpc/ase_scripts/BWASplitSAM_07mai.py
index 5dd3c45..a869013 100755
--- a/hpc/ase_scripts/BWASplitSAM_07mai.py
+++ b/hpc/ase_scripts/BWASplitSAM_07mai.py
@@ -132,12 +132,12 @@ def SplitSAMPE (fname,odir,summname):
 
         # Getting & printing "gray" reads
         elif elements[1] in flags_grayones:
-            print >> GRAY,'\t'.join(elements)
+            print('\t'.join(elements), file=GRAY)
             counter_grayones += 1
             total += 1
         # Getting & printing "OPOSITE" reads
         elif elements[1] in flags_opositestrand:
-            print >> OPOSITE,'\t'.join(elements)
+            print('\t'.join(elements), file=OPOSITE)
             counter_opositestrand += 1
             total += 1
 
@@ -146,31 +146,31 @@ def SplitSAMPE (fname,odir,summname):
         elif elements[1] in flags_mapped1:
             regmatch=re.match(".+\tAS:i:([0-9]+)\tXS:i:([0-9]+).*",line)
             if int(regmatch.group(1))-int(regmatch.group(2))==0:
-                print >> AMBIGUOUS,'\t'.join(elements)
+                print('\t'.join(elements), file=AMBIGUOUS)
                 ambiguous1.append(elements[0])
                 counter_ambiguous1 += 1
                 total += 1
             else:
-                print >> MAPPED,'\t'.join(elements)
+                print('\t'.join(elements), file=MAPPED)
                 counter_mapped1 += 1
                 total += 1
 
         elif elements[1] in flags_mapped2:
             regmatch=re.match(".+\tAS:i:([0-9]+)\tXS:i:([0-9]+).*",line)
             if int(regmatch.group(1))-int(regmatch.group(2))==0:
-                print >> AMBIGUOUS,'\t'.join(elements)
+                print('\t'.join(elements), file=AMBIGUOUS)
                 ambiguous2.append(elements[0])
                 counter_ambiguous2 += 1
                 total += 1
             else:
-                print >> MAPPED,'\t'.join(elements)
+                print('\t'.join(elements), file=MAPPED)
                 counter_mapped2 += 1
                 total += 1
 
         # If not in the previous categories then unknown
         else:
-            print "Warning: "+elements[1]+" key is not recognized"
-            print >> UNRECOGNIZED,'\t'.join(elements)
+            print("Warning: "+elements[1]+" key is not recognized")
+            print('\t'.join(elements), file=UNRECOGNIZED)
             
 
     # Print summary
@@ -186,9 +186,9 @@ def SplitSAMPE (fname,odir,summname):
                     counter_bothunmapped2,counter_mapped1,
                     counter_mapped2,counter_ambiguous1,
                     counter_ambiguous2]
-    count_values = map(str,count_values)
-    print >> SUMMARY,','.join(count_names)
-    print >> SUMMARY,','.join(count_values)
+    count_values = list(map(str,count_values))
+    print(','.join(count_names), file=SUMMARY)
+    print(','.join(count_values), file=SUMMARY)
 
 
     # Clossing all files
@@ -253,12 +253,12 @@ def SplitSAMSE (sam,odir,summname):
             counter_unmappedread += 1
         # Getting & printing "OPOSITE" reads
         elif elements[1] in flags_opositestrand:
-            print >> OPOSITE,'\t'.join(elements)
+            print('\t'.join(elements), file=OPOSITE)
             counter_total         += 1
             counter_opositestrand += 1
         # Getting & printing "CHIMERIC" reads
         elif elements[1] in flags_chimeric:
-            print >> CHIMERIC,"\t".join(elements)
+            print("\t".join(elements), file=CHIMERIC)
             counter_total    += 1
             counter_chimeric += 1
         # Getting & printing AMBIGUOUS reads, those who are not ambiguous are 
@@ -266,18 +266,18 @@ def SplitSAMSE (sam,odir,summname):
         elif elements[1] in flags_mapped:
             regmatch=re.match(".+\tAS:i:([0-9]+)\tXS:i:([0-9]+).*",line)
             if int(regmatch.group(1))-int(regmatch.group(2))==0:
-                print >> AMBIGUOUS,'\t'.join(elements)
+                print('\t'.join(elements), file=AMBIGUOUS)
                 ambiguous.append(elements[0])
                 counter_total     += 1
                 counter_ambiguous += 1
             else:
-                print >> MAPPED,'\t'.join(elements)
+                print('\t'.join(elements), file=MAPPED)
                 counter_total  += 1
                 counter_mapped += 1
 
         #If not in the previous categories then unknown
         else:
-            print "Warning: "+elements[1]+" key is not recognized"
+            print("Warning: "+elements[1]+" key is not recognized")
 
 
     #Print summary
@@ -296,9 +296,9 @@ def SplitSAMSE (sam,odir,summname):
                     counter_ambiguous,
                     counter_chimeric]
 
-    count_values = map(str,count_values)
-    print >> SUMMARY  ,','.join(count_names)
-    print >> SUMMARY  ,','.join(count_values)
+    count_values = list(map(str,count_values))
+    print(','.join(count_names), file=SUMMARY)
+    print(','.join(count_values), file=SUMMARY)
 
     #Clossing all files
     SAM.close()
diff --git a/hpc/ase_scripts/sam_compare_w_feature.py b/hpc/ase_scripts/sam_compare_w_feature.py
index bda9454..fe8139e 100755
--- a/hpc/ase_scripts/sam_compare_w_feature.py
+++ b/hpc/ase_scripts/sam_compare_w_feature.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-from __future__ import print_function
+
 
 ## @package sam_compare
 #  This script compares two SAM files (sequence alignment maps) and
@@ -21,7 +21,7 @@
 # Changed from sending data to stderr to file/stdout option
 
 import sys, os, time, datetime, locale
-import csv, cStringIO, logging, pprint, re, mmap, argparse, subprocess
+import csv, io, logging, pprint, re, mmap, argparse, subprocess
 import shlex, gc, getpass, random
 from Bio import SeqIO
 
@@ -246,11 +246,11 @@ def get_sam_reads(samdata, fqids, read_length, samid):
                         reads[name][1:] = [1, 1, 0]
                 else:
                     # not an exact match, check if we got closer than previous (read_length initially)
-                    if edit_distance < reads[name][3]:
+                    if edit_distance < int(reads[name][3].replace("\\n","")):
                         # we got closer to a match, set new edit distance and reset count
                         reads[name][3] = edit_distance
                         reads[name][1] = 1
-                    elif edit_distance == reads[name][3]:
+                    elif edit_distance == int(reads[name][3].replace("\\n","")):
                         # same edit distance, inc count
                         reads[name][1] += 1
             else:
@@ -565,7 +565,7 @@ def write_counts(filename, counts):
         f = open(filename, 'w') if filename.lower() != "stdout" else sys.stdout
         f.write(",".join(header))
         f.write("\n")
-        for row in counts.itervalues():
+        for row in counts.values():
             rowstr = ""
             for field in fields:
                 if rowstr:
diff --git a/hpc/ase_scripts/summarize_sam_compare_cnts_table_1cond_and_output_APN_06amm.py b/hpc/ase_scripts/summarize_sam_compare_cnts_table_1cond_and_output_APN_06amm.py
index df74a7d..298ee92 100755
--- a/hpc/ase_scripts/summarize_sam_compare_cnts_table_1cond_and_output_APN_06amm.py
+++ b/hpc/ase_scripts/summarize_sam_compare_cnts_table_1cond_and_output_APN_06amm.py
@@ -65,7 +65,7 @@ def main():
     sampleIDCol = args.sampleIDCol    
 
     # groupby G2 and comparison
-    df_design['numReps'] = df_design.groupby([parent2, sampleCol]).sample.transform('count')
+    df_design['numReps'] = df_design.groupby([parent2, sampleCol])['sample'].transform('count')
     print(df_design[:5])
 
     #counter variables for each comparate 
diff --git a/hpc/stan2_scripts/NBmodel_stan2_slurm_02amm.py b/hpc/stan2_scripts/NBmodel_stan2_slurm_02amm.py
index a1af91e..65bb08d 100644
--- a/hpc/stan2_scripts/NBmodel_stan2_slurm_02amm.py
+++ b/hpc/stan2_scripts/NBmodel_stan2_slurm_02amm.py
@@ -81,7 +81,7 @@ def main():
     infile=pd.read_csv(os.path.join(args.datafile, infileName))
     infile.set_index('FEATURE_ID')
 
-    pre_headers=list(infile.columns.get_values())
+    pre_headers=list(infile.columns)
     pre_headers_split=pre_headers[:4]
     pre_headers_split2=pre_headers[4:]