#!/bin/bash

# Directory that contains this script. All bundled helper programs are
# looked up relative to it. "$0" is quoted so that an installation path
# containing spaces is not split into several words.
runpath=$(dirname "$0")

# Location of the PSIPRED wrapper (can be overridden with -psipreddir).
psipreddir=$runpath/bin/psipred32Fast
# Location of kalignP and the helper python scripts.
binpath=$runpath/bin
# Paths handed to isMemPro_scampi.sh via -scampipath / -modhmmpath.
scampipath=$runpath/bin/scampi
modhmmpath=$runpath/bin/
# Directory that holds isMemPro_scampi.sh.
checkMemProBin=$runpath/bin/newscampiscript

# Help text printed by PrintHelp.
# The string is expanded once, at this point of the script: $psipreddir
# therefore shows the built-in default. The default of -outpath is written
# literally because the outpath variable has not been assigned yet here.
usage="
Usage:  run_kalignP.sh [-i] fasta-seq-file

run kalignP, add the position specific gap penalties estimated from
secondary structures predicted by PSIPRED_single
multiple input files can be supplied

the result will be output to \$outpath/\$rootname.kalignP.\$format

Options:
    -psipreddir <dir> : set the path for executables, default=$psipreddir
    -outpath    <dir> : set the output path, default=./
    -o         <file> : output the result to the specified file
    -t          <int> : set the sequence type, default=0 (the type is then predicted)
    -i         <file> : set the input file
    -l         <file> : set the file containing the list of input files
    -f          <str> : set the format, can be msf, gcg, aln, fa, default=msf
    -q                : quiet mode, do not write messages
    -nc               : not clean the temporary file
    -no-psgp          : do not add position specific gap penalties

    -h|--help         : print this help message and exit

Created 2010-12-03
Updated 2011-09-01
nanjiang@sbc.su.se

sequence type:
0. un-categorized
1. non membrane proteins
2. membrane proteins

Examples:
    run_kalignP.sh test.fasta
    run_kalignP.sh -f fasta test.fasta -o test.msa.fa
"
# Write the help text stored in the global variable "usage" to stdout,
# followed by a newline.
PrintHelp() {
    printf '%s\n' "$usage"
}
# Check that an input file exists and is not empty.
# Arguments: $1 - path of the file to check
# Outputs:   "OK" on stdout when the file exists and has a size greater
#            than zero, otherwise "BAD" on stdout and a message on stderr
function CheckFile()
{
    local file=$1
    # The path must be quoted: with an unquoted empty or space-containing
    # value the test is evaluated with the wrong number of arguments and
    # the file is reported as "OK" although it cannot be read.
    if [ -s "$file" ]; then
        echo "OK"
    else
        echo "Input file \"$file\" do not exist or empty. Ignoring." >&2
        echo "BAD"
    fi
}
# Predict the sequence type of a FASTA file from the output of
# isMemPro_scampi.sh.
# Globals:   checkMemProBin, scampipath, modhmmpath (read)
# Arguments: $1 - sequence file handed to isMemPro_scampi.sh
# Outputs:   "2" on stdout when at least 50 percent of the output lines
#            have "yes" in their second field, otherwise "1";
#            nothing on stdout and a message on stderr when
#            isMemPro_scampi.sh produced no output
# Returns:   0 on success, 1 when no prediction could be made
function PredictSeqType() 
{
    local file=$1
    local percent_TMpro
    # Percentage (rounded to an integer) of lines whose second field is
    # "yes". Nothing is printed for empty input, which avoids a division
    # by zero inside awk.
    percent_TMpro=$("$checkMemProBin/isMemPro_scampi.sh" "$file" \
            -scampipath "$scampipath" -modhmmpath "$modhmmpath" \
        | awk 'BEGIN { y = 0; total = 0 }
               { if ($2 == "yes") { y++ } total++ }
               END { if (total > 0) { printf("%.0f", y / total * 100) } }')
    if [ -z "$percent_TMpro" ]; then
        echo "isMemPro_scampi error"  >&2
        return 1
    elif [ "$percent_TMpro" -ge 50 ]; then
        echo "2"
    else
        echo "1"
    fi
}
function RunKalignP() #$file#{{{
{
    local file=$1
    local basename=`basename $file`
    local rootname=${basename%.*}
    local tmpdir=/tmp/$rootname$$
    local alnFile=$outpath/$rootname.kalignP.$format
    if [ "$isOutFileSet" == "true" ]; then 
        alnFile=$outFile
    fi 
    local isGPSuppliedInFastaFile=`$binpath/IsGPSuppliedInFastaFile.py $file`

    if [ "$isGPSuppliedInFastaFile" == "yes" -o "$isNotAddGP" == "true" ]; then
        $binpath/kalignP $file $quietOptionStr -format $format -o $alnFile
    else 
        mkdir -p $tmpdir
        $binpath/splitfasta.py $file -nameseq -outpath $tmpdir $quietOptionStr 2> $errFile
        if [ -s $errFile ]; then 
            cat $errFile  >&2                                          
            exit 1;
        fi

        local aaFileListFile=$tmpdir/aafilelist.txt
        find $tmpdir -name "*.aa" > $aaFileListFile

        #predict secondary structure
        
        $psipreddir/runpsipred_single $quietOptionStr -l $aaFileListFile -outpath $tmpdir/$rootname

        #add gap penalties
        $binpath/addGapPenaltyByPredSS.py $quietOptionStr -p-shift-c $p_shift_C -p-shift-he $p_shift_HE -p-threshold-c $p_threshold_C -p-threshold-he $p_threshold_HE -weight-c $weight_C -weight-he $weight_HE $file -sspath $tmpdir -outpath $tmpdir 2> $errFile
        if [ -s $errFile ]; then 
            cat $errFile >&2
            exit 1;
        fi
        gpFastaFile=$tmpdir/$rootname.gp.fa
        $binpath/kalignP $gpFastaFile $quietOptionStr -format $format -o $alnFile
        if [ "$isNotClean" != "true" ] ;then
            rm -rf $tmpdir
        else
            if [ "$isQuiet"  != "true" ]; then
                    echo "Temporary file can be found at $tmpdir"
            fi
        fi
    fi

    if [ "$isQuiet"  != "true" ]; then
        echo "The alignment has been output to $alnFile"
    fi
}
#}}}
# Choose the parameter set passed to addGapPenaltyByPredSS.py according to
# the global seqType (0 = un-categorized, 1 = non membrane protein,
# 2 = membrane protein). Any other value prints a message and exits the
# script with status 1.
# Sets the globals p_shift_C, p_shift_HE, p_threshold_C, p_threshold_HE,
# weight_C and weight_HE.
function SetParaBySeqType() # seqType#{{{
{
    case "$seqType" in
        0|1)
            # un-categorized and non membrane proteins use the same values
            p_shift_C=0.50;    p_shift_HE=0.52
            p_threshold_C=0.0; p_threshold_HE=0.0
            weight_C=0.57;     weight_HE=0.48
            ;;
        2)
            # membrane protein
            p_shift_C=0.5;     p_shift_HE=0.53
            p_threshold_C=0;   p_threshold_HE=0
            weight_C=0.1;      weight_HE=2.0
            ;;
        *)
            echo "Wrong seqtype = $seqType"
            exit 1
            ;;
    esac
}
#}}}
# Called without any argument: show the help text and stop.
[ $# -ge 1 ] || { PrintHelp; exit; }

# Default settings; most of them can be changed by command line options.
fileList=
listFile=
# Scratch file that captures the stderr output of the helper programs.
errFile=$(mktemp /tmp/tmperr.XXXXXXX) || { echo "Failed to create temp file" >&2; exit 1; }
# Remove the scratch file on every exit path (help, wrong option, errors),
# not only when the script runs to its end.
trap 'rm -f -- "$errFile"' EXIT
outpath=./
isQuiet=false
quietOptionStr=
format=msf
seqType=0
isSeqTypeSet=false
isNotClean=false
isNotAddGP=false
isOutFileSet=false

# Parse the command line arguments and store the result in global variables
# (psipreddir, outpath, listFile, format, outFile, isOutFileSet, fileList,
# seqType, isSeqTypeSet, isQuiet, quietOptionStr, isNotAddGP, isNotClean).
# Arguments: the command line arguments of the script
# Notes:
#   - "--" marks the single argument that follows it as an input file, even
#     when it starts with "-".
#   - input files are collected in a whitespace separated string, so file
#     names that contain whitespace are not supported.
#   - -h prints the help text and exits; an unknown option prints a message
#     on stderr and exits with status 1.
function ParseArgs()
{
    isNonOptionArg=false
    # Loop on the argument count so that an empty argument does not end the
    # parsing early.
    while [ $# -gt 0 ]; do
        if [ "$isNonOptionArg" == "true" ]; then 
            # argument announced by "--": always an input file
            fileList="$fileList $1 "
            isNonOptionArg=false
        elif [ "$1" == "--" ]; then
            isNonOptionArg=true
        elif [ "${1:0:1}" == "-" ]; then
            case "$1" in
                -h|--help) PrintHelp; exit;;
                -psipreddir|--psipreddir) psipreddir=$2;shift;;
                -outpath|--outpath) outpath=$2;shift;;
                -l|--list) listFile=$2;shift;;
                # -foramt is a historical misspelling kept for compatibility
                -f|-format|-foramt|--format) format=$2;shift;;
                -o|-outfile|--outfile) outFile=$2;isOutFileSet=true;shift;;
                -i|-infile|--infile) fileList="$fileList $2 ";shift;;
                -t|-type|--type) seqType=$2
                    # type 0 keeps the automatic prediction switched on
                    if [ "$seqType" != "0" ]; then
                        isSeqTypeSet=true
                    fi
                    shift;;
                -q|--q|-quiet|--quiet) isQuiet=true;quietOptionStr=-q;;
                -no-psgp|--no-psgp) isNotAddGP=true;;
                -nc|--nc) isNotClean=true;;
                *) echo "Error! Wrong argument: $1" >&2; exit 1;;
            esac
        else
            fileList="$fileList $1 "
        fi
        shift
    done
}
ParseArgs "$@"

# Sanity checks before any work is done. Every failure exits with a
# non-zero status so that callers can detect it.
if [ -z "$fileList" ] && [ -z "$listFile" ]; then
    echo "Error! not input is set. Exit..."  >&2
    exit 1
fi
if [ ! -d "$psipreddir" ]; then 
    echo "Error!  psipreddir= \"$psipreddir\" does not exist. Exit..." >&2
    exit 1
fi

# The output directory is only needed when the name of the result file is
# derived from outpath, i.e. when -o was not given.
if [ "$isOutFileSet" == "false" ]; then 
    mkdir -p -- "$outpath" || {
        echo "Error! Failed to create outpath \"$outpath\". Exit..." >&2
        exit 1
    }
fi

# Process one input file: check it, determine the sequence type (unless it
# was set with -t), select the matching parameters and run the alignment.
# Files rejected by CheckFile are skipped.
# Globals:   isSeqTypeSet, isQuiet (read), seqType (read and written)
# Arguments: $1 - input file
function ProcessInputFile()
{
    local file=$1
    local check
    check=$(CheckFile "$file")
    if [ "$check" != "OK" ]; then
        return 0
    fi
    if [ "$isSeqTypeSet" == "false" ]; then 
        seqType=$(PredictSeqType "$file")
        # A failed prediction yields an empty string. Fall back to type 0
        # (un-categorized) so that one failure does not abort the whole
        # run with "Wrong seqtype".
        if [ -z "$seqType" ]; then
            echo "Warning! Failed to predict the seqtype of \"$file\", using 0" >&2
            seqType=0
        fi
        if [ "$isQuiet" == "false" ]; then 
            echo "predicted seqtype  = $seqType"
        fi
    fi
    SetParaBySeqType "$seqType"
    RunKalignP "$file"
}

# Files given on the command line. fileList is a whitespace separated
# string, so it is split into words on purpose.
if [ "$fileList" != "" ]; then 
    for file in $fileList; do 
        ProcessInputFile "$file"
    done
fi

# Files named in the list file. Entries are separated by any whitespace,
# so the content is split into words on purpose.
if [ -s "$listFile" ]; then 
    for file in $(cat "$listFile"); do 
        ProcessInputFile "$file"
    done
fi 

rm -f -- "$errFile"
