#!/usr/bin/python

# Parse SCAMPI result files.
# The generate method yields the title, topology and the length of the topology.
# The get method gives the same for the first hit in the file for the query string.
# Input: file with lines like:
# i3-23o211-231i872-892o 893 Q4U9M9|104K_THEAN 104 kDa microneme/rhoptry antigen precursor - Theileria annulata
#

import sys,re,os;


class ScampiParser:
    """Parser for SCAMPI result files.

    Every record is a single line of whitespace-separated fields:
    ``<topology encoding> <sequence length> <title ...>``, for example
    ``i3-23o211-231i872-892o 893 Q4U9M9|104K_THEAN 104 kDa ...``.
    """

    def __init__(self, file):
        """Remember the path of the SCAMPI result file; nothing is opened yet."""
        self.strScampi = file

    def _deenc(self, strEnc, intTot):
        """Expand a topology encoding into a per-residue topology string.

        strEnc -- either a compact encoding such as ``i3-23o211-231i``
                  (loop side, helix start-end, ..., side of the last loop),
                  or a string made up solely of i's or solely of o's.
        intTot -- total sequence length; used to fill the final loop.

        Returns a string with 'i'/'o' at loop positions and 'M' at helix
        positions.  An all-i / all-o input is returned lower-cased as given;
        it is not padded to intTot.

        Raises Exception when strEnc has neither of the two forms.
        """
        if re.match(r'^([io]\d+-\d+)+[io]$', strEnc):
            strTop = ''
            for chrBefore, strStart, strEnd in re.findall(r'([io])(\d+)-(\d+)', strEnc):
                intStart = int(strStart)
                intEnd = int(strEnd)
                # Helix positions are 1-based: fill the loop up to the residue
                # just before the helix, then the helix itself.
                strTop += chrBefore * (intStart - len(strTop) - 1)
                strTop += 'M' * (intEnd - intStart + 1)
            # Whatever remains after the last helix belongs to the final loop.
            strTop += strEnc[-1] * (intTot - len(strTop))
            return strTop
        if re.match(r'^[Ii]+$', strEnc) or re.match(r'^[Oo]+$', strEnc):
            return strEnc.lower()
        # scream loudly
        raise Exception("Faulty topology encoding: '%s'" % strEnc)

    def _parseLine(self, s):
        """Split one result line into (title, topology encoding, length)."""
        lstSplit = s.split()
        enc = lstSplit[0]
        length = int(lstSplit[1])
        title = " ".join(lstSplit[2:])
        return title, enc, length

    def generate(self):
        """Yield (title, topology, length) for every record in the file.

        Blank lines are skipped.  The file is closed when the generator is
        exhausted, closed, or fails.
        """
        with open(self.strScampi, 'r') as flhScampi:
            for strLine in flhScampi:
                if not strLine.strip():
                    # e.g. a trailing empty line; there is no record to parse
                    continue
                strTitle, strTopoEnc, intLength = self._parseLine(strLine)
                strTop = self._deenc(strTopoEnc, intLength)
                yield strTitle, strTop, intLength

    def get(self, strQuery):
        """Return (title, topology, length) of the first record whose title
        contains strQuery.

        Blank lines are skipped.  Raises Exception when no title matches.
        """
        with open(self.strScampi, 'r') as flhScampi:
            for strLine in flhScampi:
                lstFields = strLine.split()
                if not lstFields:
                    continue
                # Only lines whose title matches are fully parsed.
                if strQuery in " ".join(lstFields[2:]):
                    strTitle, strTopoEnc, intLength = self._parseLine(strLine)
                    strTop = self._deenc(strTopoEnc, intLength)
                    return strTitle, strTop, intLength
        raise Exception("Query '%s' got no hits in '%s'" % (strQuery, self.strScampi))

        
        


if __name__ == '__main__':
    # Command-line use: print every record (or only the first record whose
    # title contains the query) in FASTA-like form: ">title" then topology.
    strUsage = "Usage: %s <scampi result file> [query]"
    # argv[0] plus the one mandatory argument (the result file)
    intMinArgs = 2
    if len(sys.argv) < intMinArgs:
        print(strUsage % os.path.basename(sys.argv[0]))
        sys.exit(1)

    strFile = sys.argv[1]

    # The optional second argument restricts output to the first matching hit.
    strQ = ''
    if len(sys.argv) > 2:
        strQ = sys.argv[2]

    p = ScampiParser(strFile)

    if strQ == '':
        hits = p.generate()
    else:
        hits = [p.get(strQ)]

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    for title, top, length in hits:
        print(">%s" % title)
        print(top)

