#!/usr/bin/perl -w

# Driver: read a prediction result file and print one compact-topology line
# per record via print_compacttopo().
#
# Usage: res2compacttopo.pl infile fastadir
#
# Layout of infile as consumed below:
#   - the first two lines are skipped;
#   - each record starts with a non-empty line holding the sequence file name;
#   - a "Seq length: N" line supplies the sequence length;
#   - "Is TM protein" / "No TM protein" and "Labeling:" lines are ignored;
#   - every other non-empty line is appended to the topology labeling;
#   - an empty line ends the record.

# Both arguments are required.  A list assignment evaluated in scalar context
# yields the number of right-hand elements, so testing the assignment itself
# would let a single argument through and leave $fastadir undefined.
(@ARGV >= 2) || die("Syntax: res2compacttopo.pl infile fastadir\n");
my ($infile, $fastadir) = @ARGV;

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and $! says why the open failed.
open(my $in, '<', $infile) || die("Could not open file $infile: $!\n");
<$in>; <$in>;    # skip the two leading lines

my $new = 1;     # true while waiting for the first line of the next record
my $TM  = 1;     # No filtering of non-TM (SCAMPI's opinion) proteins
my ($seq_file, $seq_length, $topo) = ("", "", "");

while (my $line = <$in>) {
    chomp($line);
    if ($new) {
        next if ($line =~ /^$/);    # tolerate extra blank lines between records
        $seq_file = $line;
        $new = 0;
    } elsif ($line =~ /^Seq\slength:\s(\d+)/) {
        $seq_length = $1;
    } elsif ($line =~ /^((Is)|(No))\sTM\sprotein/) {
        # Deliberately ignored: $TM stays 1, so every record is printed.
    } elsif ($line =~ /^Labeling:/) {
        # Marker line only; the labeling itself is on the lines that follow.
    } elsif ($line =~ /^$/) {
        print_compacttopo($seq_file, $seq_length, $TM, $topo, $fastadir);
        $new = 1;
        $seq_file = "";
        $seq_length = "";
        $topo = "";
    } else {
        $topo .= $line;
    }
}

# A final record that is not followed by an empty line is still pending
# here; emit it instead of silently dropping it.
print_compacttopo($seq_file, $seq_length, $TM, $topo, $fastadir) unless $new;
close($in);


# Print one record as "<topology> <seq_length> <seq_name>\n" on the currently
# selected output handle.
#
# Arguments:
#   $seq_file   - FASTA file name, looked up inside $fastadir
#   $seq_length - sequence length taken from the result file
#   $TM         - when false, nothing is printed
#   $topo       - topology labeling, one character per residue
#   $fastadir   - directory containing the FASTA files
#
# The labeling is lower-cased, then 'e' and 'l' are mapped to 'm' and 'j' to
# 'i'.  A labeling made up of only 'o' or only 'i' is printed unchanged;
# anything else is printed in the compact form built by _compact_topology().
#
# Dies when $seq_length differs from the length of $topo.
sub print_compacttopo {
    my ($seq_file, $seq_length, $TM, $topo, $fastadir) = @_;
    return unless $TM;

    ($seq_length == length($topo)) || die("$seq_file $seq_length " . $topo . " ");

    # tr/// takes plain character ranges; brackets would be literal characters.
    $topo =~ tr/A-Z/a-z/;
    $topo =~ tr/elj/mmi/;

    my $seq_name = _fasta_header("$fastadir/$seq_file");

    my $shown = $topo;
    unless (($topo =~ /^o+$/) || ($topo =~ /^i+$/)) {
        $shown = _compact_topology($topo);
    }
    print "$shown $seq_length $seq_name\n";
    return;
}

# Return the first line of the file at $path without its line ending and
# without a leading '>'.  The file is read directly rather than through a
# shell command, so unusual characters in the name cannot be interpreted by a
# shell.  An unreadable file produces a warning and an empty name, so the
# record is still printed.
sub _fasta_header {
    my ($path) = @_;
    my $fh;
    unless (open($fh, '<', $path)) {
        warn("Could not read $path: $!\n");
        return "";
    }
    my $header = <$fh>;
    close($fh);
    return "" unless defined $header;
    chomp($header);
    $header =~ s/^>//;
    return $header;
}

# Turn a per-residue labeling into compact form: for each run of 'm', the
# first character of the loop in front of it followed by "first-last"
# (1-based residue positions), then the first character of a trailing loop
# if there is one.  Example: "iiimmmmooommmmiii" gives "i4-7o11-14i".
#
# A labeling that begins with 'm' has no loop in front of its first segment,
# so that segment is written without a side letter ("mmmmiii" gives "1-4i").
sub _compact_topology {
    my ($topo) = @_;
    my $compact = "";
    my $consumed = 0;    # number of residues already accounted for

    # Every character is either 'm' or not, so successive matches are
    # contiguous and together cover everything up to the last 'm'.
    while ($topo =~ /([^m]*)(m+)/g) {
        my ($loop, $segment) = ($1, $2);
        my $first = $consumed + length($loop) + 1;
        my $last  = $first + length($segment) - 1;
        $compact .= substr($loop, 0, 1) . $first . "-" . $last;
        $consumed = $last;
    }

    # Whatever follows the last segment is the closing loop (possibly empty).
    $compact .= substr(substr($topo, $consumed), 0, 1);
    return $compact;
}

		    
	
