#!/usr/local/bin/perl

# Driver: parse the command line, load the template and database chain
# lists, run smap_comp once per (template, database) pair, then write a
# p-value-sorted summary for each template.
if (@ARGV < 6) {
  print "run_smap.pl -i templChain (-l templList) -d dbList -o outdir -p pvalue_cutoff\n";
  exit(1);
}

# Option values. An empty string means "not supplied on the command line".
my $pvalue     = 0.01;
my $templChain = "";
my $templList  = "";
my $dbList     = "";
my $outdir     = "";

# Map each flag to the variable that receives the argument following it.
my %option_slot = (
  "-i" => \$templChain,
  "-l" => \$templList,
  "-d" => \$dbList,
  "-o" => \$outdir,
  "-p" => \$pvalue,
);

for my $idx (0 .. $#ARGV) {
  my $slot = $option_slot{$ARGV[$idx]} or next;
  my $value = $ARGV[$idx + 1];
  # A flag given as the very last argument has no value: keep the default
  # so the "please specify" checks below (or the default cutoff) apply.
  next unless defined $value;
  $$slot = $value;
}

if ($templChain eq "" && $templList eq "") {
  print "please specify template chain or list file.\n";
  exit(1);
}
if ($dbList eq "") {
  print "please specify database list file.\n";
  exit(1);
}
if ($outdir eq "") {
  print "please specify output directory.\n";
  exit(1);
}

# Use the builtin mkdir instead of a shell command so directory names with
# spaces or shell metacharacters are handled literally.
unless (-e $outdir) {
  mkdir($outdir) or die "Cannot create $outdir: $!\n";
}

# A template list file takes precedence over a single template chain.
my (%dbChains, %templChains);
if ($templList ne "") {
  get_chains($templList, \%templChains);
}
elsif ($templChain ne "") {
  $templChains{$templChain} = $templChain;
}

get_chains($dbList, \%dbChains);

foreach my $templ (sort keys %templChains) {
  print STDERR "template = $templ\n";
  foreach my $dbChain (sort keys %dbChains) {
    print STDERR "   query = $dbChain\n";
    my $output = $outdir."/".$dbChain."-".$templ.".smap";
    # List-form system() runs smap_comp exactly once, without a shell.
    # (Wrapping the command in backticks inside system() would run it and
    # then execute whatever it printed on stdout as a second command.)
    my $status = system("smap_comp", $templ, $dbChain, $output);
    if ($status != 0) {
      # Best effort: report the failure and carry on with the next pair.
      warn "smap_comp failed for $templ vs $dbChain (status $status)\n";
    }
  }
  # Third argument 0 selects sorting by p-value.
  sort_smap($outdir, $templ, 0, $pvalue);
}

# sort_smap($dir, $templ, $sortby, $pvthre)
#
# Collects the hits from every file in $dir whose name contains
# "<$templ>.smap", keeps those with score < 1000.0 and p-value < $pvthre,
# and writes the hit lines to "<$templ>_sorted.smap" (path is relative to
# the current working directory, not to $dir).
#
#   $sortby eq 0  : ascending by p-value
#   anything else : descending by raw score
#
# Dies if $dir cannot be read or the output file cannot be written.
sub sort_smap
{
    my($dir,$templ,$sortby,$pvthre) = @_;

    opendir(my $dh, $dir) or die "Cannot find $dir.\n";
    # \Q..\E: the template name is literal text, not a regex.
    my @files = grep { /\Q$templ\E\.smap/ } readdir($dh);
    closedir($dh);

    my $by_pvalue = ($sortby eq 0);

    # Lexical on purpose: hits must not carry over between calls, otherwise
    # each template's summary would also contain earlier templates' hits.
    my %hits;
    foreach my $file (@files) {
      my($hit,$score,$pvalue) = read_smap($dir."/".$file);
      next unless $score<1000.0 && $pvalue<$pvthre;
      $hits{$hit} = $by_pvalue ? $pvalue : $score;
    }

    my @ordered;
    if($by_pvalue) {
      @ordered = sort { $hits{$a} <=> $hits{$b} } keys %hits;
    }
    else {
      @ordered = sort { $hits{$b} <=> $hits{$a} } keys %hits;
    }

    my $out = $templ."_sorted.smap";
    open(my $out_fh, '>', $out) or die "Cannot open $out for writing: $!\n";
    # Each hit line already ends with a newline.
    print {$out_fh} @ordered;
    close($out_fh) or die "Cannot close $out: $!\n";
}

# read_smap($in)
#
# Parses one smap result file and returns the list ($hit, $score, $pvalue).
#
# Three kinds of line are recognised; if a kind occurs several times the
# last occurrence wins:
#   ">Query Chain: <id>"                            -> query id
#   "P-Value = .. Raw Score = .. Tanimoto Coeff = .. RMSD = .."
#   "<token> |<annotation>| ..."                    -> annotation text
#
# Fields that never appear keep their sentinel defaults (query "unknown",
# p-value 1000.0, score -1000.0, Tanimoto 1000.0, RMSD 1000.0).
#
# $hit is one tab-separated, newline-terminated line:
#   query, annotation (exactly 60 columns), empty field, p-value, score,
#   Tanimoto coefficient, RMSD.
#
# Dies if $in cannot be opened.
sub read_smap
{
  my($in) = @_;

  my $annot="";
  my $query="unknown";
  my $pvalue=1000.0;
  my $score=-1000.0;
  my $tanicoef=1000.0;
  my $rmsd=1000.0;

  # Three-arg open with a lexical handle: the file name is never parsed for
  # a mode, and no global handle or $_ is disturbed for the caller.
  open(my $fh, '<', $in) or die "Cannot open $in.\n";
  while(my $line = <$fh>) {
    chomp($line);
    if($line =~ /\>Query\s+Chain\:\s+(\S+)/) {
      $query = $1;
    }
    elsif($line =~ /P\-Value\s*\=\s*(\S+)\s+Raw\s+Score\s+\=\s+(\S+)\s+Tanimoto\s+Coeff\s+\=\s+(\S+)\s+RMSD\s+\=\s+(\S+)/) {
      ($pvalue, $score, $tanicoef, $rmsd) = ($1, $2, $3, $4);
    }
    elsif($line =~ /^\S+\s+\|(.*)\|\s+/) {
      $annot = $1;
    }
  }
  close($fh);

  # Missing or "null" annotations are reported as "unknown".
  if($annot eq "" || $annot eq "null") {
    $annot = "unknown";
  }
  # Left-justify in a fixed 60-column field: pads short text with spaces
  # and truncates anything longer.
  $annot = sprintf("%-60.60s", $annot);

  my $hit = join("\t", $query, $annot, "", $pvalue, $score, $tanicoef, $rmsd)."\n";
  return($hit,$score,$pvalue);
}

# get_chains($in, $chains_ref)
#
# Reads chain identifiers from the list file $in and stores each one in the
# hash referenced by $chains_ref as id => id. Accepted line formats:
#   "<first> <second> ..."  -> "<first>_<second>" (first two fields joined)
#   "<token>"               -> taken as-is when it contains an underscore
#                              with at least one character on each side
# Any other line (blank, or a single token without such an underscore) is
# ignored.
#
# Dies if $in cannot be opened.
sub get_chains
{
  my($in, $chains_ref) = @_;

  # Three-arg open with a lexical handle: the file name is never parsed for
  # a mode, and no global handle or $_ is disturbed for the caller.
  open(my $fh, '<', $in) or die "Cannot open $in.\n";
  while(my $line = <$fh>) {
    my $chain = "";
    if($line =~ /^(\S+)\s+(\S+)/) {
      $chain = $1."_".$2;
    }
    elsif($line =~ /^(\S+\_\S+)/) {
      $chain = $1;
    }

    if($chain ne "") {
      $chains_ref->{$chain} = $chain;
    }
  }
  close($fh);
}
