#!/usr/bin/perl -w
#Author: Feng Cheng
use warnings;
use strict;

# Command-line arguments (all four are required):
#   alength - maximum number of neighbouring genes examined on each side of an "a" gene
#   blength - maximum number of neighbouring genes examined on each side of a "b" gene
#   ratio   - minimum fraction of "a" flanking genes that must support a pair for it
#             to be reported as colinear
#   dirw    - working directory; every input and output lives under "$dirw/temp"
# Fail early with a usage message instead of running on with undefined values.
die "Usage: $0 <alength> <blength> <ratio> <dirw>\n" unless @ARGV >= 4;
my ($alength,$blength,$ratio,$dirw) = @ARGV[0..3];
#selected genes matching analysis with best hit
#---Blastp_Best_Bone----------
# Load the best-hit table into %aBESTb: column 1 is the key, column 2 the value
# (later looked up by "a" gene name to get that gene's best-hit "b" gene).
my $in1 = "$dirw/temp/a2b.all_delTandem_bestHit";
my %aBESTb = ();
open(my $best_fh, '<', $in1) or die "Cannot open $in1 for reading: $!\n";
while (my $line = <$best_fh>) {
  # A usable line has two non-empty fields, each followed by a tab. Anything
  # else is skipped so that captures left over from an earlier match are
  # never stored as a bogus key/value pair.
  next unless $line =~ /^([^\t]+)\t([^\t]+)\t/;
  $aBESTb{$1} = $2;
}
close($best_fh);
#-------------------------

#---order the tair gene---
# Number the "b" genes 1..$bindex in file order and record, per gene, its
# index, its chromosome (column 2) and a position string (columns 2-4 joined
# by tabs).
my $in2 = "$dirw/temp/b_gene_sorted_delTandem";
my $bindex = 0;
my (%bGene2index,%bIndex2gene,%bIndex2chr,%bGene2pos);
open(my $b_order_fh, '<', $in2) or die "Cannot open $in2 for reading: $!\n";
while (my $line = <$b_order_fh>) {
  # Validate before consuming an index: a malformed line must not create an
  # index slot whose gene/chromosome come from a previous match.
  unless ($line =~ /^([^\t]+)\t([^\t]+)\t/) {
    warn "Skipping malformed line $. of $in2\n";
    next;
  }
  my ($bgene, $bchr) = ($1, $2);
  $bindex += 1;
  $bGene2index{$bgene} = $bindex;
  $bIndex2gene{$bindex} = $bgene;
  $bIndex2chr{$bindex} = $bchr;
  # The position needs columns 2-4 plus a trailing tab; only store it when
  # the line really has them.
  if ($line =~ /^[^\t]+\t([^\t]+\t[^\t]+\t[^\t]+)\t/) {
    $bGene2pos{$bgene} = $1;
  }
}
close($b_order_fh);
#-------------------------

#---order the rapa gene---
# Number the "a" genes 1..$aindex in file order and record, per gene, its
# index and its chromosome (column 2).
my $in3 = "$dirw/temp/a_gene_sorted_delTandem";
my $aindex = 0;
my (%aGene2index,%aIndex2gene,%aIndex2chr);
open(my $a_order_fh, '<', $in3) or die "Cannot open $in3 for reading: $!\n";
while (my $line = <$a_order_fh>) {
  # Validate before consuming an index: a malformed line must not create an
  # index slot whose gene/chromosome come from a previous match.
  unless ($line =~ /^([^\t]+)\t([^\t]+)\t/) {
    warn "Skipping malformed line $. of $in3\n";
    next;
  }
  my ($agene, $achr) = ($1, $2);
  $aindex += 1;
  $aGene2index{$agene} = $aindex;
  $aIndex2gene{$aindex} = $agene;
  $aIndex2chr{$aindex} = $achr;
}
close($a_order_fh);
#-------------------------
# The best-hit table is already held in %aBESTb, so its file is removed here.
# A failed removal is reported but is not fatal.
unlink($in1) or warn "Could not remove $in1: $!\n";
#unlink($in2);
#unlink($in3);

# Output handles used by the main loop:
#   FW1 - pairs whose flanking support reaches the ratio threshold
#   FW2 - the remaining pairs
#   FW3 - pairs whose b gene differs from the a gene's best hit
# An unopened handle would make every later print a silent no-op, so each
# open is checked.
my $out1 = "$dirw/temp/a2b_colinear_orthologous";
open(FW1, '>', $out1) or die "Cannot open $out1 for writing: $!\n";
my $out2 = "$dirw/temp/a2b_colinear_orthologous.rest";
open(FW2, '>', $out2) or die "Cannot open $out2 for writing: $!\n";
my $out3 = "$dirw/temp/a2b_colinear_orthologous.temp";
open(FW3, '>', $out3) or die "Cannot open $out3 for writing: $!\n";

# Read every candidate a/b pair line into memory; $num is the line count.
my $in4 = "$dirw/temp/a2b.all_delTandem_pos.filter";
open(my $pairs_fh, '<', $in4) or die "Cannot open $in4 for reading: $!\n";
my @input = <$pairs_fh>;
close($pairs_fh);
my $num = @input;

# File-scope scratch variables from the original layout. The loop below uses
# its own lexicals; these declarations are retained so that any code outside
# this section that refers to them still compiles under "use strict".
my ($i,$j,$k,$left,$right,$chrA,$leftPos,$rightPos,$mclFlag);
my $chr = "XX";
my $bFlag = 0;

# Starting at index $center, walk outwards in both directions while the
# neighbouring index is on the same chromosome, stopping after $span steps or
# at the ends of the index range (1 .. $last_index).
# Returns (steps taken to the left, steps taken to the right).
sub _flank_extent {
  my ($center, $span, $last_index, $chr_of) = @_;

  my $low = $center;
  while ($low > 1 && $center - $low < $span && $chr_of->{$low-1} eq $chr_of->{$low}) {
    $low -= 1;
  }

  my $high = $center;
  while ($high < $last_index && $high - $center < $span && $chr_of->{$high+1} eq $chr_of->{$high}) {
    $high += 1;
  }

  return ($center - $low, $high - $center);
}

# For the "a" genes at indexes $first .. $last, look up each gene's best-hit
# "b" gene and count how many of those land inside the b window
# [$b_center-$b_left, $b_center+$b_right], split by side.
# Reads the file-level %aIndex2gene, %aBESTb and %bGene2index.
# Returns (hits with a b index below $b_center, hits with a b index above it).
# Best hits outside the window, or equal to $b_center itself, are not counted.
sub _count_flank_hits {
  my ($first, $last, $b_center, $b_left, $b_right) = @_;
  my ($below, $above) = (0, 0);

  for my $a_pos ($first .. $last) {
    my $flank_gene = $aIndex2gene{$a_pos};
    next unless exists $aBESTb{$flank_gene};
    my $hit = $aBESTb{$flank_gene};
    next unless exists $bGene2index{$hit};

    my $offset = $bGene2index{$hit} - $b_center;
    next if $offset > $b_right || $offset < -$b_left;
    if ($offset < 0) {
      $below += 1;
    }
    elsif ($offset > 0) {
      $above += 1;
    }
  }

  return ($below, $above);
}

# Main scan: for every candidate a/b pair, measure how many of the a gene's
# neighbours have their best hit among the b gene's neighbours, then write the
# pair to FW1 (supported) or FW2 (not supported).
for my $record (@input) {
  my $line = $record;   # work on a copy; the loop variable aliases @input
  chomp($line);

  # Expected layout: a gene, a chromosome, two further fields, b gene, then at
  # least one more tab. Lines that do not fit are skipped rather than being
  # processed with captures left over from an earlier line.
  next unless $line =~ /^([^\t]+)\t([^\t]+)\t[^\t]+\t[^\t]+\t([^\t]+)\t/;
  my ($a_gene, $b_gene) = ($1, $3);

  # Pairs whose b gene is not in the ordered b list produce no output at all.
  next unless exists $bGene2index{$b_gene};
  my $b_center = $bGene2index{$b_gene};

  #--locate b flanking genes-----------------------
  my ($b_left, $b_right) = _flank_extent($b_center, $blength, $bindex, \%bIndex2chr);

  #--Best hit or not-------------------------------
  # The label strings are written to the output and are kept exactly as they
  # were, including the "BLASP" spelling.
  my $evidence;
  if (!exists $aBESTb{$a_gene}) {
    $evidence = "BLASP+FLANK";
  }
  elsif ($aBESTb{$a_gene} ne $b_gene) {
    print FW3 "$line\t$aBESTb{$a_gene}\tBLASTP X Best_Hit\n";
    $evidence = "BLASP+FLANK(-Best_Hit)";
  }
  else {
    $evidence = "Best_Hit";
  }

  #--locate a flanking genes and match them to b---
  # LL/LR: left a-neighbours whose best hit is left/right of the b gene.
  # RL/RR: right a-neighbours whose best hit is left/right of the b gene.
  my ($a_left, $a_right) = (0, 0);
  my ($count_ll, $count_lr, $count_rl, $count_rr) = (0, 0, 0, 0);
  my $a_center = $aGene2index{$a_gene};
  # An a gene missing from the ordered a list has no flanks; it keeps all-zero
  # counts and therefore falls through to FW2, without any undef arithmetic.
  if (defined $a_center) {
    ($a_left, $a_right) = _flank_extent($a_center, $alength, $aindex, \%aIndex2chr);
    ($count_ll, $count_lr) = _count_flank_hits($a_center - $a_left, $a_center - 1,
                                               $b_center, $b_left, $b_right);
    ($count_rl, $count_rr) = _count_flank_hits($a_center + 1, $a_center + $a_right,
                                               $b_center, $b_left, $b_right);
  }

  #--set orientation-------------------------------
  # "+" when same-side matches are at least as numerous as cross-side matches.
  # (The original also tested LL>=LR && RR>=RL, which this sum test implies.)
  my ($count_l, $count_r, $orient);
  if ($count_ll + $count_rr >= $count_lr + $count_rl) {
    ($count_l, $count_r, $orient) = ($count_ll, $count_rr, "+");
  }
  else {
    ($count_l, $count_r, $orient) = ($count_lr, $count_rl, "-");
  }

  #--output----------------------------------------
  my $report = "$line\t$count_l|$a_left\t$count_r|$a_right\t$orient\t$evidence\n";
  my $flank_total = $a_left + $a_right;
  # The zero check comes first so the ratio is never computed with a zero divisor.
  if ($flank_total != 0 && ($count_l + $count_r) / $flank_total >= $ratio) {
    print FW1 $report;
  }
  else {
    print FW2 $report;
  }
}
# Buffered write errors only surface at close, so the result for the primary
# output (FW1) is captured; the failure is raised after the temporary files
# have been cleaned up.
my $fw1_closed = close(FW1);
my $fw1_error = "$!";
close(FW2);
close(FW3);

# The ".rest" and ".temp" outputs and the filtered pair list are removed as
# soon as the run finishes; only the main colinear output is kept.
# NOTE(review): FW2/FW3 are written and then deleted straight away - confirm
# this is intended and not leftover debugging output.
unlink($out2) or warn "Could not remove $out2: $!\n";
unlink($out3) or warn "Could not remove $out3: $!\n";
unlink($in4) or warn "Could not remove $in4: $!\n";

die "Failed to finish writing $out1: $fw1_error\n" unless $fw1_closed;

