#!/usr/bin/perl
=head2 SYNOPSIS

	Description: Taking care of the multiple aligned reads.
				 1. Categorize the aligned fragments into different types according to their mapping status.
				 2. Assign the cross-mapping fragments to one subgenome and then quantify them.
				 3. Do Step#2 for another subgenome.
				 4. Equally assign the additional read counts (obtained from the cross-mapping reads) to the syntenic gene pairs in each subgenome
				 5. Quantify the uniquely mapped reads for each subgenome.
				 6. Add the additional read counts to the uniquely mapped read counts, and then call the TPM.
             
=head2 AUTHOR

	Kang Zhang  Email:  demut@foxmail.com
	
=cut

##Parameters:
#Thread number:
$threads=20; # split the bam files into Ns, and parallelly run them.
#Temp output dir:
$dir="./split";
$prefix=$ARGV[0];  # e.g. : "align_SampleName_vsCxJ";
$refGFF="merged_Brapa_Chiifuv3.1_Bol_JZS.gff";
$mutualSynOrth="mutual_0_vs_2_rmTandem_rm.synteny"; # the syntenic gene pairs info
#Notice: Please make sure that the genome 2 mark (Here is _JZS) is correct in processSAMtoAssignReads_simpleSplit.pl and in processSAMtoAssignReads_focus.pl.

open FINALBASH,">runProcessSAMtoAssignReads.sh";
	
system("picard SplitSamByNumberOfReads I=$prefix.bam N_FILES=$threads O=$dir OUT_PREFIX=$prefix VALIDATION_STRINGENCY=LENIENT");
for(my $i=1;$i<=$threads;$i++){
	my $count= sprintf "%04d",$i;
	my $bamfile=$dir."/$prefix"."_$count.bam";
	if(! -e $bamfile){
		print STDERR "Error: Can't find the $bamfile\n";
		next;
	}
	my $cmd="samtools view -h $bamfile|~/scripts/processSAMtoAssignReads_simpleSplit.pl - $prefix"."_$count 2>log_$prefix"."_$count";
	open BASH,">runProcessSAMtoAssignReads_$count.sh";
	print BASH "$cmd\n";
	close BASH;
	system("chmod 774 runProcessSAMtoAssignReads_$count.sh");
	my $bashcmd="nohup sh ./runProcessSAMtoAssignReads_$count.sh &\n";
	print FINALBASH "$bashcmd";
}
system ("sh ./runProcessSAMtoAssignReads.sh ");


@statusArray=readpipe("ps axu|grep zhangk|grep processSAMtoAssignReads|grep -v grep");
while($#statusArray >= 0){
	sleep(120);
	@statusArray=readpipe("ps axu|grep zhangk|grep processSAMtoAssignReads|grep -v grep");
}

sleep(150);
#picard MergeSamFiles I=input.sam O=out.bam SO=unsorted AS=true USE_THREADING=true 
my @type=("typeI","typeII","typeIII","typeIV","typeV","typeVI","typeVII");
foreach my $ty (@type){
	if($ty ne "unchanged"){open TMPRAW,">$prefix"."_$ty"."_raw_file.fofn";}
	for(my $i=1; $i <=$threads;$i++){
		my $count= sprintf "%04d",$i;
		if($ty ne "unchanged"){print TMPRAW "$prefix"."_$count"."_$ty"."_raw.sam\n"}
	}
	if($ty ne "unchanged"){close TMPRAW;}
	print STDERR "Proccessing the $prefix"."_$ty\n";
	if($ty ne "unchanged"){
		print STDERR "Proccessing the $prefix"."_$ty"."_raw\n";
		system ("samtools merge -@ 10 -n -b $prefix"."_$ty"."_raw_file.fofn $prefix"."_$ty"."_raw_m.bam");
	}
}


#接下来对于每一种type的raw bam 文件，重新处理两次，分别归于两个基因组


#Type II
my $command="samtools view -h $prefix"."_typeII_raw_m.bam |~/scripts/processSAMtoAssignReads_focus.pl - TypeII 1 >$prefix"."_typeII_raw_m_assign1.sam";
system ($command);
$command="samtools view -h $prefix"."_typeII_raw_m.bam |~/scripts/processSAMtoAssignReads_focus.pl - TypeII 2 >$prefix"."_typeII_raw_m_assign2.sam";
system ($command);

#TypeV
$command="samtools view -h $prefix"."_typeV_raw_m.bam |~/scripts/processSAMtoAssignReads_focus.pl - TypeV 1 >$prefix"."_typeV_raw_m_assign1.sam";
system ($command);
$command="samtools view -h $prefix"."_typeV_raw_m.bam |~/scripts/processSAMtoAssignReads_focus.pl - TypeV 2 >$prefix"."_typeV_raw_m_assign2.sam";
system ($command);

#get the read counts:
$command="featureCounts -g gene_id -p -D 1000 -T 10 -Q 5 -R CORE -a $refGFF -o $prefix"."_typeV_raw_m_assign1_count.txt $prefix"."_typeV_raw_m_assign1.sam";
system ($command);
$command="featureCounts -g gene_id -p -D 1000 -T 10 -Q 5 -R CORE -a $refGFF -o $prefix"."_typeV_raw_m_assign2_count.txt $prefix"."_typeV_raw_m_assign2.sam";
system ($command);

$command="featureCounts -g gene_id -p -D 1000 -T 10 -Q 5 -R CORE -a $refGFF -o $prefix"."_typeII_raw_m_assign1_count.txt $prefix"."_typeII_raw_m_assign1.sam";
system ($command);
$command="featureCounts -g gene_id -p -D 1000 -T 10 -Q 5 -R CORE -a $refGFF -o $prefix"."_typeII_raw_m_assign2_count.txt $prefix"."_typeII_raw_m_assign2.sam";
system ($command);

#merge the data:
$command="cat $prefix"."_typeII_raw_m_assign1.sam.featureCounts $prefix"."_typeV_raw_m_assign1.sam.featureCounts >$prefix"."_m_typeII_V_raw_m_assign1.sam.featureCounts";
system ($command);
$command="cat $prefix"."_typeII_raw_m_assign2.sam.featureCounts $prefix"."_typeV_raw_m_assign2.sam.featureCounts >$prefix"."_m_typeII_V_raw_m_assign2.sam.featureCounts";
system ($command);


## call the read counts for normal data:
$command="featureCounts -g gene_id -p -D 1000 -T 10 -Q 5 -a $refGFF -o $prefix"."_count.txt $prefix.bam";
system ($command);
## get the mutual ortholog counts:
$command="~/scripts/checkOrthPairReadMapping.pl $mutualSynOrth $prefix"."_m_typeII_V_raw_m_assign1.sam.featureCounts $prefix"."_m_typeII_V_raw_m_assign2.sam.featureCounts >$prefix"."_crossMap_info_mutual.txt";
system ($command);
## add them to the normal data:
$command="~/scripts/addSeperatedReadCounts.pl $prefix"."_count.txt $prefix"."_crossMap_info_mutual.txt >$prefix"."_count_add_mutual.txt";
system ($command);
## call TPM
$command="~/scripts/calTPMforLengthCount.pl $prefix"."_count_add_mutual.txt >$prefix"."_count_add_mutual_tpm.txt";
system ($command);

#cleanup
$command="rm runProcessSAMtoAssignReads_*.sh";
system ($command);
$command="rm *fofn";
system ($command);
