#!/usr/bin/perl -w


#program to do online codon-preserved alignments tool;
#modified from ocpat.pl on June 20, 2007 # Munirul Islam@
#modified from ocpat.pl on Nov 2, 2006 #Guozhen Liu@ 


use Getopt::Long;

GetOptions(
           "RefSeq_IDs=s" => \$geneList,
           "outfile=s" => \$outfile,
           "getPan_troglodytes_Genome=i" => \$getPan_troglodytes_Genome,
           "getMacaca_mulatta_Genome=i" => \$getMacaca_mulatta_Genome,
           "getMus_musculus_Genome=i" => \$getMus_musculus_Genome,
           "getRattus_norvegicus_Genome=i" => \$getRattus_norvegicus_Genome,
           "getOryctolagus_cuniculus_Genome=i" => \$getOryctolagus_cuniculus_Genome,
           "getCanis_familiaris_Genome=i" => \$getCanis_familiaris_Genome,
           "getBos_taurus_Genome=i" => \$getBos_taurus_Genome,
           "getDasypus_novemcinctus_Genome=i" => \$getDasypus_novemcinctus_Genome,
           "getLoxodonta_africana_Genome=i" => \$getLoxodonta_africana_Genome,
           "getEchinops_telfairi_Genome=i" => \$getEchinops_telfairi_Genome,
           "getMonodelphis_domestica_Genome=i" => \$getMonodelphis_domestica_Genome,
           "getGallus_gallus_Genome=i" => \$getGallus_gallus_Genome,
           "getXenopus_tropicalis_Genome=i" => \$getXenopus_tropicalis_Genome,
           "getOrnithorhynchus_anatinus_Genome=i" => \$getOrnithorhynchus_anatinus_Genome
);

#=DBG
#
#$geneList=shift;  # "ETCgene.txt";
#$outfile=shift;
#$getPan_troglodytes_Genome=1;
#$getMacaca_mulatta_Genome=1;
#$getMus_musculus_Genome=1;
#$getRattus_norvegicus_Genome=1;
#$getOryctolagus_cuniculus_Genome=1;
#$getCanis_familiaris_Genome=1;
#$getBos_taurus_Genome=1;
#$getDasypus_novemcinctus_Genome=1;
#$getLoxodonta_africana_Genome=1;
#$getEchinops_telfairi_Genome=1;
#$getMonodelphis_domestica_Genome=1;
#$getGallus_gallus_Genome=1;
#$getXenopus_tropicalis_Genome=1;
#$getOrnithorhynchus_anatinus_Genome=1;
#
#=cut           
#           
           
$preresult="CDSblast.raw.".$$;
$Stringency=0.00001;
$BITSCORE_THRESHOLD=140; #200;
$SIMILARITY_THRESHOLD=20; #35; ##Nov 2 modified from 20;
$IDENTICAL_THRESHOLD=2.2;  ##Oct27 -2006 change from 2.2;
#$ALIGN_LENGTH_THRESHOLD=0.1;

#genomic file location:
$OCPAT_DATA       ="data/";
$human_RNA        ="$OCPAT_DATA/humanRefSeq/human.rna.fna";
$rna_geneBank     ="$OCPAT_DATA/humanRefSeq/human.rna.gbff";
$human_RNA_DB     ="$OCPAT_DATA/humanRefSeq/humanRNA";

=FORFUTURE
#$chimp_RNA_DB     ="\"$OCPAT_DATA/chimp/ensemblChimpCDNA $OCPAT_DATA/chimp/chimpRefSeqRNA $OCPAT_DATA/chimp/nrRNA\"";
#$macaca_RNA_DB    ="\"$OCPAT_DATA/macaca/ensemblMacacaCDNA $OCPAT_DATA/macaca/macacaRefSeqRNA $OCPAT_DATA/macaca/nrRNA\"";
#$mouse_RNA_DB     ="\"$OCPAT_DATA/mouseRefSeq/ensemblMouseCDNA $OCPAT_DATA/mouseRefSeq/mouseRefSeqRNA $OCPAT_DATA/mouseRefSeq/nrRNA\"";
#$rat_RNA_DB       ="\"$OCPAT_DATA/ratRefSeq/ensemblRatCDNA $OCPAT_DATA/ratRefSeq/ratRefSeqRNA $OCPAT_DATA/ratRefSeq/nrRNA\"";
#$rabbit_RNA_DB    ="\"$OCPAT_DATA/rabbit/ensemblRabbitCDNA $OCPAT_DATA/rabbit/rabbitRefSeqRNA $OCPAT_DATA/rabbit/nrRNA\"";
#$dog_RNA_DB       ="\"$OCPAT_DATA/dog/ensemblDogCDNA $OCPAT_DATA/dog/dogRefSeqRNA $OCPAT_DATA/dog/nrRNA\"";
#$cow_RNA_DB       ="\"$OCPAT_DATA/cow/ensemblCowCDNA $OCPAT_DATA/cow/cowRefSeqRNA $OCPAT_DATA/cow/nrRNA\"";
#$armadillo_RNA_DB ="\"$OCPAT_DATA/armadillo/ensemblArmadilloCDNA $OCPAT_DATA/armadillo/armadilloRefSeqRNA $OCPAT_DATA/armadillo/nrRNA\"";
#$elephant_RNA_DB  ="\"$OCPAT_DATA/elephant/ensemblElephantCDNA $OCPAT_DATA/elephant/elephantRefSeqRNA $OCPAT_DATA/elephant/nrRNA\"";
#$tenrec_RNA_DB    ="\"$OCPAT_DATA/tenrec/ensemblTenrecCDNA $OCPAT_DATA/tenrec/tenrecRefSeqRNA $OCPAT_DATA/tenrec/nrRNA\"";
#$opossum_RNA_DB   ="\"$OCPAT_DATA/opossum/ensemblOpossumCDNA $OCPAT_DATA/opossum/opossumRefSeqRNA $OCPAT_DATA/opossum/nrRNA\"";
#$chicken_RNA_DB   ="\"$OCPAT_DATA/chicken/ensemblChickenCDNA $OCPAT_DATA/chicken/chickenRefSeqRNA $OCPAT_DATA/chicken/nrRNA\"";
#$xenopus_RNA_DB   ="\"$OCPAT_DATA/xenopus/ensemblFrogCDNA $OCPAT_DATA/xenopus/frogRefSeqRNA $OCPAT_DATA/xenopus/nrRNA\"";
#$platypus_RNA_DB  ="\"$OCPAT_DATA/platypus/ensemblPlatypusCDNA $OCPAT_DATA/platypus/platypusRefSeqRNA $OCPAT_DATA/platypus/nrRNA\"";
=cut

$chimp_RNA_DB     ="\"$OCPAT_DATA/chimp/ensemblChimpCDNA $OCPAT_DATA/chimp/nrRNA\"";
$macaca_RNA_DB    ="\"$OCPAT_DATA/macaca/ensemblMacaccaCDNA $OCPAT_DATA/macaca/nrRNA\"";
$mouse_RNA_DB     ="\"$OCPAT_DATA/mouseRefSeq/ensemblMouseCDNA $OCPAT_DATA/mouseRefSeq/nrRNA\"";
$rat_RNA_DB       ="\"$OCPAT_DATA/ratRefSeq/ensemblRatCDNA $OCPAT_DATA/ratRefSeq/nrRNA\"";
$rabbit_RNA_DB    ="\"$OCPAT_DATA/rabbit/ensemblRabbitCDNA $OCPAT_DATA/rabbit/nrRNA\"";
$dog_RNA_DB       ="\"$OCPAT_DATA/dog/ensemblDogCDNA $OCPAT_DATA/dog/nrRNA\"";
$cow_RNA_DB       ="\"$OCPAT_DATA/cow/ensemblCowCDNA $OCPAT_DATA/cow/nrRNA\"";
$armadillo_RNA_DB ="\"$OCPAT_DATA/armadillo/ensemblArmadilloCDNA $OCPAT_DATA/armadillo/nrRNA\"";
$elephant_RNA_DB  ="\"$OCPAT_DATA/elephant/ensemblElephantCDNA $OCPAT_DATA/elephant/nrRNA\"";
$tenrec_RNA_DB    ="\"$OCPAT_DATA/tenrec/ensemblTenrecCDNA $OCPAT_DATA/tenrec/nrRNA\"";
$opossum_RNA_DB   ="\"$OCPAT_DATA/opossum/ensemblOpossumCDNA $OCPAT_DATA/opossum/nrRNA\"";
$chicken_RNA_DB   ="\"$OCPAT_DATA/chicken/ensemblChickenCDNA $OCPAT_DATA/chicken/nrRNA\"";
$xenopus_RNA_DB   ="\"$OCPAT_DATA/xenopus/ensemblFrogCDNA $OCPAT_DATA/xenopus/nrRNA\"";
$platypus_RNA_DB  ="\"$OCPAT_DATA/platypus/ensemblPlatypusCDNA $OCPAT_DATA/platypus/nrRNA\"";


my $macaca_RNA;
my $chimp_RNA;
my $mouse_RNA;
my $rat_RNA;
my $dog_RNA;
my $cow_RNA;
my $opossum_RNA;
my $chicken_RNA;
my $xenopus_RNA;
my $rabbit_RNA;
my $elephant_RNA;
my $tenrec_RNA;
my $armadillo_RNA;
my $platypus_RNA;

my %align_seq;
$refSeq_id="";
$ALIGNED_NUCS=0;
$HUMAN_CDS_LEN=0;
my %input_id=();

my (%homo2human, %homo2rat, %homo2mouse, %homo2chimp, %homo2macaca, %homo2dog, %homo2cow, %homo2chicken, 
    %homo2opossum, %homo2xenopus, %homo2rabbit, %homo2elephant, %homo2tenrec, %homo2armadillo, %homo2platypus);
    
%homo2human=%homo2rat=%homo2mouse=%homo2chimp=%homo2macaca=%homo2dog=%homo2cow=%homo2platypus=();
%homo2chicken=%homo2opossum=%homo2xenopus=%homo2rabbit=%homo2elephant=%homo2tenrec=%homo2armadillo=();

my $tmpInputLine;
my @webinputfile;
       	     
#  & parseHomoloGene(); #will be implemented in a later version

#create a folder to hold the temporary tree files:

#fetch the human promoter sequences, include both the masked and the unmasked 1kb.
#using the masked promoter to fishout the Chimp promoter. 
open(WEBINPUT, $geneList)||die "Can't open the input file geneList=$geneList: $!\n";
$/ ="\n";
@webinputfile=<WEBINPUT>;
close(WEBINPUT);

$file_type_flag=0;
#determine the file type:
if ( $webinputfile[0] =~ /^\s*[NX]M_\d+/ ) {
	         print STDERR "Input file is RefSeq gene ID.\n";
	         $file_type_flag=1;
} elsif ( $webinputfile[0] =~ /^>/ ) {
        	 print STDERR "Input file is fasta formated sequences.\n";
        	 $file_type_flag=2;
} else {
        	 print STDERR "Input file is neither the RefSeq gene IDs nor the fasta formated sequences.\n";
        	 exit(1);
}

$TMPFOLDER="TMP_OCPATWORKINFFOLDER";
if ( -d $TMPFOLDER ) {
}else {
   	  `mkdir $TMPFOLDER `;
}  
        
open(LOGOPT, ">$outfile")||die "Can't open the output file $!\n";
$tmpGoldenRNA       ="$TMPFOLDER/tmpGoldenRNA.".$$;   

$processing_progress=0;
if (   $file_type_flag==1 ) {
	    foreach $tmpInputLine ( @webinputfile )  {
           if ( $tmpInputLine =~ /([NX]M_\d+)/ ) {
	              $refSeq_id = $1; 
	              %align_seq=();
	              $processing_progress +=1;
	              print STDERR "Processing the Gene ID number $processing_progress...\n";
                main();     
           } else {
        	      print STDERR "$_ is not an effective RefSeq gene ID.\n";
           }
      }
}  

if ( $file_type_flag == 2 ) {     
      open(WEBINPUT, $geneList)||die "Can't open the input file geneList=$geneList: $!\n";
      #Because we are reading FASTA files, we will assign "$/" to equal ">".
      $/ = ">";
      @inputSequenceEntries = <WEBINPUT>;
      close(WEBINPUT);
      $inputSequence_cnt=@inputSequenceEntries;
      
      for ( $i_inputSequence_cnt =1; $i_inputSequence_cnt < $inputSequence_cnt; $i_inputSequence_cnt ++ ) {
      	  print STDERR "Processing the Fasta Formated sequence number $i_inputSequence_cnt...\n";
          $this_sequenceEntry = $inputSequenceEntries[$i_inputSequence_cnt];
          if ( $this_sequenceEntry eq ">"){
      	        next;
          }
          $this_sequenceTitle = "";
          if ( $this_sequenceEntry =~ m/([^\n]+)/){
      	       $this_sequenceTitle = $1;
          }else {
      	       $this_sequenceTitle = "No title was found!";
          }
      
          $this_sequenceEntry =~ s/[^\n]+//;
#          $sequenceEntry =~ s/[^GATCNgatcn]//g;
          $this_sequenceEntry =~ s/\W//g;
#          $sequenceEntry =~ s/>//g;
          
          open(GOLDENRNA, ">$tmpGoldenRNA")||die "Can't open the output file $!\n";
          print GOLDENRNA ">$this_sequenceTitle\n";
          print GOLDENRNA "$this_sequenceEntry\n";
          close(GOLDENRNA);
          $refSeq_id="Seq".$i_inputSequence_cnt;
          %align_seq=();
                
          main();  
          $refSeq_id="";
             
      }
      
}


#######################################################

close(LOGOPT);
`rm -rf $TMPFOLDER CDSblast.raw*  *.dnd `;


sub main  {
        $tmpFasRNA          ="$TMPFOLDER/tmpFasRNA.".$$;
        $tmpFasPEP          ="$TMPFOLDER/tmpFasPEP.".$$;
####***************************************************************        
        $tmpHumanRNA        ="$TMPFOLDER/tmpHumanRNA.$refSeq_id.".$$;
        
        $tmpHumanCDS        ="$TMPFOLDER/tmpHumanCDS.$refSeq_id.".$$;
        $tmpMouseCDS        ="$TMPFOLDER/tmpMouseCDS.$refSeq_id.".$$;
        $tmpChimpCDS        ="$TMPFOLDER/tmpChimpCDS.$refSeq_id.".$$;
        $tmpMacacaCDS      ="$TMPFOLDER/tmpMacacaCDS.$refSeq_id.".$$;
        $tmpRatCDS          ="$TMPFOLDER/tmpRatCDS.$refSeq_id.".$$;
        $tmpDogCDS          ="$TMPFOLDER/tmpDogCDS.$refSeq_id.".$$;
        $tmpCowCDS          ="$TMPFOLDER/tmpCowCDS.$refSeq_id.".$$;
        $tmpOpossumCDS      ="$TMPFOLDER/tmpOpossumCDS.$refSeq_id.".$$;
        $tmpChickenCDS      ="$TMPFOLDER/tmpChickenCDS.$refSeq_id.".$$;
        $tmpXenopusCDS      ="$TMPFOLDER/tmpXenopusCDS.$refSeq_id.".$$;
        $tmpArmadilloCDS    ="$TMPFOLDER/tmpArmadilloCDS.$refSeq_id.".$$;
        $tmpElephantCDS     ="$TMPFOLDER/tmpElephantCDS.$refSeq_id.".$$;
        $tmpRabbitCDS       ="$TMPFOLDER/tmpRabbitCDS.$refSeq_id.".$$;
        $tmpTenrecCDS       ="$TMPFOLDER/tmpTenrecCDS.$refSeq_id.".$$;
        $tmpPlatypusCDS     ="$TMPFOLDER/tmpPlatypusCDS.$refSeq_id.".$$;
#####################################################################      
        $tmpHumanPep        ="$TMPFOLDER/tmpHumanPep.$refSeq_id.".$$;
        $tmpMousePep        ="$TMPFOLDER/tmpMousePep.$refSeq_id.".$$;
        $tmpChimpPep        ="$TMPFOLDER/tmpChimpPep.$refSeq_id.".$$;
        $tmpMacacaPep      ="$TMPFOLDER/tmpMacacaPep.$refSeq_id.".$$;
        $tmpRatPep          ="$TMPFOLDER/tmpRatPep.$refSeq_id.".$$;
        $tmpDogPep          ="$TMPFOLDER/tmpDogPep.$refSeq_id.".$$;
        $tmpCowPep          ="$TMPFOLDER/tmpCowPep.$refSeq_id.".$$;
        $tmpOpossumPep      ="$TMPFOLDER/tmpOpossumPep.$refSeq_id.".$$;
        $tmpChickenPep      ="$TMPFOLDER/tmpChickenPep.$refSeq_id.".$$;
        $tmpXenopusPep      ="$TMPFOLDER/tmpXenopusPep.$refSeq_id.".$$;
        $tmpRabbitPep       ="$TMPFOLDER/tmpRabbitPep.$refSeq_id.".$$;
        $tmpArmadilloPep    ="$TMPFOLDER/tmpArmadilloPep.$refSeq_id.".$$;
        $tmpTenrecPep       ="$TMPFOLDER/tmpTenrecPep.$refSeq_id.".$$;
        $tmpElephantPep     ="$TMPFOLDER/tmpElephantPep.$refSeq_id.".$$;
        $tmpPlatypusPep     ="$TMPFOLDER/tmpPlatypusPep.$refSeq_id.".$$;
###############################################################################        
        $tmp_pep_align      ="$TMPFOLDER/tmp_pep_alignment.".$$;
        
        $tmpMultipleRNA     ="$refSeq_id.RNA.multi";
        $tmpMultiplePEP     ="$refSeq_id.PEP.multi";
        $tmpGoldenPep       ="$refSeq_id.GoldenPep.seq";    
########################################################################
        $tmp_PEP_align      =$tmpMultiplePEP; #="MultiplePEP.$refSeq_id.align";;
        $tmp_PEP_align      =~ s/multi$/nxs/;  #aln/;  
        $tmp_RNA_align      =$tmpMultipleRNA; 
        $tmp_RNA_align      =~ s/multi$/nxs/;
#        $tmp_PAML_align     =$tmp_RNA_align;                ###/

#        $tmp_PAML_align     =~ s/nxs$/paml\.ocpat/;
        $CDSinNEXUS         = $refSeq_id.".CDSalign.nxs";
        $CDSinPAML          = $refSeq_id.".CDS.paml";
##########################################################################  
        $Stringency=0.000001;
        $BITSCORE_THRESHOLD=140;
        $ALIGN_LENGTH_THRESHOLD=0.1;
             
               
        undef %reading_Frame;
        undef %frame_seq;
        $HUMAN_CDS_LEN=0;
        $exist_flag=0;
        $gene_symbol="";
        
        if ( $refSeq_id =~ /[NX]M_\d+/ ) { 
        	   print STDERR "Processing the 1st sequence for human...\n";
       	     $exist_flag= & getFastaByID($refSeq_id, $human_RNA); 
       	     if ( $exist_flag==1 ) {
       	     	    `mv $tmpFasRNA $tmpHumanRNA`;
                   $gold_pep_seq=""; 
                   ($cds_start, $cds_end, $gene_symbol, $gold_pep_seq)= & getCDSposition_pep($refSeq_id, $rna_geneBank); #$id might not be refSeq id
                   print LOGOPT "Human gene $refSeq_id has the following orthologs:\n";
                   print LOGOPT "Gene symbol: $gene_symbol\n";
                   
                   `echo ">Human golden pep" > $tmpGoldenPep `;
                   `echo $gold_pep_seq >> $tmpGoldenPep `;
                   
         #           ` head -1 $tmpHumanRNA > $tmp_PAML_align `;
                   & getCDS($tmpHumanRNA, $cds_start, $cds_end, $tmpHumanCDS);
#                   $HUMAN_CDS_LEN=$cds_end-$cds_start+1;
             #       $tmpHumanCDS will be generated
                   
                   $multiple_log=0;
                   ` cp $tmpHumanCDS $tmpGoldenRNA `;
             }	
        }elsif ( $refSeq_id =~ /^Seq\d+/ ) {
        	   print STDERR "Processing the $i_inputSequence_cnt th fasta formated sequence for human...\n";
       	     $exist_flag = & blastsearch_RefSeq($tmpGoldenRNA, $human_RNA_DB);  
       	     if ( $exist_flag==1 ) {
#       	     	    $refSeq_id = $ct_id;
       	     	    $homo2human{$refSeq_id} = $ct_id;
       	     	    `fastacmd -d $human_RNA_DB -p F -s $ct_id -o $tmpHumanRNA `;
       	     	    $gold_pep_seq=""; 
                  ($cds_start, $cds_end, $gene_symbol, $gold_pep_seq)= & getCDSposition_pep($ct_id, $rna_geneBank);
       	     	    
       	     	    print LOGOPT "Input sequence $i_inputSequence_cnt has human gene $ct_id as ortholog.\n";
                 #  print LOGOPT "Gene symbol: $gene_symbol\n";
       	     	    `echo ">Human golden pep" > $tmpGoldenPep `;
                   `echo $gold_pep_seq >> $tmpGoldenPep `;
                 & getCDS($tmpHumanRNA, $cds_start, $cds_end, $tmpHumanCDS);
       	     }
       	}      
        if ( $exist_flag==0 && $refSeq_id =~ /[NX]M_\d+/ ) {
       		        print STDERR "This id does not have corresponding RefSeq sequence.\n";
        }elsif ( $exist_flag==0 && $refSeq_id =~ /^Seq\d+/ ) {
       		        print STDERR "This input sequence does not have a corresponding human RefSeq sequence.\n";
        }else {
             
###########*******************************************************************************************             
       if ($getPan_troglodytes_Genome==1 ) {
             $exist_flag=0;
             if ( $homo2chimp{$refSeq_id} ) {
             	  $exist_flag = & getFastaByID($homo2chimp{$refSeq_id}, $chimp_RNA); 
             }
             if ( $exist_flag==1 ) {	  
              	  print LOGOPT "Chimp gene $homo2chimp{$refSeq_id} is referenced by homologene cluster.\n";
             } else {
                  & blastsearch_ensembl($tmpGoldenRNA, $chimp_RNA_DB, $gene_symbol);
                   if ( $ct_id eq "null" ) {
                       	 print LOGOPT "A Chimp gene homologous to $refSeq_id can not be identified.\n";
                  } else {
                  	     print LOGOPT "Chimp gene $ct_id with $max_similarity % concordance is pulled out by blast search.\n";
                  	     $homo2chimp{$refSeq_id}=$ct_id;
                  	     $multiple_log +=1;
                  }
             }
        }  
                
###########*******************************************************************************************             
        if (  $getMacaca_mulatta_Genome == 1 )  {
              $exist_flag=0;
              if ( $homo2macaca{$refSeq_id} ) {
             	     $exist_flag = & getFastaByID($homo2macaca{$refSeq_id}, $macaca_RNA); 
              }
              if ( $exist_flag==1 ) {	  
                	  print LOGOPT "Macaca gene $homo2macaca{$refSeq_id} is referenced by homologene cluster.\n";
              } else {
                    & blastsearch_ensembl($tmpGoldenRNA, $macaca_RNA_DB, $gene_symbol);
                    if ( $ct_id eq "null" ) {
                  	     print LOGOPT "A Macaca gene homologous to $refSeq_id can not be identified.\n";
                    } else {
                         print LOGOPT "Macaca gene $ct_id with $max_similarity % concordance is pulled out by blast search.\n";
                  	     $homo2macaca{$refSeq_id}=$ct_id;
                  	     $multiple_log +=1;
                    }
              }
        }  
                
###########*******************************************************************************************             
        if ( $getMus_musculus_Genome == 1 ) {
             $exist_flag=0;
             if ( $homo2mouse{$refSeq_id} ) {
                  $exist_flag=getFastaByID($homo2mouse{$refSeq_id}, $mouse_RNA); 
             }
             if ( $exist_flag==1 ) {
                  print LOGOPT "Mouse gene $homo2mouse{$refSeq_id} is referenced by homologene cluster.\n";
             } else {
                    & blastsearch_ensembl($tmpGoldenRNA, $mouse_RNA_DB, $gene_symbol);
                    if ( $ct_id eq "null" ) {
                  	      print LOGOPT "A Mouse gene homologous to $refSeq_id can not be identified.\n"; 
                    } else {
                          print LOGOPT "Mouse gene $ct_id with $max_similarity % concordance is pulled out by blast search.\n";
                          $homo2mouse{$refSeq_id}=$ct_id; 
                          $multiple_log +=1;   
                    }
             } 
        }    
###########*******************************************************************************************             
        if ( $getRattus_norvegicus_Genome == 1 )  {      
             $exist_flag=0;
             if ( $homo2rat{$refSeq_id} ) {
             	   $exist_flag=getFastaByID($homo2rat{$refSeq_id}, $rat_RNA); 
             }
             if ( $exist_flag==1 ) {	  
              	   print LOGOPT "Rat gene $homo2rat{$refSeq_id} is referenced by homologene cluster.\n"; 
             } else {
                   & blastsearch_ensembl($tmpGoldenRNA, $rat_RNA_DB, $gene_symbol);
                   if ( $ct_id eq "null" ) {
                  	    print LOGOPT "A Rat gene homologous to $refSeq_id can not be identified.\n";
                   } else {
                  	    print LOGOPT "Rat gene $ct_id with $max_similarity % concordance is pulled out by blast search.\n"; 
                  	    $homo2rat{$refSeq_id}=$ct_id;
                  	    $multiple_log +=1;
                   }
             }
        }

###########*******************************************************************************************             
        if ( $getOryctolagus_cuniculus_Genome ==1 )  {      
             $exist_flag=0;
             if ( $homo2rabbit{$refSeq_id} ) {
             	    $exist_flag=getFastaByID($homo2rabbit{$refSeq_id}, $rabbit_RNA); 
             }
             if ( $exist_flag==1 ) {	  
              	   print LOGOPT "Rabbit gene $homo2rabbit{$refSeq_id} is referenced by homologene cluster.\n"; 
             } else {
                   & blastsearch_ensembl($tmpGoldenRNA, $rabbit_RNA_DB, $gene_symbol);
                   if ( $ct_id eq "null" ) {
                  	    print LOGOPT "A Rabbit gene homologous to $refSeq_id can not be identified.\n";
                   } else {
                  	    print LOGOPT "Rabbit gene $ct_id with $max_similarity % concordance is pulled out by blast search.\n"; 
                  	    $homo2rabbit{$refSeq_id}=$ct_id;
                  	    $multiple_log +=1;
                   }
             }
        }

###########*******************************************************************************************     
        if ( $getCanis_familiaris_Genome ==1 )  {
             $exist_flag=0;
             if ( $homo2dog{$refSeq_id} ) {
             	    $exist_flag=getFastaByID($homo2dog{$refSeq_id}, $dog_RNA); 
             }
             if ( $exist_flag==1 ) {
              	   print LOGOPT "Dog gene $homo2dog{$refSeq_id} is referenced by homologene cluster.\n";
             } else {
                   & blastsearch_ensembl($tmpGoldenRNA, $dog_RNA_DB, $gene_symbol);
                   if ( $ct_id eq "null" ) {
                  	    print LOGOPT "A Dog gene homologous to $refSeq_id can not be identified.\n"; 
                   } else {
                  	    print LOGOPT "Dog gene $ct_id with $max_similarity % concordance is pulled out by blast search.\n"; 
                  	    $homo2dog{$refSeq_id}=$ct_id;
                  	    $multiple_log +=1;
                   }
             }
        }
                      
#**************************************************************************************************************
        if ( $getBos_taurus_Genome == 1 ) { 
             $exist_flag=0;
             if ( $homo2cow{$refSeq_id} ) {
             	   $exist_flag=getFastaByID($homo2cow{$refSeq_id}, $cow_RNA); 
             }
             if ( $exist_flag==1 ) {
             	    print LOGOPT "Cow gene $homo2cow{$refSeq_id} is referenced by homologene cluster.\n";
             } else {
                   & blastsearch_ensembl($tmpGoldenRNA, $cow_RNA_DB, $gene_symbol);
                   if ( $ct_id eq "null" ) {
                  	    print LOGOPT "A Cow gene homologous to $refSeq_id can not be identified.\n"; 
                   } else {
                  	    print LOGOPT "Cow gene $ct_id with $max_similarity % concordance is pulled out by blast search.\n"; 
                  	    $homo2cow{$refSeq_id}=$ct_id;
                  	    $multiple_log +=1;
                   }
             }
        }
###########*******************************************************************************************             
 
#**************************************************************************************************************            
             if (  $multiple_log > 2 ) {
#             	   local %my_align_DNA;
             	   undef %my_align_DNA;
                   
                   `cat $tmpHumanCDS > $tmpMultipleRNA `;
                   if ( $homo2chimp{$refSeq_id} ) {
                   	 `fastacmd -d $chimp_RNA_DB -p F -s $homo2chimp{$refSeq_id} -o $tmpChimpCDS `;
                   	 `cat $tmpChimpCDS >> $tmpMultipleRNA `;
                   }
                   if ( $homo2macaca{$refSeq_id} ) {
                   	 `fastacmd -d $macaca_RNA_DB -p F -s $homo2macaca{$refSeq_id} -o $tmpMacacaCDS `;
                   	 `cat $tmpMacacaCDS >> $tmpMultipleRNA `;
                   }  
                   if ( $homo2mouse{$refSeq_id} ) {      
             	         `fastacmd -d $mouse_RNA_DB -p F -s $homo2mouse{$refSeq_id} -o $tmpMouseCDS `;
             	         `cat $tmpMouseCDS >> $tmpMultipleRNA `;
                   } 
                   if ( $homo2rat{$refSeq_id} ) {
             	         `fastacmd -d $rat_RNA_DB -p F -s $homo2rat{$refSeq_id} -o $tmpRatCDS `;
             	         `cat $tmpRatCDS >> $tmpMultipleRNA `;
                   } 
                   if ( $homo2rabbit{$refSeq_id} ) {
             	         `fastacmd -d $rabbit_RNA_DB -p F -s $homo2rabbit{$refSeq_id} -o $tmpRabbitCDS `;
             	         `cat $tmpRabbitCDS >> $tmpMultipleRNA `;
                   } 
                   if ( $homo2dog{$refSeq_id} ) {
             	         `fastacmd -d $dog_RNA_DB -p F -s $homo2dog{$refSeq_id} -o $tmpDogCDS `;
             	         `cat $tmpDogCDS >> $tmpMultipleRNA `;
                   }      
                   if ( $homo2cow{$refSeq_id} ) {
             	         `fastacmd -d $cow_RNA_DB -p F -s $homo2cow{$refSeq_id} -o $tmpCowCDS `;
             	         `cat $tmpCowCDS >> $tmpMultipleRNA `;
                   }      
                   
                   $clustalw_parameter="-align -outorder=input -case=upper -seqnos=on -output=nexus";
                   `clustalw -infile=$tmpMultipleRNA $clustalw_parameter`; ###$tmpMultipleRNA `;
                  
#                   `echo "*****************************************************************************"  >> $tmp_PAML_align `; 
#                   `echo "Original Multiple RNA alignments of six species:" >> $tmp_PAML_align `; 
#                   `cat $tmp_RNA_align >> $tmp_PAML_align `; 

                   & assessRNAalignment($tmp_RNA_align) ;
                   
#_____________________________________________________________________________________________
#_____________________________________________________________________________________________
                     
   #re-define human CDS and Pep                
                   `echo ">human" > $tmpHumanCDS `;
                   `echo $my_align_DNA{"human"} >> $tmpHumanCDS `;
                   
                   $maximum_align_score=0;
             	   for ($rf=1; $rf<4; $rf ++) {
                       & translate2pep_by_readingFrame($tmpHumanCDS,$rf,$tmpFasPEP);
                      `bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F`;
                      `echo "Two peps alignment: human at reading frame $rf to human:\n">>$preresult`;
                      `cat $tmp_pep_align>>$preresult`;
                       $alignmentScore{$rf}= & parseBlastp($tmp_pep_align);
                       if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	    $maximum_align_score=$alignmentScore{$rf};
                       }
                  }
                  for ($rf=1; $rf<4; $rf ++) {
                  	if ( $alignmentScore{$rf} == $maximum_align_score ) {
                       	     $reading_Frame{"human"}=$rf;
                       	     & translate2pep_by_readingFrame($tmpHumanCDS,$rf,$tmpHumanPep);
                       	     $frame_seq{"human"}=getRNAseqinFrame($tmpHumanCDS, $rf);
                       }
                  }
                  `cat $tmpHumanPep > $tmpMultiplePEP `;
                  `cat $tmpHumanCDS > $tmpMultipleRNA `;
                  undef %alignmentScore;
#_____________________________________________________________________________________________
#_____________________________________________________________________________________________
                   
                  if ( $homo2chimp{$refSeq_id} ) {
                  	`echo ">chimp" > $tmpChimpCDS `;
                        `echo $my_align_DNA{"chimp"} >> $tmpChimpCDS `;
             	        $maximum_align_score=0;
             	        for ($rf=1; $rf<4; $rf ++) {
                            & translate2pep_by_readingFrame($tmpChimpCDS,$rf,$tmpFasPEP);
                            `bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F`;
                            `echo "Two peps alignment: chimp at reading frame $rf to human:\n">>$preresult`;
                            `cat $tmp_pep_align>>$preresult`;
                            $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                            if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	         $maximum_align_score=$alignmentScore{$rf};
                            }
                        }
                        for ($rf=1; $rf<4; $rf ++) {
                  	     if ( $alignmentScore{$rf} == $maximum_align_score ) {
                       	          $reading_Frame{"chimp"}=$rf;
                       	          & translate2pep_by_readingFrame($tmpChimpCDS,$rf,$tmpChimpPep);
                       	          $frame_seq{"chimp"}=getRNAseqinFrame($tmpChimpCDS, $rf);    
                             }
                        }
                        `cat $tmpChimpCDS >> $tmpMultipleRNA `;
                        `cat $tmpChimpPep >> $tmpMultiplePEP `;
                        undef %alignmentScore;
             	  }
             	            
#_____________________________________________________________________________________________
#_____________________________________________________________________________________________
                  if ( $homo2macaca{$refSeq_id} ) {
                  	`echo ">macaca" > $tmpMacacaCDS `;
                        `echo $my_align_DNA{"macaca"} >> $tmpMacacaCDS `;
             	        $maximum_align_score=0;
             	        for ($rf=1; $rf<4; $rf ++) {
                            & translate2pep_by_readingFrame($tmpMacacaCDS,$rf,$tmpFasPEP);
                            `bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F`;
                            `echo "Two peps alignment: macaca at reading frame $rf to human:\n">>$preresult`;
                            `cat $tmp_pep_align>>$preresult`;
                            $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                            if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	         $maximum_align_score=$alignmentScore{$rf};
                            }
                        }
                        for ($rf=1; $rf<4; $rf ++) {
                  	     if ( $alignmentScore{$rf} == $maximum_align_score ) {
                       	          $reading_Frame{"macaca"}=$rf;
                       	          & translate2pep_by_readingFrame($tmpMacacaCDS,$rf,$tmpMacacaPep);
                       	          $frame_seq{"macaca"}=getRNAseqinFrame($tmpMacacaCDS, $rf);    
                             }
                        }
                        `cat $tmpMacacaCDS >> $tmpMultipleRNA `;
                        `cat $tmpMacacaPep >> $tmpMultiplePEP `;
                        undef %alignmentScore;
             	  }
             	            
#_____________________________________________________________________________________________
#_____________________________________________________________________________________________
                             
             
                  if ( $homo2mouse{$refSeq_id} ) {      
                  	`echo ">mouse" > $tmpMouseCDS `;
                        `echo $my_align_DNA{"mouse"} >> $tmpMouseCDS `;
             	        $maximum_align_score=0;
             	        for ($rf=1; $rf<4; $rf ++) {
                             & translate2pep_by_readingFrame($tmpMouseCDS,$rf,$tmpFasPEP);
                             `bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F`;
                             `echo "Two peps alignment: mouse at reading frame $rf to human:\n">>$preresult`;
                             `cat $tmp_pep_align>>$preresult`;
                             $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                             if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	          $maximum_align_score=$alignmentScore{$rf};
                             }
                        }
                        for ($rf=1; $rf<4; $rf ++) {
                  	     if ( $alignmentScore{$rf} == $maximum_align_score ) {
                       	          $reading_Frame{"mouse"}=$rf;
                       	          & translate2pep_by_readingFrame($tmpMouseCDS,$rf,$tmpMousePep);
                       	          $frame_seq{"mouse"}=getRNAseqinFrame($tmpMouseCDS, $rf);
                             }
                        }
                        `cat $tmpMouseCDS >> $tmpMultipleRNA `;
                        `cat $tmpMousePep >> $tmpMultiplePEP `;
                        undef %alignmentScore;
             	  }
                 
#_____________________________________________________________________________________________
#_____________________________________________________________________________________________
               
             
                  if ( $homo2rat{$refSeq_id} ) {      
                  	`echo ">rat" > $tmpRatCDS `;
                        `echo $my_align_DNA{"rat"} >> $tmpRatCDS `;
             	        $maximum_align_score=0;
             	        for ($rf=1; $rf<4; $rf ++) {
                             & translate2pep_by_readingFrame($tmpRatCDS,$rf,$tmpFasPEP);
                             `bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F `;
                             `echo "Two peps alignment: rat at reading frame $rf to human:\n">>$preresult`;
                             `cat $tmp_pep_align>>$preresult`;
                             $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                             if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	          $maximum_align_score=$alignmentScore{$rf};
                             }
                        }
                        for ($rf=1; $rf<4; $rf ++) {
                  	     if ( $alignmentScore{$rf} == $maximum_align_score ) {
                       	          $reading_Frame{"rat"}=$rf;
                       	          & translate2pep_by_readingFrame($tmpRatCDS,$rf,$tmpRatPep);
                       	          $frame_seq{"rat"}=getRNAseqinFrame($tmpRatCDS, $rf);
                             }
                        }
                        `cat $tmpRatCDS >> $tmpMultipleRNA `;
                        `cat $tmpRatPep >> $tmpMultiplePEP `;
                         undef %alignmentScore;
             	  }
#_____________________________________________________________________________________________
#_____________________________________________________________________________________________
               
             
                  if ( $homo2rabbit{$refSeq_id} ) {      
                  	`echo ">rabbit" > $tmpRabbitCDS `;
                        `echo $my_align_DNA{"rabbit"} >> $tmpRabbitCDS `;
             	        $maximum_align_score=0;
             	        for ($rf=1; $rf<4; $rf ++) {
                             & translate2pep_by_readingFrame($tmpRabbitCDS,$rf,$tmpFasPEP);
                             `bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F`;
                             `echo "Two peps alignment: rabbit at reading frame $rf to human:\n">>$preresult`;
                             `cat $tmp_pep_align>>$preresult`;
                             $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                             if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	          $maximum_align_score=$alignmentScore{$rf};
                             }
                        }
                        for ($rf=1; $rf<4; $rf ++) {
                  	     if ( $alignmentScore{$rf} == $maximum_align_score ) {
                       	          $reading_Frame{"rabbit"}=$rf;
                       	          & translate2pep_by_readingFrame($tmpRabbitCDS,$rf,$tmpRabbitPep);
                       	          $frame_seq{"rabbit"}=getRNAseqinFrame($tmpRabbitCDS, $rf);
                             }
                        }
                        `cat $tmpRabbitCDS >> $tmpMultipleRNA `;
                        `cat $tmpRabbitPep >> $tmpMultiplePEP `;
                         undef %alignmentScore;
             	  }
#_____________________________________________________________________________________________
#_____________________________________________________________________________________________
                  
                  if ( $homo2dog{$refSeq_id} ) {
                  	`echo ">dog" > $tmpDogCDS `;
                        `echo $my_align_DNA{"dog"} >> $tmpDogCDS `;
             	        $maximum_align_score=0;
             	        for ($rf=1; $rf<4; $rf ++) {
                            & translate2pep_by_readingFrame($tmpDogCDS,$rf,$tmpFasPEP);
                            `bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F `;
                            `echo "Two peps alignment: dog at reading frame $rf to human:\n">>$preresult`;
                            `cat $tmp_pep_align>>$preresult`;
                            $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                            if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	         $maximum_align_score=$alignmentScore{$rf};
                            }
                        }
                        for ($rf=1; $rf<4; $rf ++) {
                  	     if ( $alignmentScore{$rf} == $maximum_align_score ) {
                       	          $reading_Frame{"dog"}=$rf;
                       	          & translate2pep_by_readingFrame($tmpDogCDS,$rf,$tmpDogPep);
                       	          $frame_seq{"dog"}=getRNAseqinFrame($tmpDogCDS, $rf);    
                             }
                        }
                        `cat $tmpDogCDS >> $tmpMultipleRNA `;
                        `cat $tmpDogPep >> $tmpMultiplePEP `;
                        undef %alignmentScore;
             	  }                                   
#_____________________________________________________________________________________________
#_____________________________________________________________________________________________
 
             
                  if ( $homo2cow{$refSeq_id} ) {      
                  	`echo ">cow" > $tmpCowCDS `;
                        `echo $my_align_DNA{"cow"} >> $tmpCowCDS `;
             	        $maximum_align_score=0;
             	        for ($rf=1; $rf<4; $rf ++) {
                             & translate2pep_by_readingFrame($tmpCowCDS,$rf,$tmpFasPEP);
                             `bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F `;
                             `echo "Two peps alignment: cow at reading frame $rf to human:\n">>$preresult`;
                             `cat $tmp_pep_align>>$preresult`;
                             $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                             if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	          $maximum_align_score=$alignmentScore{$rf};
                             }
                        }
                        for ($rf=1; $rf<4; $rf ++) {
                  	     if ( $alignmentScore{$rf} == $maximum_align_score ) {
                       	          $reading_Frame{"cow"}=$rf;
                       	          & translate2pep_by_readingFrame($tmpCowCDS,$rf,$tmpCowPep);
                       	          $frame_seq{"cow"}=getRNAseqinFrame($tmpCowCDS, $rf);
                             }
                        }
                        `cat $tmpCowCDS >> $tmpMultipleRNA `;
                        `cat $tmpCowPep >> $tmpMultiplePEP `;
                        undef %alignmentScore;
             	  }


          
             }  else {
             	   next;
             }   	
                  	
###########*******************************************************************************************             
#loose the requirement:
             $Stringency=0.00001;
             $BITSCORE_THRESHOLD=100;
             $ALIGN_LENGTH_THRESHOLD=0.08;
######******************************************************************************************
#armadillo
       if ( $getDasypus_novemcinctus_Genome ==1 ) {  
             & blastsearch_ensembl($tmpGoldenRNA, $armadillo_RNA_DB, $gene_symbol);
             if ( $ct_id eq "null" ) {
               	  print LOGOPT "An Armadillo gene homologous to $refSeq_id can not be identified.\n";
             } else {
              	  print LOGOPT "Armadillo gene $ct_id with $max_similarity % concordance is  pulled out by blast search.\n"; 
                   $homo2armadillo{$refSeq_id} = $ct_id;
             }
          

             if ( $homo2armadillo{$refSeq_id} ) {
             	    $maximum_align_score=0;
             	    `fastacmd -d $armadillo_RNA_DB -p F -s $homo2armadillo{$refSeq_id} -o $tmpArmadilloCDS `;

             	    for ($rf=1; $rf<4; $rf ++) {
                       & translate2pep_by_readingFrame($tmpArmadilloCDS,$rf,$tmpFasPEP);
                      `bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F `;
                      `echo "Two peps alignment: armadillo at reading frame $rf to human:\n">>$preresult`;
                      `cat $tmp_pep_align>>$preresult`;
                       $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                       if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	    $maximum_align_score=$alignmentScore{$rf};
                       }
                  }
                  for ($rf=1; $rf<4; $rf ++) {
                       if (  $alignmentScore{$rf} == $maximum_align_score ) {
                       	     $reading_Frame{"armadillo"}=$rf;
                       	     & translate2pep_by_readingFrame($tmpArmadilloCDS,$rf,$tmpArmadilloPep);
                       	     $frame_seq{"armadillo"}=getRNAseqinFrame($tmpArmadilloCDS, $rf);
                       }
                  }
                  if ( -s $tmpArmadilloCDS ) {
                  	   `echo ">armadillo" >> $tmpMultiplePEP `;
             	         `tail +2 $tmpArmadilloPep >> $tmpMultiplePEP `;
                        `cat $tmpArmadilloCDS >> $tmpMultipleRNA `;
                         $multiple_log +=1;
                         undef %alignmentScore;
                  }
             }
       }
###########*******************************************************************************************             
 #=elephant   
       if (  $getLoxodonta_africana_Genome == 1 ) {
             & blastsearch_ensembl($tmpGoldenRNA, $elephant_RNA_DB, $gene_symbol);
             if ( $ct_id eq "null" ) {
               	  print LOGOPT "An Elephant gene homologous to $refSeq_id can not be identified.\n";
             } else {
              	  print LOGOPT "Elephant gene $ct_id with $max_similarity % concordance is  pulled out by blast search.\n"; 
                   $homo2elephant{$refSeq_id} = $ct_id;
             }
          

             if ( $homo2elephant{$refSeq_id} ) {
             	    $maximum_align_score=0;
             	    `fastacmd -d $elephant_RNA_DB -p F -s $homo2elephant{$refSeq_id} -o $tmpElephantCDS `;

             	    for ($rf=1; $rf<4; $rf ++) {
                       & translate2pep_by_readingFrame($tmpElephantCDS,$rf,$tmpFasPEP);
                      `bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F `;
                      `echo "Two peps alignment: elephant at reading frame $rf to human:\n">>$preresult`;
                      `cat $tmp_pep_align>>$preresult`;
                       $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                       if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	    $maximum_align_score=$alignmentScore{$rf};
                       }
                  }
                  for ($rf=1; $rf<4; $rf ++) {
                       if (  $alignmentScore{$rf} == $maximum_align_score ) {
                       	     $reading_Frame{"elephant"}=$rf;
                       	     & translate2pep_by_readingFrame($tmpElephantCDS,$rf,$tmpElephantPep);
                       	     $frame_seq{"elephant"}=getRNAseqinFrame($tmpElephantCDS, $rf);
                       }
                  }
                  if ( -s $tmpElephantCDS ) {
                  	   `echo ">elephant" >> $tmpMultiplePEP `;
             	         `tail +2 $tmpElephantPep >> $tmpMultiplePEP `;
                        `cat $tmpElephantCDS >> $tmpMultipleRNA `;
                         $multiple_log +=1;
                         undef %alignmentScore;
                  }
             }
       }
###########*******************************************************************************************  
#tenrec
       if (  $getEchinops_telfairi_Genome ==1 ) {
             & blastsearch_ensembl($tmpGoldenRNA, $tenrec_RNA_DB, $gene_symbol);
             if ( $ct_id eq "null" ) {
               	  print LOGOPT "An Tenrec gene homologous to $refSeq_id can not be identified.\n";
             } else {
              	  print LOGOPT "Tenrec gene $ct_id with $max_similarity % concordance is  pulled out by blast search.\n"; 
                   $homo2tenrec{$refSeq_id} = $ct_id;
             }
          

             if ( $homo2tenrec{$refSeq_id} ) {
             	    $maximum_align_score=0;
             	    `fastacmd -d $tenrec_RNA_DB -p F -s $homo2tenrec{$refSeq_id} -o $tmpTenrecCDS `;

             	    for ($rf=1; $rf<4; $rf ++) {
                       & translate2pep_by_readingFrame($tmpTenrecCDS,$rf,$tmpFasPEP);
                      `bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F `;
                      `echo "Two peps alignment: tenrec at reading frame $rf to human:\n">>$preresult`;
                      `cat $tmp_pep_align>>$preresult`;
                       $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                       if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	    $maximum_align_score=$alignmentScore{$rf};
                       }
                  }
                  for ($rf=1; $rf<4; $rf ++) {
                       if (  $alignmentScore{$rf} == $maximum_align_score ) {
                       	     $reading_Frame{"tenrec"}=$rf;
                       	     & translate2pep_by_readingFrame($tmpTenrecCDS,$rf,$tmpTenrecPep);
                       	     $frame_seq{"tenrec"}=getRNAseqinFrame($tmpTenrecCDS, $rf);
                       }
                  }
                  if ( -s $tmpTenrecCDS ) {
                  	   `echo ">tenrec" >> $tmpMultiplePEP `;
             	         `tail +2 $tmpTenrecPep >> $tmpMultiplePEP `;
                        `cat $tmpTenrecCDS >> $tmpMultipleRNA `;
                         $multiple_log +=1;
                         undef %alignmentScore;
                  }
             }
       }
###########*******************************************************************************************  
#opossum
       if ( $getMonodelphis_domestica_Genome == 1 ) {
            & blastsearch_ensembl($tmpGoldenRNA, $opossum_RNA_DB, $gene_symbol);
            if ( $ct_id eq "null" ) {
               	  print LOGOPT "An Opossum gene homologous to $refSeq_id can not be identified.\n";
             } else {
              	  print LOGOPT "Opossum gene $ct_id with $max_similarity % concordance is  pulled out by blast search.\n"; 
                  $homo2opossum{$refSeq_id} = $ct_id;
             }
         
             if ( $homo2opossum{$refSeq_id} ) {
             	    $maximum_align_score=0;
             	    `fastacmd -d $opossum_RNA_DB -p F -s $homo2opossum{$refSeq_id} -o $tmpOpossumCDS `;

             	  for ($rf=1; $rf<4; $rf ++) {
                       & translate2pep_by_readingFrame($tmpOpossumCDS,$rf,$tmpFasPEP);
                      `bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F `;
                      `echo "Two peps alignment: opossum at reading frame $rf to human:\n">>$preresult`;
                      `cat $tmp_pep_align>>$preresult`;
                       $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                       if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	    $maximum_align_score=$alignmentScore{$rf};
                       }
                  }
                  for ($rf=1; $rf<4; $rf ++) {
                       if (  $alignmentScore{$rf} == $maximum_align_score ) {
                       	     $reading_Frame{"opossum"}=$rf;
                       	     & translate2pep_by_readingFrame($tmpOpossumCDS,$rf,$tmpOpossumPep);
                       	     $frame_seq{"opossum"}=getRNAseqinFrame($tmpOpossumCDS, $rf);
                       }
                  }
                  if ( -s $tmpOpossumCDS ) {
                  	   `echo ">opossum" >> $tmpMultiplePEP `;
             	         `tail +2 $tmpOpossumPep >> $tmpMultiplePEP `;
                        `cat $tmpOpossumCDS >> $tmpMultipleRNA `;
                         $multiple_log +=1;
                         undef %alignmentScore;
                  }
             }
       }
###########*******************************************************************************************             
#chicken
       if ( $getGallus_gallus_Genome == 1 ) { 
            & blastsearch_ensembl($tmpGoldenRNA, $chicken_RNA_DB, $gene_symbol);
            if ( $ct_id eq "null" ) {
                	print LOGOPT "A Chicken gene homologous to $refSeq_id can not be identified.\n"; 
            } else {
                	print LOGOPT "Chicken gene $ct_id with $max_similarity % concordance is pulled out by blast search.\n"; 
                  $homo2chicken{$refSeq_id}=$ct_id;
            }
           
            if ( $homo2chicken{$refSeq_id} ) {
             	 ` fastacmd -d $chicken_RNA_DB -p F -s $homo2chicken{$refSeq_id} -o $tmpChickenCDS `;
                  $maximum_align_score=0;
             	   for ($rf=1; $rf<4; $rf ++) {
                      & translate2pep_by_readingFrame($tmpChickenCDS,$rf,$tmpFasPEP);
                      ` bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F `;
                      `echo "Two peps alignment: chicken at reading frame $rf to human:\n">>$preresult`;
                      `cat $tmp_pep_align>>$preresult`;
                       $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                       if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	    $maximum_align_score=$alignmentScore{$rf};
                       }
                  }
                  for ($rf=1; $rf<4; $rf ++) {
                       if (  $alignmentScore{$rf} == $maximum_align_score ) {
                       	     $reading_Frame{"chicken"}=$rf;
                       	     & translate2pep_by_readingFrame($tmpChickenCDS,$rf,$tmpChickenPep);
                       	     $frame_seq{"chicken"}=getRNAseqinFrame($tmpChickenCDS, $rf);
                       }
                  }  
             }
             if ( -e $tmpChickenCDS ) {
             	  `echo ">chicken" >> $tmpMultiplePEP `;
             	  `tail +2 $tmpChickenPep >> $tmpMultiplePEP `;
                  `cat $tmpChickenCDS >> $tmpMultipleRNA `;
                  $multiple_log +=1;
                  undef %alignmentScore;
             }
       }
###########*******************************************************************************************             
#=XENOPUS
       if ( $getXenopus_tropicalis_Genome ==1 ) {
            & blastsearch_ensembl($tmpGoldenRNA, $xenopus_RNA_DB, $gene_symbol);
            if ( $ct_id eq "null" ) {
                 	print LOGOPT "A Xenopus gene homologous to $refSeq_id can not be identified.\n";
            } else {
                	print LOGOPT "Xenopus gene $ct_id with $max_similarity % concordance is pulled out by blast search.\n"; 
                  $homo2xenopus{$refSeq_id}=$ct_id;
            }
            if ( $homo2xenopus{$refSeq_id} ) {
             	 ` fastacmd -d $xenopus_RNA_DB -p F -s $homo2xenopus{$refSeq_id} -o $tmpXenopusCDS `;
                  $maximum_align_score=0;
             	   for ($rf=1; $rf<4; $rf ++) {
                       & translate2pep_by_readingFrame($tmpXenopusCDS,$rf,$tmpFasPEP);
                      ` bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F `;
                      `echo "Two peps alignment: xenopus at reading frame $rf to human:\n">>$preresult`;
                      `cat $tmp_pep_align>>$preresult`;
                       $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                       if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	    $maximum_align_score=$alignmentScore{$rf};
                       }
                  }
                  for ($rf=1; $rf<4; $rf ++) {
                       if (  $alignmentScore{$rf} == $maximum_align_score ) {
                       	     $reading_Frame{"xenopus"}=$rf;
                       	     & translate2pep_by_readingFrame($tmpXenopusCDS,$rf,$tmpXenopusPep);
                       	     $frame_seq{"xenopus"}=getRNAseqinFrame($tmpXenopusCDS, $rf);
                       }
                  }  
             }
             if ( -e $tmpXenopusCDS ) {
             	  `echo ">xenopus" >> $tmpMultiplePEP `;
             	  `tail +2 $tmpXenopusPep >> $tmpMultiplePEP `;
#                  `cat $tmpXenopusPep >> $tmpMultiplePEP `;
                  `cat $tmpXenopusCDS >> $tmpMultipleRNA `;
                  $multiple_log +=1;
                  undef %alignmentScore;
             }
        }    
###########*******************************************************************************************                     
#=PLATYPUS
       if ( $getOrnithorhynchus_anatinus_Genome ==1 ) {
            & blastsearch_ensembl($tmpGoldenRNA, $platypus_RNA_DB, $gene_symbol);
            if ( $ct_id eq "null" ) {
                 	print LOGOPT "A Platypus gene homologous to $refSeq_id can not be identified.\n";
            } else {
                	print LOGOPT "Platypus gene $ct_id with $max_similarity % concordance is pulled out by blast search.\n"; 
                  $homo2platypus{$refSeq_id}=$ct_id;
            }
            if ( $homo2platypus{$refSeq_id} ) {
             	 ` fastacmd -d $platypus_RNA_DB -p F -s $homo2platypus{$refSeq_id} -o $tmpPlatypusCDS `;
                  $maximum_align_score=0;
             	   for ($rf=1; $rf<4; $rf ++) {
                       & translate2pep_by_readingFrame($tmpPlatypusCDS,$rf,$tmpFasPEP);
                      ` bl2seq -p blastp -i $tmpGoldenPep -j $tmpFasPEP -o $tmp_pep_align -e $Stringency -F F `;
                      `echo "Two peps alignment: platypus at reading frame $rf to human:\n">>$preresult`;
                      `cat $tmp_pep_align>>$preresult`;
                       $alignmentScore{$rf}=parseBlastp($tmp_pep_align);
                       if ( $alignmentScore{$rf} > $maximum_align_score ) {
                       	    $maximum_align_score=$alignmentScore{$rf};
                       }
                  }
                  for ($rf=1; $rf<4; $rf ++) {
                       if (  $alignmentScore{$rf} == $maximum_align_score ) {
                       	     $reading_Frame{"platypus"}=$rf;
                       	     & translate2pep_by_readingFrame($tmpPlatypusCDS,$rf,$tmpPlatypusPep);
                       	     $frame_seq{"platypus"} = getRNAseqinFrame($tmpPlatypusCDS, $rf);
                       }
                  }  
             }
             if ( -e $tmpPlatypusCDS ) {
             	  `echo ">platypus" >> $tmpMultiplePEP `;
             	  `tail +2 $tmpPlatypusPep >> $tmpMultiplePEP `;
#                  `cat $tmpXenopusPep >> $tmpMultiplePEP `;
                  `cat $tmpPlatypusCDS >> $tmpMultipleRNA `;
                  $multiple_log +=1;
                  undef %alignmentScore;
             }
        }    
###########*******************************************************************************************  
###########*******************************************************************************************   
        if ( $multiple_log > 2 ) {
#              `echo "Translated peptides sequences:\n" >> $tmp_PAML_align `;
#              `cat $tmpMultiplePEP >> $tmp_PAML_align `; 
              $clustalw_parameter="-align -type=protein -outorder=input -case=upper -seqnos=on -output=nexus -gapopen=1";
             `clustalw -infile=$tmpMultiplePEP $clustalw_parameter`; ###$tmpMultipleRNA `;
#need to translate the pep alignment into DNA alignment 
#              `echo "\n\n*******************************************************\n" >> $tmp_PAML_align `;  
#              `echo "Multiple PEP alignment from the Clustalw program:" >> $tmp_PAML_align `;
#               `cat $tmp_PEP_align >> $tmp_PAML_align `; 
               ($my_start, $my_end)= translatePep2DNAalign($tmp_PEP_align,$tmp_RNA_align);   #also need %frame_seq; 
               
#               `echo "\n\nMultiple RNA alignment translated from the Clustalw program:\n\n" >> $tmp_PAML_align `;
#               `cat $tmp_RNA_align >> $tmp_PAML_align `; 
               & extract_core_alignment($tmp_RNA_align, $my_start, $my_end); 
          }
     }
}



##################################################################################
#section 2: functions
##################################################################################
sub extract_core_alignment  {
    my($alignment_file,$alignment_start,$alignment_end)=@_;
    $seq_name="";
    %seq_array=();
    my $align_len=0;
 
    $alignment_output=$alignment_file;
    $alignment_output=~ s/nxs$/phylip/;
    undef @SPECIES;

#    `echo "\n\n---------------------------------------------------------------" >> $result_output `;
    
    
    open(IN, $alignment_file)||die "Can't open the input alignment file $!\n";
    while(<IN>) {
          	chop;
          	if (/^\#NEXUS|^CLUSTAL/ ) {
          		    $flag=0;
            }
            if (/^human\s+\S+/ ) {
                  	$flag=1;
            }
            if ( $flag==1 && /^(\S+)(\s+)(\S+)/ ){
                  	$fragmnent=$_;
                  	$seq_name=$1;
                  	if ( !$seq{$seq_name} ) {
                  	    push @SPECIES, $seq_name;
                  	}
                  	$space=$2;
                  	$fragmnent=~ s/^$seq_name//;
                  	$fragmnent=~ s/^$space//;
                  	$seq{$seq_name} .=$fragmnent;
            }
    }
    close(IN);
    
    $specie_cnt=0;
    foreach $k(keys %seq ) {
      	  @{$seq_array[$specie_cnt]} = split('',$seq{$k});
      	  $align_len=@{$seq_array[$specie_cnt]};
      	  $specie_name{$specie_cnt}=$k;
          $specie_cnt +=1;
    }
    
    undef %seq;
    undef %align_seq;
    
    #identify core alignment
    $core_start=3*$alignment_start;
    $core_end=3*$alignment_end;
    my $is_onlyTaxonWithInsertion; 
    my $tmpCodon="";
    for ($i=$core_start; $i<$core_end+1; $i +=3 ) {
    	   $is_onlyTaxonWithInsertion=$specie_cnt;
         for ($j=0; $j<$specie_cnt; $j ++) {    	
         	    $tmpCodon = ${$seq_array[$j]}[$i].${$seq_array[$j]}[$i+1].${$seq_array[$j]}[$i+2]; 
         	    if ( $tmpCodon eq '---' || $tmpCodon eq '???' ) {
         	    	    $is_onlyTaxonWithInsertion -=1;
         	    }
         } 
         if ( $is_onlyTaxonWithInsertion > 1 ) {
               for ($j=0; $j<$specie_cnt; $j ++) {    	
         	          $align_seq{$specie_name{$j}} .= ${$seq_array[$j]}[$i].${$seq_array[$j]}[$i+1];
         	          $align_seq{$specie_name{$j}} .= ${$seq_array[$j]}[$i+2];  
               } 
         }
    }

    for ($j=0; $j<$specie_cnt; $j ++) {
          undef @{$seq_array[$j]};
    }
    
    
    #********************************************************************
    undef %specie_name;
    $specie_cnt=0;
    foreach $k(keys %align_seq ) {
    	@{$seq_array[$specie_cnt]} = split('',$align_seq{$k});
    	$align_len=@{$seq_array[$specie_cnt]};
            $specie_name{$specie_cnt}=$k;
            $specie_cnt +=1;	
    }
    undef %align_seq;

    for ($i=0; $i<$align_len; $i +=3) {
         for ($j=0; $j<$specie_cnt; $j ++) {
         	   $my_stopCodon_cnt=0;
       
         	   my $my_codon=${$seq_array[$j]}[$i].${$seq_array[$j]}[$i+1].${$seq_array[$j]}[$i+2];
             $my_codon=~ tr/atguc/ATGUC/;

              if ( $my_codon eq 'TAG'  ) {
    	             $my_stopCodon_cnt +=1;
           	  }elsif ( $my_codon eq 'TAA'  ) {
           	       $my_stopCodon_cnt +=1;
           	  }elsif ( $my_codon eq 'TGA'  ) {
           	       $my_stopCodon_cnt +=1;
           #	  }elsif ( $my_codon eq 'ATG'  ) {
           #	       $my_stopCodon_cnt +=1;
           	  }
           	  
           	  if ( $my_stopCodon_cnt >0 ) {
           	       $my_codon="???";
           	  }	       
              $align_seq{$specie_name{$j}} .= $my_codon ;  
         }
    }
    
    for ($j=0; $j<$specie_cnt; $j ++) {
         undef @{$seq_array[$j]};
    }


#******************************************************************

	
    foreach $k(keys %align_seq ) {
    	   $align_len=length($align_seq{$k});
    }
    
    open(OPTB, ">$CDSinNEXUS")||die "Can't open the output file $!\n";
    print OPTB "#NEXUS\n";
    print OPTB "BEGIN DATA;\n";
    print OPTB "dimensions ntax=$specie_cnt nchar=$align_len;\n";
    print OPTB "format missing=?\n";
    print OPTB "symbols=\"ABCDEFGHIKLMNPQRSTUVWXYZ\"\n";
    print OPTB "interleave datatype=DNA gap= -;\n\n";
    print OPTB "matrix\n";
    
    $ALIGNED_NUCS=$align_len;

    $my_position=0;
    my $my_k;
    
    while ($my_position < $align_len ) {
    for ($my_k=0; $my_k < $specie_cnt; $my_k  ++ ) {
          print OPTB "$SPECIES[$my_k]\t";
          $my_this_line= substr($align_seq{$SPECIES[$my_k]}, $my_position, 60);
    #      $my_this_line=~ s/\s//g;
          print OPTB "$my_this_line\n";
    } 
    print OPTB "\n";
    $my_position +=60;
    }
    print OPTB ";\n";
    print OPTB "end;\n\n";
    close(OPTB);
#    print OPTB "\n\n\n################################################################################\n\n";   
    
    foreach $k(keys %align_seq ) {
          $align_seq{$k} =~ s/(\S{3})/$1 /g;
          $align_seq{$k} =~ s/((\S\S\S\s){20})/$1\n/g;
    } 



    open(OPT_PAML, ">$CDSinPAML")||die "Can't open the output file $!\n";
    print OPT_PAML "     $specie_cnt     $align_len\n\n";    
    for ($my_k=0; $my_k < $specie_cnt; $my_k  ++ ) {
          print OPT_PAML "$SPECIES[$my_k]\n";
          print OPT_PAML "$align_seq{$SPECIES[$my_k]}\n";
    } 
    
    
    close(OPT_PAML);

#get human aligned seq to figure out starting and ending points in the human CDS:
    $human_aligned_seq="$TMPFOLDER/human_aligned_seq.".$refSeq_id.".fas";
    open(ALIGNED_OPT, ">$human_aligned_seq")||die "Can't open the output file $!\n";
    print ALIGNED_OPT ">human aligned seq\n";
    $my_human_aligned_seq = $align_seq{"human"};
    $my_human_aligned_seq =~ s/\s+//g;
    $my_human_aligned_seq =~ s/-//g;
#    $numberBPincoreNoGap = length $my_human_aligned_seq;
    print ALIGNED_OPT "$my_human_aligned_seq\n";
    close(ALIGNED_OPT);
#    ($align_CDS_start, $align_CDS_stop) = & compare2seq4positions($human_aligned_seq,$tmpHumanCDS);

}

#*********************************************************************

sub compareSeq4indents  {
    	my($my_seq1,$my_seq2)=@_;
    	my(@my_ary1, @my_ary2);
    	my $my_ary_len;
    	my ($my_indent_cnt1, $my_indent_cnt2, $i);
    	@my_ary1 =split('', $my_seq1);
    	@my_ary2 =split('', $my_seq2);   
    	$my_ary_len=@my_ary1;
    	$my_indent_cnt1=$my_indent_cnt2=0;
    	
    	for ($i=0; $i<$my_ary_len; $i ++) {
        		if ($my_ary1[$i] eq '-'  && $my_ary2[$i] ne '-' ) {
        	 		   $my_indent_cnt1 +=1;
        		}elsif ($my_ary2[$i] eq '-'  && $my_ary1[$i] ne '-' ) {
        			   $my_indent_cnt2 +=1;
        		}
    	}
    	return($my_indent_cnt1, $my_indent_cnt2);
}



##########################################################################################
#sub of section 1
###########################################################################################
sub translatePep2DNAalign  {
      my($my_ipt,$my_opt)=@_; ## $tmp_PEP_align, $tmp_RNA_align
    	my (%position, $i, $k, $fragmnent, $seq_name, $space, $my_fragment_len, @my_seqary, $my_codon);
    	my $aligned_species=0;
    	$space=$seq_name=$my_fragment_len=$my_codon=$fragmnent=$i=$k="";
    	@my_seqary=();
    	%position=();
    	foreach $k(keys %frame_seq ) {
    		   $position{$k}=0;
    		   $aligned_species +=1;
    	}
    	open(MYIPT, $my_ipt)||die "Can't open the input file my_ipt=$my_ipt: $!\n";
    	$/="\n";
    		
    	open(MYOPT, ">$tmp_RNA_align")||die "Can't open the input file tmp_RNA_align=$tmp_RNA_align: $!\n";
    	
    	while( <MYIPT> ) {
	      	if (  /^(human|chimp|macaca|mouse|dog|rat|rabbit|opossum|chicken|cow|tenrec|elephant|armadillo|xenopus|platypus|lcl\|\w+\d+)(\s+)(\S+)\s*\d*$/ ) {
			         $fragmnent=$3;
        	     $seq_name=$1;
        	     if ( $seq_name =~ /lcl\|ENS\w+\d+/ ) {
        	        	$seq_name =~ s/lcl\|//; 
        	        	if ( $seq_name eq $homo2human{$refSeq_id} ) {
        	        	     $seq_name="human";
        	        	}elsif ( $seq_name eq $homo2rat{$refSeq_id} ) {
        	        	     $seq_name="rat";
        	        	}elsif ( $seq_name eq $homo2rabbit{$refSeq_id} ) {
        	        	     $seq_name="rabbit";
        	        	}elsif ( $seq_name eq $homo2mouse{$refSeq_id} ) {
        	        	     $seq_name="mouse";
        	        	}elsif ( $seq_name eq $homo2chimp{$refSeq_id} ) {
        	        	     $seq_name="chimp";
        	        	}elsif ( $seq_name eq $homo2macaca{$refSeq_id} ) {
        	        	     $seq_name="macaca";
        	        	}elsif ( $seq_name eq $homo2chicken{$refSeq_id} ) {
        	        	     $seq_name="chicken";
        	        	}elsif ( $seq_name eq $homo2dog{$refSeq_id} ) {
        	        	     $seq_name="dog";
        	        	}elsif ( $seq_name eq $homo2cow{$refSeq_id} ) {
        	        	     $seq_name="cow";
        	        	}elsif ( $seq_name eq $homo2xenopus{$refSeq_id} ) {
        	        	     $seq_name="xenopus";
        	        	}elsif ( $seq_name eq $homo2opossum{$refSeq_id} ) {
        	        	     $seq_name="opossum";
        	        	}elsif ( $seq_name eq $homo2tenrec{$refSeq_id} ) {
        	        	     $seq_name="tenrec";
        	        	}elsif ( $seq_name eq $homo2elephant{$refSeq_id} ) {
        	        	     $seq_name="elephant";
        	        	}elsif ( $seq_name eq $homo2armadillo{$refSeq_id} ) {
        	        	     $seq_name="armadillo";
        	        	}elsif ( $seq_name eq $homo2platypus{$refSeq_id} ) {
        	        	     $seq_name="platypus";
        	        	}
        	     }
        	      
     	        $space=$2;
     	        print MYOPT "$seq_name\t";
     	        @my_seqary=split('', $fragmnent);
    	        
		 # code modified by Munirul Islam
		 # the following if statement check whether seq_name key exists in seq_array
		 # if not, it pushes that key in the hash array.
		 if (exists $seq_array{$seq_name}) {
			@{$seq_array{$seq_name}} = (@{$seq_array{$seq_name}},@my_seqary);
		 }
		 else {
			push(@{$seq_array{$seq_name}},@my_seqary);
		 }

     	        $my_fragment_len=@my_seqary;
     	        for ($i=0; $i<$my_fragment_len; $i ++) {
        	        	if ($my_seqary[$i] eq "-" ) {
        	        		   print MYOPT "---";
        	        	}else {
        	        		   $my_codon=substr($frame_seq{$seq_name}, $position{$seq_name},4 );
                         $my_codon=~ s/\s$//;
        	        		   print MYOPT $my_codon;
        	        	 	   $position{$seq_name} +=4;
        	        	}
        	    }
        	    print MYOPT "\n";
        	 }elsif ( /^dimensions ntax=(\d+) nchar=(\d+)\;/ ) {
        	 	    $my_thisAlign_codon_len=3*$2;
        	 	    print MYOPT "dimensions ntax=$1 nchar=$my_thisAlign_codon_len\;\n";
        	 }elsif ( /^interleave datatype=PROTEIN/ ) {
        	 	     $_=~ s/PROTEIN/DNA/;
        	 	     print MYOPT "$_\n";	
        	 }else {
        	 	     print MYOPT "$_\n";
        	 }
      }	
      close(MYIPT);
    	close(MYOPT);
    	
    
    	my $align_pep_len=@{$seq_array{"human"}};
    	
    	my($my_start, $my_end);
    	$my_start=$my_end=0;
    	for ($m_i=0; $m_i<$align_pep_len; $m_i ++ ) {	
            my $isIdentical=0;
            foreach  $my_specie (keys %seq_array  ) {
                if ( ${$seq_array{"human"}}[$m_i] ne "-" && ${$seq_array{$my_specie}}[$m_i] eq ${$seq_array{"human"}}[$m_i]  ) {
      	              $isIdentical +=1;
      	        }
      	        if ( ${$seq_array{"human"}}[$m_i+1] ne "-" && ${$seq_array{$my_specie}}[$m_i+1] eq ${$seq_array{"human"}}[$m_i+1]  ) {
      	              $isIdentical +=1;
      	        }
      	        if ( ${$seq_array{"human"}}[$m_i+2] ne "-" && ${$seq_array{$my_specie}}[$m_i+2] eq ${$seq_array{"human"}}[$m_i+2]  ) {
      	              $isIdentical +=1;
      	        }
      	        if ( ${$seq_array{$my_specie}}[$m_i] eq "-" || ${$seq_array{$my_specie}}[$m_i] eq "X") {
      	              $isIdentical -=1;
      	        }
      	        if ( ${$seq_array{$my_specie}}[$m_i+1] eq "-" || ${$seq_array{$my_specie}}[$m_i+1] eq "X") {
      	              $isIdentical -=1;
      	        }
      	        if ( ${$seq_array{$my_specie}}[$m_i+2] eq "-" || ${$seq_array{$my_specie}}[$m_i+2] eq "X") {
      	              $isIdentical -=1;
      	        }
      	    }
            if ( $isIdentical > $aligned_species * $IDENTICAL_THRESHOLD ) {
        	       $my_start=$m_i;
        	       $m_i=$align_pep_len; 
            }
      }
      	
      for ($m_i=$align_pep_len-1; $m_i >$my_start;  $m_i -- ) {	
            $isIdentical=0 ;
            foreach  $my_specie (keys %seq_array  ) {
                  if ( ${$seq_array{"human"}}[$m_i] ne "-" && ${$seq_array{$my_specie}}[$m_i] eq ${$seq_array{"human"}}[$m_i]  ) {
      	               $isIdentical += 1;
        	        }
        	        if ( ${$seq_array{"human"}}[$m_i-1] ne "-" && ${$seq_array{$my_specie}}[$m_i-1] eq ${$seq_array{"human"}}[$m_i-1]  ) {
        	              $isIdentical += 1;
        	        }
        	        #The third aa does not improve the alignment , optional
        	        if ( ${$seq_array{"human"}}[$m_i-2] ne "-" && ${$seq_array{$my_specie}}[$m_i-2] eq ${$seq_array{"human"}}[$m_i-2]  ) {
        	              $isIdentical += 1;
        	        }
                        if ( ${$seq_array{$my_specie}}[$m_i]   eq "-" || ${$seq_array{$my_specie}}[$m_i]   eq "X") {
        	              $isIdentical -=1;
        	        }
        	        if ( ${$seq_array{$my_specie}}[$m_i-1] eq "-" || ${$seq_array{$my_specie}}[$m_i-1] eq "X") {
        	              $isIdentical -=1;
        	        }
        	        if ( ${$seq_array{$my_specie}}[$m_i-2] eq "-" || ${$seq_array{$my_specie}}[$m_i-2] eq "X") {
        	              $isIdentical -=1;
        	        }
      	        
      	    }
            if ( $isIdentical > $aligned_species * $IDENTICAL_THRESHOLD ) {
        	       $my_end=$m_i;
        	       $m_i=0; 
            }
	    }
     	foreach  $my_specie (keys %seq_array  ) {
     		    undef @{$seq_array{$my_specie}};
     		    delete $seq_array{$my_specie};
     	}
     	return ($my_start, $my_end);	
}


sub parseHomoloGene   {
	open(IN, "/sky/genomeDNA/hmlg.trip.ftp")||die "Can't open the input file $!\n";
	while(<IN>) {
		chop;
		if (/^\d+\|\d+/) {
			@a=split(/\|/, $_);
			if ( $a[0]==9606 ) {
				$a[5] =~ s/\.\d$//;
				$a[8] =~ s/\.\d$//;
				if ( $a[1]==10090 ) {
					$homo2mouse{$a[5]}=$a[8];
				} elsif ( $a[1]==10116 ) {
					$homo2rat{$a[5]}=$a[8];
				} elsif ( $a[1]==9615 ) {
					$homo2dog{$a[5]}=$a[8];
				} elsif ( $a[1]==9031 ) {
					$homo2chicken{$a[5]}=$a[8];
				} elsif ( $a[1]==9598 ) {
					$homo2chimp{$a[5]}=$a[8];
				} 
		         }elsif ( $a[1]==9606 ) {
				$a[8] =~ s/\.\d$//;
				$a[5] =~ s/\.\d$//;
				if ( $a[0]==10090 ) {
					$homo2mouse{$a[8]}=$a[5];
				} elsif ( $a[0]==10116 ) {
					$homo2rat{$a[8]}=$a[5];
				} elsif ( $a[0]==9615 ) {
					$homo2dog{$a[8]}=$a[5];
				} elsif ( $a[0]==9031 ) {
					$homo2chicken{$a[8]}=$a[5];
				} elsif ( $a[0]==9598 ) {
					$homo2chimp{$a[8]}=$a[5];
				} 
		         }
		  }
        }
        close(IN);
         
        open(IN, "/sky/genomeDNA/hmlg.ftp")||die "Can't open the input file $!\n";
	while(<IN>) {
		chop;
		if (/^\d+\|\d+/) {
			@a=split(/\|/, $_);
			if ( $a[0]==9606 ) {
				$a[5] =~ s/\.\d$//;
				$a[8] =~ s/\.\d$//;
				if ( $a[1]==10090 ) {
					$homo2mouse{$a[5]}=$a[8];
				} elsif ( $a[1]==10116 ) {
					$homo2rat{$a[5]}=$a[8];
				} elsif ( $a[1]==9615 ) {
					$homo2dog{$a[5]}=$a[8];
				} elsif ( $a[1]==9031 ) {
					$homo2chicken{$a[5]}=$a[8];
				} elsif ( $a[1]==9598 ) {
					$homo2chimp{$a[5]}=$a[8];
				} 
		         }elsif ( $a[1]==9606 ) {
				$a[8] =~ s/\.\d$//;
				$a[5] =~ s/\.\d$//;
				if ( $a[0]==10090 ) {
					$homo2mouse{$a[8]}=$a[5];
				} elsif ( $a[0]==10116 ) {
					$homo2rat{$a[8]}=$a[5];
				} elsif ( $a[0]==9615 ) {
					$homo2dog{$a[8]}=$a[5];
				} elsif ( $a[0]==9031 ) {
					$homo2chicken{$a[8]}=$a[5];
				} elsif ( $a[0]==9598 ) {
					$homo2chimp{$a[8]}=$a[5];
				} 
		         }
		  }
          }
          close(IN);
}
	
	
	
sub getCDS {
	my($my_rnafile, $cds_start, $cds_end, $my_cds_opt)=@_;
	my ($my_header, $my_seq, $my_substr, $my_len);
	$my_seq="";
	if ( $cds_start eq "" && $cds_end eq "" ) {
		$cds_start=1;
		$cds_end=100000;
	} 
	$cds_start -=1;
	$my_len=$cds_end-$cds_start;
	
	open(FASIN, $my_rnafile)||die "Can't open the input file my_rnafile=$my_rnafile : $!\n";
	$/ ="\n";
        while( <FASIN> ) {
        	chop;
           if ( />/ ) {
           	$my_header=$_;
           } else {
           	$my_seq .=$_;
           }
         }
         close(FASIN);

         $my_substr=substr($my_seq, $cds_start, $my_len);
         $my_substr =~ s/(\w{60})/$1\n/g;
         open(FASOUT, ">$my_cds_opt")||die "Can't open the output file $!\n";
         print FASOUT "$my_header CDS $cds_start to $cds_end\n";
         print FASOUT $my_substr;
         print FASOUT "\n";
         close(FASOUT);
}
  

sub blastsearch_ensembl {
    my($inputSeq, $inputDB, $my_gene_symbol)=@_;
    my ($tmp_rna_align, $nohit);
    $ct_id="null";
 
    `echo "Query sequence: $inputSeq\n">>$preresult`;

    $tmp_rna_align="$TMPFOLDER/alignment.".$$;
    $BLAST_PARAMETER="-e $Stringency -a 3 -S 1 -X 100 -y 100 -Z 100";
#    $BLAST_PARAMETER="-e $Stringency -m 1 -F F -a 3 -S 1 -X 100 -y 100 -Z 100";
     ` blastall -p blastn -d $inputDB -i $inputSeq -o $tmp_rna_align $BLAST_PARAMETER`;

     $nohit=`grep "No hits found" $tmp_rna_align|wc -l`;
     if ($nohit==1)  { 
#     	    print STDERR "$inputSeq does not have an orthologe gene in $inputDB.\n";
      } else {
             ($ct_id, $max_similarity)= & parseBls4Ortholog($tmp_rna_align,$my_gene_symbol);
             `cat $tmp_rna_align>>$preresult`;
             `/bin/rm -f $tmp_rna_align`;
             `echo "\n\n">>$preresult`;
             if ( $inputDB =~ /chicken|opossum|xenopus|platypus/i ) { 
             	  $max_similarity +=10; 
             }elsif ( $inputDB =~ /chimp/i ) { 
             	  $max_similarity -=10; 
             }
             if ( $max_similarity > $SIMILARITY_THRESHOLD ) {
             	    if ( $max_similarity > 100 ) {
             	    	   $max_similarity -=15;
             	    }
#                  getFastaByID($ct_id,$searchFas);
             }else {
             	  $ct_id="null";
             }
             if ( $inputDB =~ /chicken|opossum|xenopus|platypus/i ) { 
             	  $max_similarity -=10; 
             }elsif ( $inputDB =~ /chimp/i ) { 
             	  $max_similarity +=10; 
             }
      }
}     

sub getFastaByID {
    my($m_id, $m_searchFas)=@_;
    my ($this_id, $my_organism);
    
    if ($m_searchFas =~ /human/ ) {
    	$my_organism ="human";
    }elsif ($m_searchFas =~ /mouse/ ) {
    	$my_organism ="mouse";
    }elsif ($m_searchFas =~ /chimp/ ) {
    	$my_organism ="chimp";
    }elsif ($m_searchFas =~ /macaca/ ) {
    	$my_organism ="macaca";
    }elsif ($m_searchFas =~ /dog/ ) {
    	$my_organism ="dog";
    }elsif ($m_searchFas =~ /rat/ ) {
    	$my_organism ="rat";
    }elsif ($m_searchFas =~ /cow/ ) {
    	$my_organism ="cow";
    }elsif ($m_searchFas =~ /chicken/ ) {
    	$my_organism ="chicken";
    }elsif ($m_searchFas =~ /opossum/ ) {
    	$my_organism ="opossum";
    }elsif ($m_searchFas =~ /xenopus/ ) {
    	$my_organism ="xenopus";
    }elsif ($m_searchFas =~ /rabbit/ ) {
    	$my_organism ="rabbit";
    }elsif ($m_searchFas =~ /tenrec/ ) {
    	$my_organism ="tenrec";
    }elsif ($m_searchFas =~ /armadillo/ ) {
    	$my_organism ="armadillo";
    }elsif ($m_searchFas =~ /elephant/ ) {
    	$my_organism ="elephant";
    }elsif ($m_searchFas =~ /platypus/ ) {
    	$my_organism ="plstypus";
    }
    
    open(FASIN, $m_searchFas)||die "Can't open the input file m_searchFas=$m_searchFas: $!\n";
    my $m_flag=0;
    $/ ="\n";
    while( <FASIN> ) {
       if ( /^>/ ) {
       	    if ($m_flag==1 ) {
	        close(MOPT);
                last;
            }
	    if ( $_ =~ /^>gi\|\d+\|\w+\|(\S+)\.\d\|/  ) { 
	         $this_id=$1;
	    }elsif ($_ =~ /^>sf(\d+_CDS_\d+)\|\d+/ ) { 
	         $this_id=$1;
	    }elsif ($_ =~ /^>(\w+\d+)\s+\d+/ ) { 
	         $this_id=$1;
	    }elsif ($_ =~ /^>(ref|db|emb|dbj|gb)\|(\S+)\.\d+/ ) { 
	         $this_id=$2;
	    }elsif ( $_ =~ /^>(\d+_CDSbyGscan)\s+/ )  {
           $this_id=$1;
      }
	    
	    if ($this_id eq $m_id ) {
	    	$m_flag=1;
	    	$_ =~ s/^>(\S+.+)$/>$my_organism                               $1/;
	    	open (MOPT, ">$tmpFasRNA") ||die "Can't open the output file $!\n";
	    }
       }
       if ( $m_flag==1 ) {
            print MOPT $_;
       }
       
    }
    close(MOPT);
    close(FASIN);
    return $m_flag;
}



sub parseBlastp  {
    my($m_file)=@_;
    `echo "Blastp result:\n" >> $preresult`;
    `cat $m_file >> $preresult`;
         my $align_score=0;
         open(FILE, $m_file) ||die "Can't open the input file m_file=$m_file: !\n";
         my $flag=0;
         $/ ="\n";
         while ( <FILE> )  {
              if (/^\n$/)  {next; }
              if (/^ Score =\s+(\d+\.*\d*)\s+bits\s+\(\d+\)/ ) {
                    $align_score=$1;
                    last;
              }
        }close(FILE);  
        return $align_score;
}

sub getRNAseqinFrame   {
	my($m_file, $m_frame)=@_;
	my( @my_seq_array, $my_ary_len, $my_framed_seq,);
	$my_seq=""; 
	$my_framed_seq="";
	undef @my_seq_array;
	open(FILE, $m_file) ||die "Can't open the input file m_file=$m_file: $!\n";
	$/ ="\n";
	while ( <FILE> )  {
		chop $_;
		if (/^>/ ) {
		    next;
		}else {
		    $my_seq .=$_;
		}
	}
	close(FILE);  
	@my_seq_array=split('', $my_seq);
	$my_ary_len=@my_seq_array;
	for ($i=$m_frame-1; $i<$my_ary_len-2; $i +=3 ) {
		$my_framed_seq .=$my_seq_array[$i].$my_seq_array[$i+1].$my_seq_array[$i+2]." ";
	}
	return $my_framed_seq;
}


sub getCDSposition {
	my($geneid, $genebankfile)=@_;
	my ($cds_start, $cds_end);
	$cds_start= $cds_end="";
	open(FASIN, $genebankfile)||die "Can't open the input file genebankfile=$genebankfile: $!\n";
        my $m_flag=0;
        $/ ="\n";
        while( <FASIN> ) {
           if ( /^LOCUS\s+([XN]M_\d+)\s+\d+/ ) {
           	$locusid=$1;
           	if ( $geneid eq $locusid ) {
           		$m_flag +=1;
           	}
           }
           if ( /^FEATURES\s+Location/ && $m_flag==1 ) {
           	$m_flag +=1;
           }
           if ( /^\s+CDS\s+(\d+)\.\.(\d+)/ && $m_flag==2) {
           	$cds_start=$1;
           	$cds_end=$2;
           	$m_flag +=1;
           }
           if ( $m_flag>0 && /\/\/\n$/ ) {
           	last;
           }
        }
        close(FASIN);
        
        return ($cds_start, $cds_end); 
}
	

sub parseBls4Ortholog {
         my($m_file, $m_genesyn)=@_;
         my ($my_subject_id, $query_len, $my_bitscore, %my_bitscore, %my_similarity,
             $my_maxalign_len, %my_maxalign_len, %my_headline, $my_k);
         $my_subject_id="null";
         $my_k="";
         $my_bitscore=$my_maxalign_len=$query_len=0;
         undef %my_bitscore;
         undef %my_similarity;
         undef %my_headline;
         
         open(FILE, $m_file) ||die "Can't open the input file m_file=$m_file: $!\n";
         my $flag=0;
         $/ ="\n";
         while ( <FILE> )  {
              chop;	
              if (/^$/)  {next; }
              if ($flag==0 && /^Query=\s+/ ) {
                  $flag +=1;
              }
              if ($flag==1 && /^\s+\((\d+)\s+letters\)/ ) {
              	  $query_len=$1;
                  $flag =2;
              }
              if ($flag==2 && /^Database:/ ) {
                  $flag =3;
              }
              if ($flag==3 && /^Sequences producing significant alignments:/ ) {
                  $flag +=1;
              }
              if (/^>/ && $flag >=7 )  { $flag =4; } #permute
              if (/^>/ && $flag==4 )  {
     #parse all different headers:         	
                     if ($_ =~ /^>gi\|\d+\|\w+\|(\S+)\.\d\|/ )  {
                         $my_subject_id=$1;
                     }elsif ( $_ =~ /^>(sf\d+_CDS_\d+)/ )  {
                         $my_subject_id=$1;
                     }elsif ( $_ =~ /^>(\w+\d+)\s+\d+/ )  {
                         $my_subject_id=$1;
                     }elsif ( $_ =~ /^>(ENS\w+\d+)\s*/ )  {
                         $my_subject_id=$1;
                     }elsif ( $_ =~ /^>(ref|db|emb|dbj|gb)\|(\S+)\.\d+/ )  {
                         $my_subject_id=$2;
                     }elsif ( $_ =~ /^>(\d+_CDSbyGscan)\s+/ )  {
                         $my_subject_id=$1;
                     }elsif ( $_ =~ /^>ref\|([NX]_M\d+)\.*\d*/ )  {
                         $my_subject_id=$1;
                     }
#                     $my_headline{$my_subject_id} = $_;
                     $flag +=1;
              }
              if ($flag==5  && /^\s+Length\s+=\s+(\d+)/ )  {
                     if ( $1/$query_len < 0.5 || $1/$query_len > 20 ) {   #need to be enforced to eliminate repeated alignment
                           $flag=4; 
                           next;
                     } else {
                           $flag +=1;
                     }
           
              }
              if ($flag==5  ) { $my_headline{$my_subject_id} .=$_;}
              if ($flag==6  && /^ Score\s+=\s+(\d+)\.*\d*\s+bits/ )  {
              	  $my_bitscore{$my_subject_id}=$1;
                  $flag +=1;
              }
              if ($flag==7  && /^ Identities\s+=\s+(\d+)\/(\d+)\s+\(/ )  {
              	  $my_similarity{$my_subject_id} = 100*$1/$query_len;
                  $flag +=1;
              }
        }close(FILE); 
        
        my $my_maximum_similarity=0;
        foreach $my_k(keys %my_similarity) {
             if ( $my_headline{$my_k} =~ /\b$m_genesyn\b/i ) {
        	  $my_similarity{$my_k} +=15;
             }
             if ( $my_similarity{$my_k} > $my_maximum_similarity ) {
             	  $my_maximum_similarity=$my_similarity{$my_k} ;
             }
        }
        my $top_similarity_cnt=0;
        foreach $my_k(keys %my_similarity) {
        	if ( $my_similarity{$my_k} == $my_maximum_similarity ) {
        		$top_similarity_cnt +=1;
        		$my_bitscore=$my_bitscore{$my_k};
        		$my_subject_id=$my_k;
        	}
        }
        my $is_choosed_flag=0;
        if ( $top_similarity_cnt > 1) {
        	foreach $my_k(keys %my_similarity) {
        	      if ( $my_similarity{$my_k} == $my_maximum_similarity && $my_k =~ /NM_\d+/ ) {
        		     $my_bitscore=$my_bitscore{$my_k};
        		     $my_subject_id=$my_k;
        		     $is_choosed_flag=1;
        	      }
        	}
        	if ($is_choosed_flag==0 ) {
        		foreach $my_k(keys %my_similarity) {
        	             if ( $my_similarity{$my_k} == $my_maximum_similarity && $my_k =~ /^ENS/ ) {
        		           $my_bitscore=$my_bitscore{$my_k};
        		           $my_subject_id=$my_k;
        		     }
        	      }
        	}
        }
	
        return($my_subject_id, $my_maximum_similarity) ;
}


sub assessRNAalignment  {
    	my($my_ipt)=@_;
    	my ($m_i, $k, $fragmnent,%my_species, $seq_name, $space, $my_fragment_len);
    	my ($specie_cnt, $m_flag);	
    	my $align_DNA_len;
    	
            $seq_name=$k=$fragmnent=$seq_name=$space=$my_fragment_len=$specie_cnt="";
            $m_i=$align_DNA_len=0;
            %my_species=();
    	open(MYIPT, $my_ipt)||die "Can't open the input file: my_ipt=$my_ipt failed at $refSeq_id $!\n";
    	$m_flag=0;
    	$/ ="\n";
    	while(<MYIPT>) {
   		    chop;
   		    if ( /^(human|chimp|macaca|mouse|dog|rat|rabbit|tenrec|elephant|armadillo|opossum|chicken|cow|xenopus|platypus)(\s+)(\S+)\s*\d*$/ ) {
   			       $fragmnent=$3;
        	     $seq_name=$1;
        	     $m_flag=1;
        	}elsif (/^(lcl\|\w+\d+)(\s+)(\S+)\s*\d*$/ ) {
			         $fragmnent=$3;
        	     $seq_name=$1;
        	     $seq_name =~ s/lcl\|//; 
        	     $m_flag=1;
        	}elsif (/^(gi\|\d+\|\w+\|\S+\.\S*)(\s+)(\S+)\s*\d*$/ ) {
			         $fragmnent=$3;
        	     $seq_name=$1;
        	     $seq_name =~ s/gi\|\d+\|\w+\|//;
        	     $seq_name =~ s/\.\S*$//; 
        	     $m_flag=1;
        	}elsif (/^(ref|db|emb|dbj|gb)\|(\S+)\.\S*(\s+)(\S+)\s*\d*$/ ) {
			         $fragmnent=$4;
        	     $seq_name=$2;
        	     $seq_name =~ s/(ref|db|emb|dbj|gb)\|//;
        	     $seq_name =~ s/\.\S+$//; 
        	     $m_flag=1;
        	}elsif ( $_ =~ /^(\d+_CDSbyGscan)(\s+)(\S+)\s*\d*$/ ) {
			         $fragmnent=$3;
        	     $seq_name=$1;
               $m_flag=1;
          }else {
        	 	   $m_flag=0;
        	}  
        	
        	if ( $m_flag == 1) {
    		       	if ( $homo2human{$refSeq_id} && $seq_name eq $homo2human{$refSeq_id} ) {
    	        	     $seq_name="human";
    	        	}elsif ( $homo2rat{$refSeq_id} && $seq_name eq $homo2rat{$refSeq_id} ) {
    	        	     $seq_name="rat";
    	        	}elsif ($homo2mouse{$refSeq_id} && $seq_name eq $homo2mouse{$refSeq_id} ) {
    	        	     $seq_name="mouse";
    	        	}elsif ( $homo2chimp{$refSeq_id} && $seq_name eq $homo2chimp{$refSeq_id} ) {
    	        	     $seq_name="chimp";
    	        	}elsif ( $homo2macaca{$refSeq_id} && $seq_name eq $homo2macaca{$refSeq_id} ) {
    	        	     $seq_name="macaca";
    	        	}elsif ( $homo2dog{$refSeq_id} && $seq_name eq $homo2dog{$refSeq_id} ) {
    	        	     $seq_name="dog";
    	        	}elsif ( $homo2cow{$refSeq_id} && $seq_name eq $homo2cow{$refSeq_id} ) {
    	        	     $seq_name="cow";
    	        	}elsif ( $homo2chicken{$refSeq_id} && $seq_name eq $homo2chicken{$refSeq_id} ) {
    	        	     $seq_name="chicken";
    	        	}elsif ( $homo2opossum{$refSeq_id} && $seq_name eq $homo2opossum{$refSeq_id} ) {
    	        	     $seq_name="opossum";
    	        	}elsif ( $homo2xenopus{$refSeq_id} && $seq_name eq $homo2xenopus{$refSeq_id} ) {
    	        	     $seq_name="xenopus";
    	        	}elsif ( $homo2rabbit{$refSeq_id} && $seq_name eq $homo2rabbit{$refSeq_id} ) {
    	        	     $seq_name="rabbit";
    	        	}elsif ( $homo2tenrec{$refSeq_id} && $seq_name eq $homo2tenrec{$refSeq_id} ) {
    	        	     $seq_name="tenrec";
    	        	}elsif ( $homo2elephant{$refSeq_id} && $seq_name eq $homo2elephant{$refSeq_id} ) {
    	        	     $seq_name="elephant";
    	        	}elsif ( $homo2armadillo{$refSeq_id} && $seq_name eq $homo2armadillo{$refSeq_id} ) {
    	        	     $seq_name="armadillo";
    	        	}elsif ( $homo2platypus{$refSeq_id} && $seq_name eq $homo2platypus{$refSeq_id} ) {
    	        	     $seq_name="platypus";
    	        	}
           	  
           	   $my_align_DNA{$seq_name} .=$fragmnent;
               $my_species{$seq_name}=1;
         }
	   }
	   close(MYIPT);   
	    
     $specie_cnt=0;
     foreach $k(keys %my_align_DNA ) {
     	     while ( $my_align_DNA{$k} =~ /([ATGC][ATGC])(-{3,})([ATGC][ATGC])/ ) {
#     	     	      $seq_tmp1=$1;
     	     	      $seq_tmp2=$2;
#     	     	      $seq_tmp3=$3;
     	     	      $seq_tmp2=~ tr/-/N/;
     	            $my_align_DNA{$k} =~ s/([ATGC][ATGC])(-{3,})([ATGC][ATGC])/$1$seq_tmp2$3/;
     	     }
           @{$seq_array{$k}} = split('',$my_align_DNA{$k});
           $specie_cnt +=1;
     }
       
    	$align_DNA_len=@{$seq_array{"human"}};
    	
    	my($my_start, $my_end);
    	$my_start=$my_end=0;
    	for ($m_i=0; $m_i<$align_DNA_len; $m_i ++ ) {	
           my $isIdentical=0;
           foreach  $my_specie (keys %my_align_DNA  ) {
    	        if ( ${$seq_array{$my_specie}}[$m_i] ne "-" && ${$seq_array{$my_specie}}[$m_i] ne "N" ) {
    	              $isIdentical +=1;
    	        }
    	    }
          if ( $isIdentical > 0.5 * $specie_cnt ) {
            	 $my_start=$m_i;
            	 $m_i=$align_DNA_len;  #to exit loop
          }
    	}
    	
    	for ($m_i=$align_DNA_len-1; $m_i >$my_start;  $m_i -- ) {	
           $isIdentical=0 ;
           foreach  $my_specie (keys %my_align_DNA  ) {
    	        if ( ${$seq_array{$my_specie}}[$m_i-1] ne "-" && ${$seq_array{$my_specie}}[$m_i-1] ne "N" ) {
    	              $isIdentical +=1;
    	        }
    	     }
           if ( $isIdentical > 0.5 * $specie_cnt ) {
            	 $my_end=$m_i;
            	 $m_i=0; 
           }
    	}
    	
    	
    	for ($m_i=$my_start; $m_i<$my_end; $m_i ++ ) {	
           $isIdentical=0;
           foreach  $my_specie (keys %my_align_DNA  ) {
    	        if ( ${$seq_array{$my_specie}}[$m_i] ne "-" ) {
    	              $isIdentical +=1;
    	        }
    	     }
           if ( $specie_cnt -$isIdentical > 2 && ${$seq_array{"human"}}[$m_i] eq "-" ) {
              	foreach  $my_specie (keys %my_align_DNA  ) {
    	              ${$seq_array{$my_specie}}[$m_i] = "9";
    	         }
    	     }elsif ( $isIdentical == 1 || $isIdentical == 2 ) {
              	foreach  $my_specie (keys %my_align_DNA  ) {
    	              ${$seq_array{$my_specie}}[$m_i] = "9";
    	         }
    	     }elsif ( $isIdentical == $specie_cnt-1 || $isIdentical == $specie_cnt-2  ) {
            	  foreach  $my_specie (keys %my_align_DNA  ) {
    	             if ( ${$seq_array{$my_specie}}[$m_i] eq "-" ) {
    	                  ${$seq_array{$my_specie}}[$m_i] = "N";
    	             }
    	          }
    	     }
    	    
    	}
    	
    	undef %my_align_DNA;
    	for ($m_i=$my_start; $m_i<$my_end; $m_i ++ ) {	
    	    foreach  $my_specie (keys %my_species  ) {
    	          $my_align_DNA{$my_specie} .= ${$seq_array{$my_specie}}[$m_i];
    	    }
    	}
      foreach  $my_specie (keys %my_species  ) {
           $my_align_DNA{$my_specie} =~ s/9//g;
           $my_align_DNA{$my_specie} =~ s/^-+//;
           $my_align_DNA{$my_specie} =~ s/^N+//;
           $my_align_DNA{$my_specie} =~ s/^-+//;
           
           $my_align_DNA{$my_specie} =~ s/-+$//;
           $my_align_DNA{$my_specie} =~ s/N+$//;
           $my_align_DNA{$my_specie} =~ s/-+//g;
      }
    	foreach  $my_specie (keys %my_align_DNA  ) {
    		undef @{$seq_array{$my_specie}};
    	}
}
     
     
#modified on July 5, 2005, add pep output function     
sub getCDSposition_pep {
	my($geneid, $genebankfile)=@_;
	my ($cds_start, $cds_end, $gene_symbol, $my_pep_seq);
	$cds_start= $cds_end=$my_pep_seq=$gene_symbol="";
	open(FASIN, $genebankfile)||die "Can't open the input file genebankfile=$genebankfile: $!\n";
        my $m_flag=0;
        $/ ="\n";
        while( <FASIN> ) {
           if ( /^LOCUS\s+([XN]M_\d+)\s+\d+/ ) {
           	$locusid=$1;
           	if ( $geneid eq $locusid ) {
           		$m_flag +=1;
           	}
           }
           if ( /^FEATURES\s+Location/ && $m_flag==1 ) {
           	$m_flag +=1;
           }
           if ( /^\s+CDS\s+(\d+)\.\.(\d+)/ && $m_flag==2) {
           	$cds_start=$1;
           	$cds_end=$2;
           	$m_flag +=1;
           }
           if (/^\s+\/gene=\"(\S+)\"/ &&  $m_flag==3 ) {
           	$gene_symbol=$1;
           	$m_flag +=1;
           }
           if ( /^\s+\/translation=/ && $m_flag==4) {
           	chop $_;
           	$_=~ s/^\s+\/translation=\"//;
           	$m_flag +=1;
           }
           if ( $m_flag ==5 && /\"\n$/ ) {
           	chop $_; chop $_;
           	$_ =~ s/^\s+//;
           	$my_pep_seq .=$_;
           	$m_flag +=1;
           	last;
           }
           if ( $m_flag==5  ) {
           	chomp $_;
           	$_ =~ s/^\s+//;
           	$my_pep_seq .=$_;
           	
           }
           if ($m_flag>0 && /^\/\/\n$/ ) {
           	last;
           }	
        }
        close(FASIN);
        
        return ($cds_start, $cds_end, $gene_symbol, $my_pep_seq); 
}


sub translate2pep_by_readingFrame {
#modified from Paul Stothard's translate.pl, 
#(Genome Canada Bioinformatics Help Desk)
     
      my ($fileToRead,$READINGFRAME,$pep_opt)=@_;    #READINGFRAME could be 1,2, or 3.
      my(@arrayOfNames,@arrayOfTranslations,%translationHash,$sequenceEntry,$sequenceTitle,
      @growingProtein,$codon,$i, $joinedAminoAcids);
      $READINGFRAME -=1;
     
      @arrayOfNames = ();
      
      #Declare an array to hold the protein translation of each sequence.
      @arrayOfTranslations = ();
      
      %translationHash = 
          (gca => "A", gcg => "A", gct => "A", gcc => "A", gcn => "A",
           tgc => "C", tgt => "C",
           gat => "D", gac => "D",
           gaa => "E", gag => "E",
           ttt => "F", ttc => "F",
           gga => "G", ggg => "G", ggc => "G", ggt => "G", ggn => "G",
           cat => "H", cac => "H",
           ata => "I", att => "I", atc => "I",
           aaa => "K", aag => "K",
           cta => "L", ctg => "L", ctt => "L", ctc => "L", ctn => "L", tta => "L", ttg => "L",
           atg => "M",
           aat => "N", aac => "N",
           cca => "P", cct => "P", ccg => "P", ccc => "P", ccn => "P",
           caa => "Q", cag => "Q",
           cga => "R", cgg => "R", cgc => "R", cgt => "R", cgn => "R",
           aga => "R", agg => "R",
           tca => "S", tcg => "S", tcc => "S", tct => "S", tcn => "S",
           agc => "S", agt => "S",
           aca => "T", acg => "T", acc => "T", act => "T", acn => "T",
           gta => "V", gtg => "V", gtc => "V", gtt => "V", gtn => "V",
           tgg => "W",
           tat => "Y", tac => "Y",
           tag => "X", taa => "X", tga => "X");
      
      open (DNAFILE, $fileToRead) or die( "Cannot open file : $!" );
      open(PEPFILE, ">$pep_opt")||die "Can't open the output file $!\n";
      
      #Because we are reading FASTA files, we will assign "$/" to equal ">".
      $/ = ">";
      
      while ( $sequenceEntry = <DNAFILE>) {
          if ( $sequenceEntry eq ">"){
      	        next;
          }
          $sequenceTitle = "";
          if ( $sequenceEntry =~ m/([^\n]+)/){
      	       $sequenceTitle = $1;
          }else {
      	       $sequenceTitle = "No title was found!";
          }
      
          $sequenceEntry =~ s/[^\n]+//;
          push (@arrayOfNames, $sequenceTitle);
#          $sequenceEntry =~ s/[^GATCNgatcn]//g;
          $sequenceEntry =~ s/\W//g;
          $sequenceEntry = lc($sequenceEntry);
          @growingProtein = ();
          for ( $i = $READINGFRAME; $i <= (length($sequenceEntry) - 3); $i = $i + 3) {
               $codon = substr($sequenceEntry, $i, 3);
      	      if (exists( $translationHash{$codon} )){
      	           push (@growingProtein, $translationHash{$codon});
      	      }else {
      	           push (@growingProtein, "X");
      	      }
          } 
          $joinedAminoAcids = join("",@growingProtein);
          push (@arrayOfTranslations, $joinedAminoAcids);
      } 
      close (DNAFILE) or die( "Cannot close file : $!");
      for ($i = 0; $i < scalar(@arrayOfNames); $i = $i + 1) {
          print PEPFILE ">$arrayOfNames[$i]\n";
          print PEPFILE "$arrayOfTranslations[$i]\n";
      } 
      close(PEPFILE);
}


sub compare2seq4positions {
	  my($my_human_aligned_seq,$my_tmpHumanCDS)=@_;
	  my($my_human_aligned_start,$my_human_aligned_stop); 
	  $my_human_aligned_start=100000;
	  $my_human_aligned_stop=0;
	  `bl2seq -p blastn -i $my_human_aligned_seq -j $my_tmpHumanCDS -o $tmp_pep_align -D 1 -S 1 -e $Stringency -F F`;
    open(ALIGNED_OPT, $tmp_pep_align)||die "Can't open the input file tmp_pep_align=$tmp_pep_align: $!\n";
    
    while( <ALIGNED_OPT> ) {
    	  chop;
    	  if (/^\#/ ) {
    	  	  next;
    	  }else {
    	  	  @a=split(/\t/, $_);
    	  	  if ( $a[8] < $my_human_aligned_start ) {
    	  	  	  $my_human_aligned_start=$a[8];
    	  	  }
            if ( $a[9] > $my_human_aligned_stop ) {
    	  	  	  $my_human_aligned_stop=$a[9];
    	  	  }
    	  }
    }
    close(ALIGNED_OPT);
    return($my_human_aligned_start,$my_human_aligned_stop);
}


sub blastsearch_RefSeq  {  
     my($inputSeq, $inputDB)=@_;
     my ($tmp_rna_align, $nohit, $my_exist_flag);
     $ct_id="null";
     $my_exist_flag=0;
     
     `echo "Query sequence: $inputSeq\n">>$preresult`;

     $tmp_rna_align="$TMPFOLDER/alignment.".$$;
     $BLAST_PARAMETER="-e $Stringency -a 3 -S 1 -X 100 -y 100 -Z 100";
#    $BLAST_PARAMETER="-e $Stringency -m 1 -F F -a 3 -S 1 -X 100 -y 100 -Z 100";
     ` blastall -p blastn -d $inputDB -i $inputSeq -o $tmp_rna_align $BLAST_PARAMETER`;

     $my_gene_symbol="";
     
     $nohit=`grep "No hits found" $tmp_rna_align|wc -l`;
     if ($nohit==1)  { 
#     	    print STDERR "$inputSeq does not have an orthologe gene in $inputDB.\n";
            $my_exist_flag=0;
     } else {
             ($ct_id, $max_similarity)= & parseBls4Ortholog($tmp_rna_align,$my_gene_symbol);
             `cat $tmp_rna_align>>$preresult`;
             `/bin/rm -f $tmp_rna_align`;
             `echo "\n\n">>$preresult`;
       
             if ( $max_similarity > $SIMILARITY_THRESHOLD ) {
             	     $my_exist_flag=1;
             }else {
             	    $ct_id="null";
             	     $my_exist_flag=0;
             }
     }
     return $my_exist_flag;
}   

__END__;

