## this programs bulds the hierarchy from a directed network
## the input file is a two column file separated by tabs: 
## Regulator	Target

#!/usr/bin/perl -w

#Specify the input file with the proper path here

$filename1='Z:\Research\Misc\plant_hier\plant_reg.txt';
#$filename1='Z:\Shared\Research\Isalan_hierarchy\isalan_reg.txt';
open (DATA_1_FILE, $filename1);
@data_1 = <DATA_1_FILE>;
close DATA_1_FILE;


## extract all genes

@all_genes=();
foreach $line1 (@data_1) 
	{@line1=split(/\t/,$line1);
	 $gene1=$line1[0];
	 $gene2=$line1[1];
	 chomp $gene1;
	 chomp $gene2;
	 $present1=0;
	 $present2=0;

     foreach $gene (@all_genes) 
		 {if (uc($gene) eq uc($gene1)) 
			{$present1=1;
			 last;
			}
		 }
     if ($present1 ==0) 
		 {push(@all_genes,$gene1);
		 }
     
     
     foreach $gene (@all_genes) 
		 {if(uc($gene) eq uc($gene2)) 
			{$present2=1;
			 last;
			}
		 }
	 if ($present2 ==0) 
		 {push(@all_genes,$gene2);
		 }
	}


#specify output file

$temp_file = 'Z:\\Research\\Misc\\plant_hier\\plant_hierarchy.txt';
open (TEMPFILE, ">".$temp_file);

print TEMPFILE "level\n";
#Assigning to the lowest level 

@level1=();
@level1_temp=();
@bottom=();
@remaining=();
@remaining2=();

foreach $gene (@all_genes) 
	{chomp $gene;
	 #print $gene,"\n";
	 $TG=1;	
	 foreach $line1 (@data_1) 
		 {@line1=split(/\t/,$line1);
		  $TF=$line1[0];
		  $target=$line1[1];
		  chomp $target;

		  #if it appears as a TF, then its not a bottom one
		  if (uc($gene) eq uc($TF) && uc($TF) ne uc($target)) 
			  {$TG=0;
		       last;
			  }
		 }

      if ($TG==1) 
		  {print TEMPFILE "$gene = bottom\n";
		   push(@bottom,$gene);
		  }
	}



#### push all the regulators of bottom ones into level1_temp
	 foreach $gene (@bottom) 
		{foreach $line1 (@data_1) 
			{@line1=split(/\t/,$line1);
			  $TF=$line1[0];
			  $TG=$line1[1];
			  chomp $TF;
			  chomp $TG;
			  if (uc($gene) eq uc($TG) && uc($TF) ne uc($TG)) 
			  	{push(@level1_temp,$TF);
				 #print TEMPFILE "$TF\n";
				}
			}
		}
		



 
  
##Get the remaining ones

foreach $gene (@all_genes) 
	{$remaining=1;
	 foreach $gene2 (@level1_temp) 
		 {if (uc($gene) eq uc($gene2)) 
			{$remaining=0;
			 last;
			}
		 }
     
	 foreach $gene3 (@bottom) 
		 {if (uc($gene) eq uc($gene3)) 
			{$remaining=0;
			 last;
			}
		 }

	 if ($remaining==1) 
		 {push(@remaining,$gene);
		 }
	}
 

### check for those TFs that are regulated by level1 TFs but dont regulate the bottom ones.
print $#remaining,"\n";
for($i=0;$i<=$#remaining+1;$i++) 
		{$gene=$remaining[$i];
		 foreach $line1 (@data_1) 
			{@line1=split(/\t/,$line1);
			  $TF=$line1[0];
			  $TG=$line1[1];
			  chomp $TF;
			  chomp $TG;
			  if ($TG eq $gene)
				  {foreach $gene1 (@level1_temp) 
						{#print $gene1;
				         if ($TF eq $gene1) 
							{push(@level1_temp,$gene);
							 print "$gene\n";
							 last;
							}
						}
				  }
			}
		}
		

### clean level1_temp of repetitions 
  
foreach $gene (@level1_temp) 
	{$already=0;
	 foreach $gene2 (@level1) 
		 {if (uc($gene) eq uc($gene2)) 
			{$already=1;
			 last;
			}
		 }
     
	 foreach $gene3 (@bottom) 
		 {if (uc($gene) eq uc($gene3)) 
			{$already=1;
			 last;
			}
		 }

	 if ($already==0) 
		 {push(@level1,$gene);
		 }
	}
    
	 foreach $gene (@level1) 
		 {print TEMPFILE $gene," = first\n";
	      print $gene,"=first\n"; 
	 }

 

##Get the remaining ones
@remaining=();
foreach $gene (@all_genes) 
	{$remaining=1;
	 foreach $gene2 (@level1) 
		 {if (uc($gene) eq uc($gene2)) 
			{$remaining=0;
			 last;
			}
		 }
     
	 foreach $gene3 (@bottom) 
		 {if (uc($gene) eq uc($gene3)) 
			{$remaining=0;
			 last;
			}
		 }

	 if ($remaining==1) 
		 {push(@remaining,$gene);
		 }
	}
 
 print $#remaining;



 #### push all the regulators of first ones into level2_temp

@level2_temp=();
foreach $remaining (@remaining) 
	{foreach $line1 (@data_1) 
		{@line1=split(/\t/,$line1);
		 $TF=$line1[0];
		 $TG=$line1[1];
		 chomp $TF;
		 chomp $TG;
		 if (uc($TF) eq uc($remaining)) 
			 {foreach $gene (@level1) 
				 {if (uc($gene) eq uc($TG)) 
					{push(@level2_temp,$remaining);
					 #print "$remaining\n";
					 last;
					 }
				 }
			 }
		}
	}




### check for those TFs that are regulated by level2 TFs but dont regulate the level1 ones.
print $#remaining,"\n";
for($i=0;$i<=$#remaining+1;$i++) 
		{$gene=$remaining[$i];
		 foreach $line1 (@data_1) 
			{@line1=split(/\t/,$line1);
			  $TF=$line1[0];
			  $TG=$line1[1];
			  chomp $TF;
			  chomp $TG;
			  if ($TG eq $gene)
				  {foreach $gene1 (@level2_temp) 
						{#print $gene1;
				         if ($TF eq $gene1) 
							{push(@level2_temp,$gene);
							 print "$gene\n";
							 last;
							}
						}
				  }
			}
		}
	

### clean level2_temp of repetitions 
 
 @level2=(); 
foreach $gene (@level2_temp) 
	{$already=0;
	 foreach $gene2 (@level2) 
		 {if (uc($gene) eq uc($gene2)) 
			{$already=1;
			 last;
			}
		 }
 

	 if ($already==0) 
		 {push(@level2,$gene);
		 }
	}
    
	 foreach $gene (@level2) 
		 {print TEMPFILE $gene," = two\n";
	      print $gene,"=two\n"; 
	 }

 
##Get the remaining ones
@remaining=();
foreach $gene (@all_genes) 
	{$remaining=1;
	 foreach $gene2 (@level1) 
		 {if (uc($gene) eq uc($gene2)) 
			{$remaining=0;
			 last;
			}
		 }
     
	 foreach $gene3 (@bottom) 
		 {if (uc($gene) eq uc($gene3)) 
			{$remaining=0;
			 last;
			}
		 }

     foreach $gene4 (@level2) 
		 {if (uc($gene) eq uc($gene4)) 
			{$remaining=0;
			 last;
			}
		 }

	 if ($remaining==1) 
		 {push(@remaining,$gene);
	      print TEMPFILE "$gene = three\n";
		 }
	}
 
 print $#remaining;
