## this programs rebuilds the hier and counts changes upon deletion of nodes.

#!/usr/bin/perl -w

### file that lists the TFs (one per each) that are to be deleted one by one
$filename1='Z:\Shared\Research\Isalan_hierarchy\haiyuan_verify\Ecoli_TF.txt';
#$filename1='Z:\Shared\Research\Isalan_hierarchy\isalan_reg.txt';
open (DATA_1_FILE, $filename1);
@data_1 = <DATA_1_FILE>;
close DATA_1_FILE;

#### tab-delimited file that contains the network only between TFs.
$filenamea='Z:\Shared\Research\Isalan_hierarchy\haiyuan_verify\ERN_formatted_only_TF.txt';
#$filename1='Z:\Shared\Research\Isalan_hierarchy\isalan_reg.txt';
open (DATA_a_FILE, $filenamea);
@data_a = <DATA_a_FILE>;
close DATA_a_FILE;

### current hierarhy (TF \t level) that has to be perturbed upon deletion
$filenameB='Z:\Shared\Research\Isalan_hierarchy\haiyuan_verify\haiyuan_hierarchy.txt';
#$filename1='Z:\Shared\Research\Isalan_hierarchy\isalan_reg.txt';
open (DATA_B_FILE, $filenameB);
@data_B = <DATA_B_FILE>;
close DATA_B_FILE;


### you dont necessarily need all the following files. If you want some characteristics listed from them
### for each TF/node, then you can list them here.

$filenameC='Z:\Shared\Research\Isalan_hierarchy\Deletion_Ecoli\colonies_lived.txt';
#$filename1='Z:\Shared\Research\Isalan_hierarchy\isalan_reg.txt';
open (DATA_C_FILE, $filenameC);
@data_C = <DATA_C_FILE>;
close DATA_C_FILE;

$filenameD='Z:\Shared\Research\Isalan_hierarchy\Deletion_Ecoli\out-degree_TF_only.txt';
open (DATA_D_FILE, $filenameD);
@data_D = <DATA_D_FILE>;
close DATA_D_FILE;

$filenameF='Z:\Shared\Research\Isalan_hierarchy\Deletion_Ecoli\out-degree_overall.txt';
open (DATA_F_FILE, $filenameF);
@data_F = <DATA_F_FILE>;
close DATA_F_FILE;


$filenameE='Z:\Shared\Research\Isalan_hierarchy\Deletion_Ecoli\in-degree_TF_only.txt';
open (DATA_E_FILE, $filenameE);
@data_E = <DATA_E_FILE>;
close DATA_E_FILE;


$filenameb='C:\research_temp\OverExpression\Yeast_TFs.txt';
open (DATA_b_FILE, $filenameb);
@data_b = <DATA_b_FILE>;
close DATA_b_FILE;


### specify the output file here
$temp_file = 'Z:\\Shared\Research\\Isalan_hierarchy\\Deletion_Ecoli\\changes_hier.txt';
#$temp_file = 'Z:\\Shared\\Research\\Isalan_hierarchy\\isalan_hierarchy.txt';
open (TEMPFILE, ">".$temp_file);

print TEMPFILE "Delete\tCurrent_level\tOut_TF\tOut_Overall\tIn\tColonies\tChanges\tGene_moved\tInitial_level\tFinal_level\n";	


## for every combination generate new edges


for ($m=0;$m<=$#data_1;$m++) 
{$deleted=$data_1[$m];
	 chomp $deleted;
	 @reg_net=();
	 @mod_hier=();
	print "$deleted\n";
	print TEMPFILE "$deleted\t";
	
	$current='ND';
	foreach $line1 (@data_B) 
	{chomp $line1;
	 @line1=split(' ',$line1);
	 $gene1=$line1[0];
	 $level=$line1[2];
	 if ($gene1 eq $deleted) 
		{$current=$level
		}
	}

	$out='ND';
	$in='ND';
	$out_overall='ND';
	foreach $lined (@data_D) 
		{chomp $lined;
		 @lined=split(/\t/,$lined);
		 if (uc($lined[0]) eq uc($deleted)) 
			 {#print " out  $lined[1]\t";
			  $out= $lined[1];
			 }
		}

	foreach $linee (@data_E) 
		{chomp $linee;
		 @linee=split(/\t/,$linee);
		 if (uc($linee[0]) eq uc($deleted)) 
			 {#print  " in $linee[1]\t";
			  $in=$linee[1];
			  $in = $linee[1];
			 }
		}

	foreach $linef (@data_F) 
		{chomp $linef;
		 @linef=split(/\t/,$linef);
		 if (uc($linef[0]) eq uc($deleted)) 
			 {#print  " in $linee[1]\t";
			  $out_overall = $linef[1];
			 }
		}
    print TEMPFILE "$current\t$out\t$out_overall\t$in\t";
			 
	foreach $line1 (@data_a) 
		{chomp $line1;
		 @line1=split(/\t/,$line1);
		 #print "$line1\n";
		 #print TEMPFILE "$deleted\t$line1[0]\t$line1[1]\n";
		 if ($line1[0] eq $deleted) 
			 { print "deleted $line1\n";
			 }
		 if ($line1[1] eq $deleted && $in ==1) 
			 { print " in ==1 deleted $line1\n";
			   $regulator=$line1[0];
			  print "regu $regulator\n";
			 }
		 if ($line1[1] eq $deleted) 
			 { print "deleted $line1\n";
			 }
		 else
			 {
			  push(@reg_net,$line1);
		      #print TEMPFILE2 "$line1\n";
			 }
		}
	
	
	count_changes();
	
    #
  
	}


sub count_changes
{


## extract all genes

@all_genes=();
foreach $line1 (@reg_net) 
	{@line1=split(/\t/,$line1);
	 $gene1=$line1[0];
	 $gene2=$line1[1];
	 chomp $gene1;
	 chomp $gene2;
	 #print TEMPFILE2 "$gene1\t$gene2\n";
	 $present1=0;
	 $present2=0;

     foreach $gene (@all_genes) 
		 {if (uc($gene) eq uc($gene1)) 
			{$present1=1;
			 last;
			}
		 }
     if ($present1 ==0) 
		 {push(@all_genes,$gene1);
		 }
     
     
     foreach $gene (@all_genes) 
		 {if(uc($gene) eq uc($gene2)) 
			{$present2=1;
			 last;
			}
		 }
	 if ($present2 ==0) 
		 {push(@all_genes,$gene2);
		 }
	}

foreach $gene (@all_genes) 
	{#print TEMPFILE2 $gene," sdfsdfsdf \n";
	 #print $gene," sdfsdfsdf \n";
	
	}


 


#print TEMPFILE2 "level\n";
$header="level";
push(@mod_hier,$header);
#$entry="$regulator = bottom";
#push(@mod_hier,$entry);

#Assigning to the lowest level 

@level1=();
@level1_temp=();
@bottom=();
@remaining=();
@remaining2=();
#push(@bottom,$regulator);
foreach $gene (@all_genes) 
	{chomp $gene;
	 $TG=1;	
	 foreach $line1 (@reg_net) 
		 {@line1=split(/\t/,$line1);
		  $TF=$line1[0];
		  $target=$line1[1];
		  chomp $target;
		  chomp $TF;

		  #if it appears as a TF, then its not a bottom one
		  if (uc($gene) eq uc($TF) && uc($TF) ne uc($target)) 
			  {$TG=0;
		        last;
			  }
		 }

      if ($TG==1) 
		  {$entry="$gene = bottom";
		   push(@mod_hier,$entry);
		   push(@bottom,$gene);
		  }
	}



#### push all the regulators of bottom ones into level1_temp
	 foreach $gene (@bottom) 
		{
		 foreach $line1 (@reg_net) 
			{@line1=split(/\t/,$line1);
			  $TF=$line1[0];
			  $TG=$line1[1];
			  chomp $TF;
			  chomp $TG;
			  if (uc($gene) eq uc($TG) && uc($TF) ne uc($TG)) 
			  	{push(@level1_temp,$TF);
				 #print "$TF\n";
				}
			}
		}
		



 
  
##Get the remaining ones

foreach $gene (@all_genes) 
	{$remaining=1;
	 foreach $gene2 (@level1_temp) 
		 {if (uc($gene) eq uc($gene2)) 
			{$remaining=0;
			 last;
			}
		 }
     
	 foreach $gene3 (@bottom) 
		 {if (uc($gene) eq uc($gene3)) 
			{$remaining=0;
			 last;
			}
		 }

	 if ($remaining==1) 
		 {push(@remaining,$gene);
		  #print $gene," remains\n";
		 }
	}
 
 
### check for those TFs that are regulated by level1 TFs but dont regulate the bottom ones.
#print $#remaining," this is remaining\n";
for($i=0;$i<=$#remaining+1;$i++) 
		{$gene=$remaining[$i];
		 foreach $line1 (@reg_net) 
			{@line1=split(/\t/,$line1);
			  $TF=$line1[0];
			  $TG=$line1[1];
			  chomp $TF;
			  chomp $TG;
			  if ($TG eq $gene)
				  {foreach $gene1 (@level1_temp) 
						{#print $gene1;
				         if ($TF eq $gene1) 
							{push(@level1_temp,$gene);
							 #print "$gene\n";
							 last;
							}
						}
				  }
			}
		}

### clean level1_temp of repetitions 
  
foreach $gene (@level1_temp) 
	{$already=0;
	 foreach $gene2 (@level1) 
		 {if (uc($gene) eq uc($gene2)) 
			{$already=1;
			 last;
			}
		 }
     
	 foreach $gene3 (@bottom) 
		 {if (uc($gene) eq uc($gene3)) 
			{$already=1;
			 last;
			}
		 }

	 if ($already==0) 
		 {push(@level1,$gene);
		 }
	}
    
	 foreach $gene (@level1) 
		 {$entry= "$gene = first";
	      push(@mod_hier,$entry);
		  #print $gene,"=first\n"; 
	 }


 

##Get the remaining ones
@remaining=();
foreach $gene (@all_genes) 
	{$remaining=1;
	 foreach $gene2 (@level1) 
		 {if (uc($gene) eq uc($gene2)) 
			{$remaining=0;
			 last;
			}
		 }
     
	 foreach $gene3 (@bottom) 
		 {if (uc($gene) eq uc($gene3)) 
			{$remaining=0;
			 last;
			}
		 }

	 if ($remaining==1) 
		 {push(@remaining,$gene);
		 }
	}
 




 #### push all the regulators of first ones into level2_temp

@level2_temp=();
foreach $remaining (@remaining) 
	{foreach $line1 (@reg_net) 
		{@line1=split(/\t/,$line1);
		 $TF=$line1[0];
		 $TG=$line1[1];
		 chomp $TF;
		 chomp $TG;
		 if (uc($TF) eq uc($remaining)) 
			 {foreach $gene (@level1) 
				 {if (uc($gene) eq uc($TG)) 
					{push(@level2_temp,$remaining);
					 #print "$remaining\n";
					 last;
					 }
				 }
			 }
		}
	}




### check for those TFs that are regulated by level2 TFs but dont regulate the level1 ones.
#print $#remaining,"\n";
for($i=0;$i<=$#remaining+1;$i++) 
		{$gene=$remaining[$i];
		 foreach $line1 (@reg_net) 
			{@line1=split(/\t/,$line1);
			  $TF=$line1[0];
			  $TG=$line1[1];
			  chomp $TF;
			  chomp $TG;
			  if ($TG eq $gene)
				  {foreach $gene1 (@level2_temp) 
						{#print $gene1;
				         if ($TF eq $gene1) 
							{push(@level2_temp,$gene);
							 #print "$gene\n";
							 last;
							}
						}
				  }
			}
		}
	

### clean level2_temp of repetitions 
 
 @level2=(); 
foreach $gene (@level2_temp) 
	{$already=0;
	 foreach $gene2 (@level2) 
		 {if (uc($gene) eq uc($gene2)) 
			{$already=1;
			 last;
			}
		 }
 

	 if ($already==0) 
		 {push(@level2,$gene);
		 }
	}
    
	 foreach $gene (@level2) 
		 {$entry="$gene = two";
	      push(@mod_hier,$entry);
		  #print $gene,"=two\n"; 
	 }

 

##Get the remaining ones
@remaining=();
foreach $gene (@all_genes) 
	{$remaining=1;
	 foreach $gene2 (@level1) 
		 {if (uc($gene) eq uc($gene2)) 
			{$remaining=0;
			 last;
			}
		 }
     
	 foreach $gene3 (@bottom) 
		 {if (uc($gene) eq uc($gene3)) 
			{$remaining=0;
			 last;
			}
		 }

     foreach $gene4 (@level2) 
		 {if (uc($gene) eq uc($gene4)) 
			{$remaining=0;
			 last;
			}
		 }

	 if ($remaining==1) 
		 {push(@remaining,$gene);
	      $entry= "$gene = three";
		  push(@mod_hier,$entry);
		  #print "$gene = three\n";
		 }
	}
 
 #print $#remaining;

$colinies='ND';
	
foreach $linec (@data_C) 
	{chomp $linec;
     @linec=split(/\t/,$linec);
	 $linec[0] =~ s/^\s+//;
     $linec[0] =~ s/\s+$//;
	 $linec[0] =~ s/^\s+//;
     $linec[0] =~ s/\s+$//;
	 #print "$linec[0]\n";
	 if (uc($linec[0]) eq uc($deleted)) 
		 {$colonies=$linec[3];
		  }
	}

print TEMPFILE "$colonies\t";
print "pheno $colonies\t";
		 
$changes=0;
@changes=();
foreach $line1 (@data_B) 
	{chomp $line1;
	 @line1=split(' ',$line1);
	 $gene1=$line1[0];
	 $levelA=$line1[2];
	 $present=0;
	 #print "$gene1\t$levelA\t$levelB\n";
			 
	 foreach $line2 (@mod_hier) 
		{chomp $line2;
		 @line2=split(' ',$line2);
		 $gene2=$line2[0];
		 $levelB=$line2[2];
         if ($gene1 eq $gene2 && $levelA ne $levelB) 
			 {$changes++;
			 print "$gene1\t$levelA\t$levelB\n";
			  push(@changes,$gene1);
			  push(@changes,$levelA);
			  push(@changes,$levelB);
			  $present=1;
			  last;
			 }
		 elsif($gene1 eq $gene2) 
			 {$present=1;
			 } 
		}
	


	if ($present==0 && $gene1 ne $deleted) 
		{$changes++;
		 push(@changes,$gene1);
		 push(@changes,$levelA);
		 push(@changes,'bottom');
				  
		}
	}

foreach $line1 (@mod_hier) 
	{chomp $line1;
	 #print TEMPFILE "$line1\n";
	}

foreach $linea (@mod_hier) 
 {#print TEMPFILE "$linea\n";
	 }
print TEMPFILE "$changes\t";
print "changes $changes\n";


foreach $gene1 (@changes) 
	{print TEMPFILE "$gene1\t";		 
	}

print TEMPFILE "\n";

}
