00001 #include "log.h"
00002 #include "format.h"
00003 #include "linestream.h"
00004 #include "geneOntology.h"
00005 #include <gsl/gsl_randist.h>
00006
00007
00016 #define ANALYSIS_MODE_GENE_ENRICHMENT 1
00017 #define ANALYSIS_MODE_GENE_DEPLETION 2
00018
00019
00020
00021 typedef struct {
00022 char* db;
00023 char* dbGeneName;
00024 char* geneName;
00025 char* goId;
00026 } GoGeneAssociationEntry;
00027
00028
00029
00030 static Array goTerms = NULL;
00031 static Array goNodes = NULL;
00032 static Array genericGoSlimNodes = NULL;
00033 static Array goGeneAssociations = NULL;
00034 static Texta genesOfInterest = NULL;
00035 static GoNode* biologicalProcessRoot = NULL;
00036 static GoNode* molecularFunctionRoot = NULL;
00037 static GoNode* cellularComponentRoot = NULL;
00038
00039
00040
00041
00047 GoNode* geneOntology_getBiologicalProcessRoot (void)
00048 {
00049 return biologicalProcessRoot;
00050 }
00051
00052
00053
00059 GoNode* geneOntology_getMolecularFunctionRoot (void)
00060 {
00061 return molecularFunctionRoot;
00062 }
00063
00064
00065
00071 GoNode* geneOntology_getCellularComponentRoot (void)
00072 {
00073 return cellularComponentRoot;
00074 }
00075
00076
00077
00078 static Array getGoNodesByNameSpace (char* namespace)
00079 {
00080 int i;
00081 GoNode *currGoNode;
00082 static Array subsetGoNodes = NULL;
00083
00084 if (subsetGoNodes == NULL) {
00085 subsetGoNodes = arrayCreate (15000,GoNode*);
00086 }
00087 else {
00088 arrayClear (subsetGoNodes);
00089 }
00090 for (i = 0; i < arrayMax (goNodes); i++) {
00091 currGoNode = arrp (goNodes,i,GoNode);
00092 if (strEqual (currGoNode->goTerm->namespace,namespace) ||
00093 strEqual (namespace,"all")) {
00094 array (subsetGoNodes,arrayMax (subsetGoNodes),GoNode*) = currGoNode;
00095 }
00096 }
00097 return subsetGoNodes;
00098 }
00099
00100
00101
00107 Array geneOntology_getBiologicalProcessGoNodes (void)
00108 {
00109 return getGoNodesByNameSpace ("biological_process");
00110 }
00111
00112
00113
00119 Array geneOntology_getMolecularFunctionGoNodes (void)
00120 {
00121 return getGoNodesByNameSpace ("molecular_function");
00122 }
00123
00124
00125
00131 Array geneOntology_getCellularComponentGoNodes (void)
00132 {
00133 return getGoNodesByNameSpace ("cellular_component");
00134 }
00135
00136
00137
00143 Array geneOntology_getAllGoNodes (void)
00144 {
00145 return getGoNodesByNameSpace ("all");
00146 }
00147
00148
00149
00156 Array geneOntology_getGenericGoSlimNodes (void)
00157 {
00158 return genericGoSlimNodes;
00159 }
00160
00161
00162
00169 int geneOntology_getNumberOfAssociatedGenes (void)
00170 {
00171 return arrayMax (goGeneAssociations);
00172 }
00173
00174
00175
00185 int geneOntology_getNumberOfGenesOfInterest (void)
00186 {
00187 return arrayMax (genesOfInterest);
00188 }
00189
00190
00191
00202 void geneOntology_resetGenesOfInterest (void)
00203 {
00204 int i;
00205 GoNode *currGoNode;
00206
00207 for (i = 0; i < arrayMax (goNodes); i++) {
00208 currGoNode = arrp (goNodes,i,GoNode);
00209 if (currGoNode->genesOfInterest != NULL) {
00210 arrayClear (currGoNode->genesOfInterest);
00211 }
00212 }
00213 }
00214
00215
00216
00217 static void readGoOntology (char* oboFileName)
00218 {
00219 LineStream ls;
00220 char *line,*pos1,*pos2;
00221 int isGoTerm;
00222 GoTerm *currGoTerm;
00223 GoTagValue *currGoTagValue;
00224
00225 goTerms = arrayCreate (10000,GoTerm);
00226 ls = ls_createFromFile (oboFileName);
00227 isGoTerm = 0;
00228 while (line = ls_nextLine (ls)) {
00229 if (strEqual (line,"[Term]")) {
00230 isGoTerm = 1;
00231 currGoTerm = arrayp (goTerms,arrayMax (goTerms),GoTerm);
00232 continue;
00233 }
00234 else if (line[0] == '\0') {
00235 isGoTerm = 0;
00236 continue;
00237 }
00238 if (isGoTerm == 1) {
00239 if (strStartsWithC (line,"id:")) {
00240 pos1 = strchr (line,' ');
00241 currGoTerm->id = hlr_strdup (pos1 + 1);
00242 }
00243 else if (strStartsWithC (line,"name:")) {
00244 pos1 = strchr (line,' ');
00245 currGoTerm->name = hlr_strdup (pos1 + 1);
00246 }
00247 else if (strStartsWithC (line,"namespace:")) {
00248 pos1 = strchr (line,' ');
00249 currGoTerm->namespace = hlr_strdup (pos1 + 1);
00250 }
00251 else if (strStartsWithC (line,"alt_id:")) {
00252 if (currGoTerm->altIds == NULL) {
00253 currGoTerm->altIds = textCreate (10);
00254 }
00255 pos1 = strchr (line,' ');
00256 textAdd (currGoTerm->altIds,pos1 + 1);
00257 }
00258 else if (strStartsWithC (line,"def:")) {
00259 pos1 = strstr (line," [");
00260 if (pos1 == NULL) {
00261 die ("Expected ' [' in def line: %s",line);
00262 }
00263 *pos1 = '\0';
00264 pos1 = strchr (line,' ');
00265 strTrim (pos1," \""," \"");
00266 currGoTerm->definition = hlr_strdup (pos1);
00267 }
00268 else if (strStartsWithC (line,"subset:")) {
00269 if (currGoTerm->subsets == NULL) {
00270 currGoTerm->subsets = textCreate (10);
00271 }
00272 pos1 = strchr (line,' ');
00273 textAdd (currGoTerm->subsets,pos1 + 1);
00274 if (strEqual (pos1 + 1,"goslim_generic")) {
00275 currGoTerm->isGenericGoSlim = 1;
00276 }
00277 }
00278 else if (strStartsWithC (line,"comment:")) {
00279 pos1 = strchr (line,' ');
00280 currGoTerm->comment = hlr_strdup (pos1 + 1);
00281 }
00282 else if (strEqual (line,"is_obsolete: true")) {
00283 currGoTerm->isObsolete = 1;
00284 }
00285 else if (strStartsWithC (line,"synonym:")) {
00286 if (currGoTerm->synonyms == NULL) {
00287 currGoTerm->synonyms = textCreate (10);
00288 }
00289 pos1 = strrchr (line,'"');
00290 *pos1 = '\0';
00291 pos1 = strchr (line,'"');
00292 textAdd (currGoTerm->synonyms,pos1 + 1);
00293 }
00294 else if (strStartsWithC (line,"consider:")) {
00295 if (currGoTerm->considers == NULL) {
00296 currGoTerm->considers = textCreate (10);
00297 }
00298 pos1 = strchr (line,' ');
00299 textAdd (currGoTerm->considers,pos1 + 1);
00300 }
00301 else if (strStartsWithC (line,"xref:")) {
00302 if (currGoTerm->xrefs == NULL) {
00303 currGoTerm->xrefs = arrayCreate (10,GoTagValue);
00304 }
00305 currGoTagValue = arrayp (currGoTerm->xrefs,arrayMax (currGoTerm->xrefs),GoTagValue);
00306 pos1 = strchr (line,' ');
00307 pos2 = strchr (pos1 + 1,':');
00308 *pos2 = '\0';
00309 currGoTagValue->tag = hlr_strdup (pos1 + 1);
00310 currGoTagValue->value = hlr_strdup (pos2 + 1);
00311 }
00312 else if (strStartsWithC (line,"is_a:")) {
00313 if (currGoTerm->parents == NULL) {
00314 currGoTerm->parents = textCreate (10);
00315 }
00316 if (pos1 = strchr (line,'!')) {
00317 *(pos1 - 1) = '\0';
00318 }
00319 pos1 = strchr (line,' ');
00320 textAdd (currGoTerm->parents,pos1 + 1);
00321 }
00322 else if (strStartsWithC (line,"relationship:")) {
00323 if (currGoTerm->relationships == NULL) {
00324 currGoTerm->relationships = arrayCreate (10,GoTagValue);
00325 }
00326 currGoTagValue = arrayp (currGoTerm->relationships,arrayMax (currGoTerm->relationships),GoTagValue);
00327 if (pos1 = strchr (line,'!')) {
00328 *(pos1 - 1) = '\0';
00329 }
00330 pos1 = strchr (line,' ');
00331 pos2 = strchr (pos1 + 1,' ');
00332 *pos2 = '\0';
00333 currGoTagValue->tag = hlr_strdup (pos1 + 1);
00334 currGoTagValue->value = hlr_strdup (pos2 + 1);
00335 }
00336 else if (strStartsWithC (line,"replaced_by:")) {
00337 ;
00338 }
00339 else if (strStartsWithC (line,"disjoint_from:")) {
00340 ;
00341 }
00342 else {
00343 warn ("Unexpected line: %s",line);
00344 }
00345 }
00346 }
00347 ls_destroy (ls);
00348 }
00349
00350
00351
00352 static int sortGoNodesById (GoNode* a, GoNode* b)
00353 {
00354 return strcmp (a->id,b->id);
00355 }
00356
00357
00358
00365 GoNode* geneOntology_findGoNode (char* id)
00366 {
00367 GoNode testGoNode;
00368 int index,foundGoNode;
00369
00370 testGoNode.id = hlr_strdup (id);
00371 foundGoNode = arrayFind (goNodes,&testGoNode,&index,(ARRAYORDERF)sortGoNodesById);
00372 hlr_free (testGoNode.id);
00373 if (foundGoNode == 1) {
00374 return arrp (goNodes,index,GoNode);
00375 }
00376 return NULL;
00377 }
00378
00379
00380
00381 static void addGoNode (Array a, GoNode* n)
00382 {
00383 int i;
00384 GoNode *currGoNode;
00385
00386 for (i = 0; i < arrayMax (a); i++) {
00387 currGoNode = arru (a,i,GoNode*);
00388 if (currGoNode == n) {
00389 return;
00390 }
00391 }
00392 array (a,arrayMax (a),GoNode*) = n;
00393 }
00394
00395
00396
00397 static void goTerms2goNodes (void)
00398 {
00399 int i,j;
00400 GoTerm *currGoTerm;
00401 GoNode *currGoNode,*parentGoNode;
00402
00403 goNodes = arrayCreate (arrayMax (goTerms),GoNode);
00404 genericGoSlimNodes = arrayCreate (500,GoNode*);
00405 for (i = 0; i < arrayMax (goTerms); i++) {
00406 currGoTerm = arrp (goTerms,i,GoTerm);
00407 if (currGoTerm->isObsolete == 1) {
00408 continue;
00409 }
00410 currGoNode = arrayp (goNodes,arrayMax (goNodes),GoNode);
00411 currGoNode->goTerm = currGoTerm;
00412 currGoNode->id = hlr_strdup (currGoTerm->id);
00413 if (currGoNode->goTerm->isGenericGoSlim == 1) {
00414 array (genericGoSlimNodes,arrayMax (genericGoSlimNodes),GoNode*) = currGoNode;
00415 }
00416 if (strEqual (currGoNode->goTerm->name,"biological_process")) {
00417 biologicalProcessRoot = currGoNode;
00418 }
00419 else if (strEqual (currGoNode->goTerm->name,"molecular_function")) {
00420 molecularFunctionRoot = currGoNode;
00421 }
00422 else if (strEqual (currGoNode->goTerm->name,"cellular_component")) {
00423 cellularComponentRoot = currGoNode;
00424 }
00425 }
00426 arraySort (goNodes,(ARRAYORDERF)sortGoNodesById);
00427 for (i = 0; i < arrayMax (goNodes); i++) {
00428 currGoNode = arrp (goNodes,i,GoNode);
00429 if (currGoNode->goTerm->parents == NULL) {
00430 continue;
00431 }
00432 if (currGoNode->parents == NULL) {
00433 currGoNode->parents = arrayCreate (10,GoNode*);
00434 }
00435 for (j = 0; j < arrayMax (currGoNode->goTerm->parents); j++) {
00436 parentGoNode = geneOntology_findGoNode (textItem (currGoNode->goTerm->parents,j));
00437 if (parentGoNode == NULL) {
00438 die ("Expected to find %s in GO ontology: %s",textItem (currGoNode->goTerm->parents,j));
00439 }
00440 addGoNode (currGoNode->parents,parentGoNode);
00441 if (parentGoNode->children == NULL) {
00442 parentGoNode->children = arrayCreate (10,GoNode*);
00443 }
00444 addGoNode (parentGoNode->children,currGoNode);
00445 }
00446 }
00447 }
00448
00449
00450
00451 static int sortGoGeneAssociationEntriesByGeneName (GoGeneAssociationEntry *a, GoGeneAssociationEntry *b)
00452 {
00453 return strcmp (a->geneName,b->geneName);
00454 }
00455
00456
00457
00458 static int sortGoGeneAssociationsByGeneName (GoGeneAssociation *a, GoGeneAssociation *b)
00459 {
00460 return strcmp (a->geneName,b->geneName);
00461 }
00462
00463
00464
00465 static void readGoAnnotations (char* goAnnotationFileName)
00466 {
00467 LineStream ls;
00468 char *line;
00469 WordIter w;
00470 Array goGeneAssociationEntries;
00471 GoGeneAssociationEntry *currGoGeneAssociationEntry,*nextGoGeneAssociationEntry;
00472 GoGeneAssociation *currGoGeneAssociation;
00473 int i,j;
00474
00475 goGeneAssociationEntries = arrayCreate (100000,GoGeneAssociationEntry);
00476 ls = ls_createFromFile (goAnnotationFileName);
00477 while (line = ls_nextLine (ls)) {
00478 if (line[0] == '\0' || line[0] == '!') {
00479 continue;
00480 }
00481 currGoGeneAssociationEntry = arrayp (goGeneAssociationEntries,arrayMax (goGeneAssociationEntries),GoGeneAssociationEntry);
00482 w = wordIterCreate (line,"\t",0);
00483 currGoGeneAssociationEntry->db = hlr_strdup (wordNext (w));
00484 currGoGeneAssociationEntry->dbGeneName = hlr_strdup (wordNext (w));
00485 currGoGeneAssociationEntry->geneName = hlr_strdup (wordNext (w));
00486 wordNext (w);
00487 currGoGeneAssociationEntry->goId = hlr_strdup (wordNext (w));
00488 wordIterDestroy (w);
00489 }
00490 ls_destroy (ls);
00491 arraySort (goGeneAssociationEntries,(ARRAYORDERF)sortGoGeneAssociationEntriesByGeneName);
00492 goGeneAssociations = arrayCreate (10000,GoGeneAssociation);
00493 i = 0;
00494 while (i < arrayMax (goGeneAssociationEntries)) {
00495 currGoGeneAssociationEntry = arrp (goGeneAssociationEntries,i,GoGeneAssociationEntry);
00496 currGoGeneAssociation = arrayp (goGeneAssociations,arrayMax (goGeneAssociations),GoGeneAssociation);
00497 currGoGeneAssociation->db = hlr_strdup (currGoGeneAssociationEntry->db);
00498 currGoGeneAssociation->dbGeneName = hlr_strdup (currGoGeneAssociationEntry->dbGeneName);
00499 currGoGeneAssociation->geneName = hlr_strdup (currGoGeneAssociationEntry->geneName);
00500 currGoGeneAssociation->goIds = textCreate (10);
00501 textAdd (currGoGeneAssociation->goIds,currGoGeneAssociationEntry->goId);
00502 j = i + 1;
00503 while (j < arrayMax (goGeneAssociationEntries)) {
00504 nextGoGeneAssociationEntry = arrp (goGeneAssociationEntries,j,GoGeneAssociationEntry);
00505 if (strEqual (currGoGeneAssociationEntry->geneName,nextGoGeneAssociationEntry->geneName)) {
00506 textAdd (currGoGeneAssociation->goIds,nextGoGeneAssociationEntry->goId);
00507 }
00508 else {
00509 break;
00510 }
00511 j++;
00512 }
00513 i = j;
00514 textUniqKeepOrder (currGoGeneAssociation->goIds);
00515 }
00516 arraySort (goGeneAssociations,(ARRAYORDERF)sortGoGeneAssociationsByGeneName);
00517 }
00518
00519
00520
00521 static void mapAnnotatedGenesToGoOntology (void)
00522 {
00523 int i,j;
00524 GoGeneAssociation *currGoGeneAssociation;
00525 GoNode *currGoNode;
00526
00527 for (i = 0; i < arrayMax (goGeneAssociations); i++) {
00528 currGoGeneAssociation = arrp (goGeneAssociations,i,GoGeneAssociation);
00529 for (j = 0; j < arrayMax (currGoGeneAssociation->goIds); j++) {
00530 currGoNode = geneOntology_findGoNode (textItem (currGoGeneAssociation->goIds,j));
00531 if (currGoNode == NULL) {
00532 die ("Expected to find %s in GO ontology: %s",textItem (currGoGeneAssociation->goIds,j));
00533 }
00534 if (currGoNode->associatedGenes == NULL) {
00535 currGoNode->associatedGenes = textCreate (50);
00536 }
00537 textAdd (currGoNode->associatedGenes,currGoGeneAssociation->geneName);
00538 }
00539 }
00540 }
00541
00542
00543
00554 GoGeneAssociation* geneOntology_findGoGeneAssociation (char* geneName)
00555 {
00556 GoGeneAssociation testGoGeneAssociation;
00557 int index,foundGoGeneAssociation;
00558
00559 testGoGeneAssociation.geneName = hlr_strdup (geneName);
00560 foundGoGeneAssociation = arrayFind (goGeneAssociations,&testGoGeneAssociation,&index,(ARRAYORDERF)sortGoGeneAssociationsByGeneName);
00561 hlr_free (testGoGeneAssociation.geneName);
00562 if (foundGoGeneAssociation == 1) {
00563 return arrp (goGeneAssociations,index,GoGeneAssociation);
00564 }
00565 return NULL;
00566 }
00567
00568
00569
00580 Texta geneOntology_mapGenesOfInterestToGeneOntology (Texta geneNamesOfInterest)
00581 {
00582 int i,j;
00583 GoGeneAssociation *currGoGeneAssociation;
00584 GoNode *currGoNode;
00585 static Texta invalidGeneNames = NULL;
00586
00587 textCreateClear (invalidGeneNames,100);
00588 genesOfInterest = textCreate (100);
00589 textUniqKeepOrder (geneNamesOfInterest);
00590 for (i = 0; i < arrayMax (geneNamesOfInterest); i++) {
00591 currGoGeneAssociation = geneOntology_findGoGeneAssociation (textItem (geneNamesOfInterest,i));
00592 if (currGoGeneAssociation != NULL) {
00593 textAdd (genesOfInterest,textItem (geneNamesOfInterest,i));
00594 for (j = 0; j < arrayMax (currGoGeneAssociation->goIds); j++) {
00595 currGoNode = geneOntology_findGoNode (textItem (currGoGeneAssociation->goIds,j));
00596 if (currGoNode == NULL) {
00597 die ("Expected to find %s in GO ontology: %s",textItem (currGoGeneAssociation->goIds,j));
00598 }
00599 if (currGoNode->genesOfInterest == NULL) {
00600 currGoNode->genesOfInterest = textCreate (10);
00601 }
00602 textAdd (currGoNode->genesOfInterest,textItem (geneNamesOfInterest,i));
00603 }
00604 }
00605 else {
00606 textAdd (invalidGeneNames,textItem (geneNamesOfInterest,i));
00607 }
00608 }
00609 return invalidGeneNames;
00610 }
00611
00612
00613
00619 void geneOntology_init (char* geneOntologyFileName)
00620 {
00621 readGoOntology (geneOntologyFileName);
00622 goTerms2goNodes ();
00623 }
00624
00625
00626
00633 void geneOntology_mapGeneAnnotationsToGeneOntology (char* goGeneAssociationFileName)
00634 {
00635 readGoAnnotations (goGeneAssociationFileName);
00636 mapAnnotatedGenesToGoOntology ();
00637 }
00638
00639
00640
00641 static void countGenes (GoNode* n, Texta goNodeAnnotatedGenes, Texta goNodeGenesOfInterest)
00642 {
00643 int i;
00644 GoNode *currGoNode;
00645
00646 if (n->children != NULL) {
00647 for (i = 0; i < arrayMax (n->children); i++) {
00648 currGoNode = arru (n->children,i,GoNode*);
00649 countGenes (currGoNode,goNodeAnnotatedGenes,goNodeGenesOfInterest);
00650 }
00651 }
00652 if (n->associatedGenes != NULL) {
00653 for (i = 0; i < arrayMax (n->associatedGenes); i++) {
00654 textAdd (goNodeAnnotatedGenes,textItem (n->associatedGenes,i));
00655 }
00656 }
00657 if (n->genesOfInterest != NULL) {
00658 for (i = 0; i < arrayMax (n->genesOfInterest); i++) {
00659 textAdd (goNodeGenesOfInterest,textItem (n->genesOfInterest,i));
00660 }
00661 }
00662 }
00663
00664
00665
00666 static double calculatePvalueForEnrichment (int k, int n1, int n2, int t)
00667 {
00668 double pvalue;
00669 int i;
00670
00671 pvalue = 0.0;
00672 for(i = k; i <= n1; i++) {
00673 pvalue += gsl_ran_hypergeometric_pdf (i,n1,n2,t);
00674 }
00675 return pvalue;
00676 }
00677
00678
00679
00680 static double calculatePvalueForDepletion (int k, int n1, int n2, int t)
00681 {
00682 double pvalue;
00683 int i;
00684
00685 pvalue = 0.0;
00686 for(i = 0; i <= k; i++) {
00687 pvalue += gsl_ran_hypergeometric_pdf (i,n1,n2,t);
00688 }
00689 return pvalue;
00690 }
00691
00692
00693
00694 static Texta calculateGeneEnrichmentOrDepletionForGoTerm (GoNode* goNode, int* numberOfAnnotatedGenes,
00695 int* numberOfGenesOfInterest,
00696 double* pvalue, int analysisMode)
00697 {
00698 static Texta goNodeAnnotatedGenes = NULL;
00699 static Texta goNodeGenesOfInterest = NULL;
00700
00701 textCreateClear (goNodeAnnotatedGenes,1000);
00702 textCreateClear (goNodeGenesOfInterest,1000);
00703 countGenes (goNode,goNodeAnnotatedGenes,goNodeGenesOfInterest);
00704 arraySort (goNodeAnnotatedGenes,(ARRAYORDERF)arrayStrcmp);
00705 arraySort (goNodeGenesOfInterest,(ARRAYORDERF)arrayStrcmp);
00706 arrayUniq (goNodeAnnotatedGenes,NULL,(ARRAYORDERF)arrayStrcmp);
00707 arrayUniq (goNodeGenesOfInterest,NULL,(ARRAYORDERF)arrayStrcmp);
00708 *numberOfAnnotatedGenes = arrayMax (goNodeAnnotatedGenes);
00709 *numberOfGenesOfInterest = arrayMax (goNodeGenesOfInterest);
00710 if (analysisMode == ANALYSIS_MODE_GENE_ENRICHMENT) {
00711 *pvalue = calculatePvalueForEnrichment (*numberOfGenesOfInterest,*numberOfAnnotatedGenes,
00712 arrayMax (goGeneAssociations) - (*numberOfAnnotatedGenes),
00713 arrayMax (genesOfInterest));
00714 }
00715 else if (analysisMode == ANALYSIS_MODE_GENE_DEPLETION) {
00716 *pvalue = calculatePvalueForDepletion (*numberOfGenesOfInterest,*numberOfAnnotatedGenes,
00717 arrayMax (goGeneAssociations) - (*numberOfAnnotatedGenes),
00718 arrayMax (genesOfInterest));
00719 }
00720 else {
00721 die ("Unknown analysis mode: %d",analysisMode);
00722 }
00723 return goNodeGenesOfInterest;
00724 }
00725
00726
00727
00728 static int sortGoStatisticsByPvalue (GoStatistic *a, GoStatistic *b)
00729 {
00730 if (a->pvalue < b->pvalue) {
00731 return -1;
00732 }
00733 if (a->pvalue > b->pvalue) {
00734 return 1;
00735 }
00736 return 0;
00737 }
00738
00739
00740
00741 static Array calculateGeneEnrichmentOrDepletion (Array goNodePointers, int multipleTestingCorrectionMethod, int analysisMode)
00742 {
00743 static Array goStatistics = NULL;
00744 GoStatistic *currGoStatistic;
00745 int i;
00746 GoNode* currGoNode;
00747 int numberOfAnnotatedGenes,numberOfGenesOfInterest;
00748 double pvalue;
00749 Texta genesOfInterest;
00750
00751 if (goStatistics == NULL) {
00752 goStatistics = arrayCreate (10000,GoStatistic);
00753 }
00754 else {
00755 for (i = 0; i < arrayMax (goStatistics); i++) {
00756 currGoStatistic = arrp (goStatistics,i,GoStatistic);
00757 textDestroy (currGoStatistic->genesOfInterest);
00758 }
00759 arrayClear (goStatistics);
00760 }
00761 for (i = 0; i < arrayMax (goNodePointers); i++) {
00762 currGoNode = arru (goNodePointers,i,GoNode*);
00763 genesOfInterest = calculateGeneEnrichmentOrDepletionForGoTerm (currGoNode,&numberOfAnnotatedGenes,
00764 &numberOfGenesOfInterest,&pvalue,analysisMode);
00765 currGoStatistic = arrayp (goStatistics,arrayMax (goStatistics),GoStatistic);
00766 currGoStatistic->goNode = currGoNode;
00767 currGoStatistic->numberOfAnnotatedGenes = numberOfAnnotatedGenes;
00768 currGoStatistic->numberOfGenesOfInterest = numberOfGenesOfInterest;
00769 currGoStatistic->pvalue = pvalue;
00770 currGoStatistic->genesOfInterest = textClone (genesOfInterest);
00771 }
00772 arraySort (goStatistics,(ARRAYORDERF)sortGoStatisticsByPvalue);
00773 for (i = 0; i < arrayMax (goStatistics); i++) {
00774 currGoStatistic = arrp (goStatistics,i,GoStatistic);
00775 if (multipleTestingCorrectionMethod == MULTIPLE_TESTING_CORRECTION_BENJAMINI_HOCHBERG) {
00776 currGoStatistic->pvalueCorrected = MIN (currGoStatistic->pvalue * arrayMax (goStatistics) / (i + 1),1.0);
00777 }
00778 else if (multipleTestingCorrectionMethod == MULTIPLE_TESTING_CORRECTION_BONFERRONI) {
00779 currGoStatistic->pvalueCorrected = MIN (currGoStatistic->pvalue * arrayMax (goStatistics),1.0);
00780 }
00781 else {
00782 die ("Unknown multiple testing correction method: %d",multipleTestingCorrectionMethod);
00783 }
00784 }
00785 return goStatistics;
00786 }
00787
00788
00789
00802 Array geneOntology_calculateGeneEnrichment (Array goNodePointers, int multipleTestingCorrectionMethod)
00803 {
00804 return calculateGeneEnrichmentOrDepletion (goNodePointers,multipleTestingCorrectionMethod,
00805 ANALYSIS_MODE_GENE_ENRICHMENT);
00806 }
00807
00808
00809
00822 Array geneOntology_calculateGeneDepletion (Array goNodePointers, int multipleTestingCorrectionMethod)
00823 {
00824 return calculateGeneEnrichmentOrDepletion (goNodePointers,multipleTestingCorrectionMethod,
00825 ANALYSIS_MODE_GENE_DEPLETION);
00826 }
00827
00828
00829
00830 static void getChildrenAtHierarchyLevel (GoNode* n, Array resultGoNodes, int currentLevel, int specifiedLevel)
00831 {
00832 int i;
00833 GoNode* currGoNode;
00834
00835 if (currentLevel == specifiedLevel) {
00836 i = 0;
00837 while (i < arrayMax (resultGoNodes)) {
00838 if (arru (resultGoNodes,i,GoNode*) == n) {
00839 break;
00840 }
00841 i++;
00842 }
00843 if (i == arrayMax (resultGoNodes)) {
00844 array (resultGoNodes,arrayMax (resultGoNodes),GoNode*) = n;
00845 }
00846 return;
00847 }
00848 if (n->children != NULL) {
00849 for (i = 0; i < arrayMax (n->children); i++) {
00850 currGoNode = arru (n->children,i,GoNode*);
00851 getChildrenAtHierarchyLevel (currGoNode,resultGoNodes,currentLevel + 1,specifiedLevel);
00852 }
00853 }
00854 }
00855
00856
00857
00858
00868 Array geneOntology_getChildrenAtSpecifiedHierarchyLevel (GoNode* currGoNode, int level)
00869 {
00870 static Array resultGoNodes = NULL;
00871
00872 if (resultGoNodes == NULL) {
00873 resultGoNodes = arrayCreate (1000,GoNode*);
00874 }
00875 else {
00876 arrayClear (resultGoNodes);
00877 }
00878 getChildrenAtHierarchyLevel (currGoNode,resultGoNodes,0,level);
00879 return resultGoNodes;
00880 }