00001 #include "format.h" 00002 #include "log.h" 00003 #include "linestream.h" 00004 #include "common.h" 00005 #include "blatParser.h" 00006 00007 00008 00016 #define NUM_PSL_HEADER_LINES 5 00017 00018 00019 00020 static LineStream ls = NULL; 00021 00022 00023 00028 void blatParser_initFromFile (char *fileName) 00029 { 00030 int i; 00031 00032 ls = ls_createFromFile (fileName); 00033 ls_bufferSet (ls,1); 00034 for (i = 0; i < NUM_PSL_HEADER_LINES; i++) { 00035 ls_nextLine (ls); 00036 } 00037 } 00038 00039 00040 00045 void blatParser_initFromPipe (char *command) 00046 { 00047 int i; 00048 00049 ls = ls_createFromPipe (command); 00050 ls_bufferSet (ls,1); 00051 for (i = 0; i < NUM_PSL_HEADER_LINES; i++) { 00052 ls_nextLine (ls); 00053 } 00054 } 00055 00056 00057 00061 void blatParser_deInit (void) 00062 { 00063 ls_destroy (ls); 00064 } 00065 00066 00067 00068 static void blatParser_freeQuery (BlatQuery *currBlatQuery) 00069 { 00070 int i; 00071 PslEntry *currPslEntry; 00072 00073 if (currBlatQuery == NULL) { 00074 return; 00075 } 00076 hlr_free (currBlatQuery->qName); 00077 for (i = 0; i < arrayMax (currBlatQuery->entries); i++) { 00078 currPslEntry = arrp (currBlatQuery->entries,i,PslEntry); 00079 hlr_free (currPslEntry->tName); 00080 arrayDestroy (currPslEntry->blockSizes); 00081 arrayDestroy (currPslEntry->tStarts); 00082 arrayDestroy (currPslEntry->qStarts); 00083 } 00084 arrayDestroy (currBlatQuery->entries); 00085 freeMem (currBlatQuery); 00086 } 00087 00088 00089 00090 static void processCommaSeparatedList (Array results, char *str) 00091 { 00092 WordIter w; 00093 char *tok; 00094 00095 w = wordIterCreate (str,",",0); 00096 while (tok = wordNext (w)) { 00097 if (tok[0] == '\0') { 00098 continue; 00099 } 00100 array (results,arrayMax (results),int) = atoi (tok); 00101 } 00102 wordIterDestroy (w); 00103 } 00104 00105 00106 00111 BlatQuery* blatParser_nextQuery (void) 00112 { 00113 WordIter w; 00114 char *line; 00115 static char *queryName = NULL; 00116 static char *prevBlatQueryName = NULL; 00117 static BlatQuery *currBlatQuery = NULL; 00118 PslEntry *currPslEntry; 00119 int matches,misMatches,repMatches,nCount,qNumInsert,qBaseInsert,tNumInsert,tBaseInsert; 00120 char strand; 00121 int first; 00122 00123 if (!ls_isEof (ls)) { 00124 blatParser_freeQuery (currBlatQuery); 00125 currBlatQuery = NULL; 00126 AllocVar (currBlatQuery); 00127 currBlatQuery->entries = arrayCreate (5,PslEntry); 00128 first = 1; 00129 while (line = ls_nextLine (ls)) { 00130 if (line[0] == '\0') { 00131 continue; 00132 } 00133 w = wordIterCreate (line,"\t",0); 00134 matches = atoi (wordNext (w)); 00135 misMatches = atoi (wordNext (w)); 00136 repMatches = atoi (wordNext (w)); 00137 nCount = atoi (wordNext (w)); 00138 qNumInsert = atoi (wordNext (w)); 00139 qBaseInsert = atoi (wordNext (w)); 00140 tNumInsert = atoi (wordNext (w)); 00141 tBaseInsert = atoi (wordNext (w)); 00142 strand = (wordNext (w))[0]; 00143 strReplace (&queryName,wordNext (w)); 00144 if (first == 1 || strEqual (prevBlatQueryName,queryName)) { 00145 currPslEntry = arrayp (currBlatQuery->entries,arrayMax (currBlatQuery->entries),PslEntry); 00146 currPslEntry->matches = matches; 00147 currPslEntry->misMatches = misMatches; 00148 currPslEntry->repMatches = repMatches; 00149 currPslEntry->nCount = nCount; 00150 currPslEntry->qNumInsert = qNumInsert; 00151 currPslEntry->qBaseInsert = qBaseInsert; 00152 currPslEntry->tNumInsert = tNumInsert; 00153 currPslEntry->tBaseInsert = tBaseInsert; 00154 currPslEntry->strand = strand; 00155 currPslEntry->qSize = atoi (wordNext (w)); 00156 currPslEntry->qStart = atoi (wordNext (w)); 00157 currPslEntry->qEnd = atoi (wordNext (w)); 00158 currPslEntry->tName = hlr_strdup (wordNext (w)); 00159 currPslEntry->tSize = atoi (wordNext (w)); 00160 currPslEntry->tStart = atoi (wordNext (w)); 00161 currPslEntry->tEnd = atoi (wordNext (w)); 00162 currPslEntry->blockCount = atoi (wordNext (w)); 00163 currPslEntry->blockSizes = arrayCreate (5,int); 00164 processCommaSeparatedList (currPslEntry->blockSizes,wordNext (w)); 00165 currPslEntry->qStarts = arrayCreate (5,int); 00166 processCommaSeparatedList (currPslEntry->qStarts,wordNext (w)); 00167 currPslEntry->tStarts = arrayCreate (5,int); 00168 processCommaSeparatedList (currPslEntry->tStarts,wordNext (w)); 00169 } 00170 else { 00171 ls_back (ls,1); 00172 return currBlatQuery; 00173 } 00174 if (first == 1) { 00175 currBlatQuery->qName = hlr_strdup (queryName); 00176 first = 0; 00177 } 00178 strReplace(&prevBlatQueryName,queryName); 00179 wordIterDestroy (w); 00180 } 00181 if (first == 1) { 00182 return NULL; 00183 } 00184 else { 00185 return currBlatQuery; 00186 } 00187 } 00188 blatParser_freeQuery (currBlatQuery); 00189 currBlatQuery = NULL; 00190 return NULL; 00191 }