00001 #include "format.h"
00002 #include "log.h"
00003 #include "linestream.h"
00004 #include "common.h"
00005 #include "blatParser.h"
00006
00007
00008
/** Number of lines the init functions read and discard right after opening
    the input (presumably the PSL header, going by the macro name). */
#define NUM_PSL_HEADER_LINES 5
00017
00018
00019
/** Line stream shared by every function in this file: assigned by
    blatParser_initFromFile / blatParser_initFromPipe, read by
    blatParser_nextQuery and handed to ls_destroy by blatParser_deInit. */
static LineStream ls = NULL;
00021
00022
00023
00028 void blatParser_initFromFile (char *fileName)
00029 {
00030 int i;
00031
00032 ls = ls_createFromFile (fileName);
00033 ls_bufferSet (ls,1);
00034 for (i = 0; i < NUM_PSL_HEADER_LINES; i++) {
00035 ls_nextLine (ls);
00036 }
00037 }
00038
00039
00040
00045 void blatParser_initFromPipe (char *command)
00046 {
00047 int i;
00048
00049 ls = ls_createFromPipe (command);
00050 ls_bufferSet (ls,1);
00051 for (i = 0; i < NUM_PSL_HEADER_LINES; i++) {
00052 ls_nextLine (ls);
00053 }
00054 }
00055
00056
00057
/**
 * Deinitialize the parser.
 * Hands the file-scope line stream to ls_destroy. The static state kept
 * inside blatParser_nextQuery is not touched by this function.
 */
void blatParser_deInit (void)
{
  ls_destroy (ls);
}
00065
00066
00067
00068 static void blatParser_freeQuery (BlatQuery *currBlatQuery)
00069 {
00070 int i;
00071 PslEntry *currPslEntry;
00072
00073 if (currBlatQuery == NULL) {
00074 return;
00075 }
00076 hlr_free (currBlatQuery->qName);
00077 for (i = 0; i < arrayMax (currBlatQuery->entries); i++) {
00078 currPslEntry = arrp (currBlatQuery->entries,i,PslEntry);
00079 hlr_free (currPslEntry->tName);
00080 arrayDestroy (currPslEntry->blockSizes);
00081 arrayDestroy (currPslEntry->tStarts);
00082 arrayDestroy (currPslEntry->qStarts);
00083 }
00084 arrayDestroy (currBlatQuery->entries);
00085 freeMem (currBlatQuery);
00086 }
00087
00088
00089
00090 static void processCommaSeparatedList (Array results, char *str)
00091 {
00092 WordIter w;
00093 char *tok;
00094
00095 w = wordIterCreate (str,",",0);
00096 while (tok = wordNext (w)) {
00097 if (tok[0] == '\0') {
00098 continue;
00099 }
00100 array (results,arrayMax (results),int) = atoi (tok);
00101 }
00102 wordIterDestroy (w);
00103 }
00104
00105
00106
00111 BlatQuery* blatParser_nextQuery (void)
00112 {
00113 WordIter w;
00114 char *line;
00115 static char *queryName = NULL;
00116 static char *prevBlatQueryName = NULL;
00117 static BlatQuery *currBlatQuery = NULL;
00118 PslEntry *currPslEntry;
00119 int matches,misMatches,repMatches,nCount,qNumInsert,qBaseInsert,tNumInsert,tBaseInsert;
00120 char strand;
00121 int first;
00122
00123 if (!ls_isEof (ls)) {
00124 blatParser_freeQuery (currBlatQuery);
00125 currBlatQuery = NULL;
00126 AllocVar (currBlatQuery);
00127 currBlatQuery->entries = arrayCreate (5,PslEntry);
00128 first = 1;
00129 while (line = ls_nextLine (ls)) {
00130 if (line[0] == '\0') {
00131 continue;
00132 }
00133 w = wordIterCreate (line,"\t",0);
00134 matches = atoi (wordNext (w));
00135 misMatches = atoi (wordNext (w));
00136 repMatches = atoi (wordNext (w));
00137 nCount = atoi (wordNext (w));
00138 qNumInsert = atoi (wordNext (w));
00139 qBaseInsert = atoi (wordNext (w));
00140 tNumInsert = atoi (wordNext (w));
00141 tBaseInsert = atoi (wordNext (w));
00142 strand = (wordNext (w))[0];
00143 strReplace (&queryName,wordNext (w));
00144 if (first == 1 || strEqual (prevBlatQueryName,queryName)) {
00145 currPslEntry = arrayp (currBlatQuery->entries,arrayMax (currBlatQuery->entries),PslEntry);
00146 currPslEntry->matches = matches;
00147 currPslEntry->misMatches = misMatches;
00148 currPslEntry->repMatches = repMatches;
00149 currPslEntry->nCount = nCount;
00150 currPslEntry->qNumInsert = qNumInsert;
00151 currPslEntry->qBaseInsert = qBaseInsert;
00152 currPslEntry->tNumInsert = tNumInsert;
00153 currPslEntry->tBaseInsert = tBaseInsert;
00154 currPslEntry->strand = strand;
00155 currPslEntry->qSize = atoi (wordNext (w));
00156 currPslEntry->qStart = atoi (wordNext (w));
00157 currPslEntry->qEnd = atoi (wordNext (w));
00158 currPslEntry->tName = hlr_strdup (wordNext (w));
00159 currPslEntry->tSize = atoi (wordNext (w));
00160 currPslEntry->tStart = atoi (wordNext (w));
00161 currPslEntry->tEnd = atoi (wordNext (w));
00162 currPslEntry->blockCount = atoi (wordNext (w));
00163 currPslEntry->blockSizes = arrayCreate (5,int);
00164 processCommaSeparatedList (currPslEntry->blockSizes,wordNext (w));
00165 currPslEntry->qStarts = arrayCreate (5,int);
00166 processCommaSeparatedList (currPslEntry->qStarts,wordNext (w));
00167 currPslEntry->tStarts = arrayCreate (5,int);
00168 processCommaSeparatedList (currPslEntry->tStarts,wordNext (w));
00169 }
00170 else {
00171 ls_back (ls,1);
00172 return currBlatQuery;
00173 }
00174 if (first == 1) {
00175 currBlatQuery->qName = hlr_strdup (queryName);
00176 first = 0;
00177 }
00178 strReplace(&prevBlatQueryName,queryName);
00179 wordIterDestroy (w);
00180 }
00181 if (first == 1) {
00182 return NULL;
00183 }
00184 else {
00185 return currBlatQuery;
00186 }
00187 }
00188 blatParser_freeQuery (currBlatQuery);
00189 currBlatQuery = NULL;
00190 return NULL;
00191 }