00001 #include "format.h" 00002 #include "log.h" 00003 #include "linestream.h" 00004 #include "common.h" 00005 #include "bowtieParser.h" 00006 00007 00008 00016 static LineStream ls = NULL; 00017 00018 00019 00024 void bowtieParser_initFromFile (char *fileName) 00025 { 00026 ls = ls_createFromFile (fileName); 00027 ls_bufferSet (ls,1); 00028 } 00029 00030 00031 00036 void bowtieParser_initFromPipe (char *command) 00037 { 00038 ls = ls_createFromPipe (command); 00039 ls_bufferSet (ls,1); 00040 } 00041 00042 00043 00047 void bowtieParser_deInit (void) 00048 { 00049 ls_destroy (ls); 00050 } 00051 00052 00053 00054 static void bowtieParser_freeQuery (BowtieQuery *currBowtieQuery) 00055 { 00056 int i; 00057 BowtieEntry *currBowtieEntry; 00058 00059 if (currBowtieQuery == NULL) { 00060 return; 00061 } 00062 hlr_free (currBowtieQuery->sequenceName); 00063 if (currBowtieQuery->entries != NULL) { 00064 for (i = 0; i < arrayMax (currBowtieQuery->entries); i++) { 00065 currBowtieEntry = arrp (currBowtieQuery->entries,i,BowtieEntry); 00066 hlr_free (currBowtieEntry->chromosome); 00067 hlr_free (currBowtieEntry->sequence); 00068 hlr_free (currBowtieEntry->quality); 00069 arrayDestroy (currBowtieEntry->mismatches); 00070 } 00071 arrayDestroy (currBowtieQuery->entries); 00072 } 00073 freeMem (currBowtieQuery); 00074 } 00075 00076 00077 00078 static void bowtieParser_processMismatches (BowtieEntry *currEntry, char* token) 00079 { 00080 WordIter w; 00081 BowtieMismatch *currBowtieMismatch; 00082 char *item,*pos; 00083 00084 currEntry->mismatches = arrayCreate (3,BowtieMismatch); 00085 if (token[0] == '\0') { 00086 return; 00087 } 00088 w = wordIterCreate (token,",",0); 00089 while (item = wordNext (w)) { 00090 currBowtieMismatch = arrayp (currEntry->mismatches,arrayMax (currEntry->mismatches),BowtieMismatch); 00091 pos = strchr (item,':'); 00092 *pos = '\0'; 00093 currBowtieMismatch->offset = atoi (item); 00094 currBowtieMismatch->referenceBase = *(pos + 1); 00095 currBowtieMismatch->readBase = *(pos + 3); 00096 } 00097 wordIterDestroy (w); 00098 } 00099 00100 00101 00102 static void bowtieParser_processLine (char* line, BowtieQuery* currBowtieQuery) 00103 { 00104 WordIter w; 00105 BowtieEntry *currEntry; 00106 00107 currEntry = arrayp (currBowtieQuery->entries,arrayMax (currBowtieQuery->entries),BowtieEntry); 00108 w = wordIterCreate (line,"\t",0); 00109 currEntry->strand = (wordNext (w))[0]; 00110 currEntry->chromosome = hlr_strdup (wordNext (w)); 00111 currEntry->position = atoi (wordNext (w)); 00112 currEntry->sequence = hlr_strdup (wordNext (w)); 00113 currEntry->quality = hlr_strdup (wordNext (w)); 00114 wordNext (w); 00115 bowtieParser_processMismatches (currEntry,wordNext (w)); 00116 wordIterDestroy (w); 00117 } 00118 00119 00120 00121 static BowtieQuery* bowtieParser_processNextQuery (int freeMemory) 00122 { 00123 char *line,*pos; 00124 static char *queryName = NULL; 00125 static char *prevBowtieQueryName = NULL; 00126 static BowtieQuery *currBowtieQuery = NULL; 00127 int first; 00128 00129 if (!ls_isEof (ls)) { 00130 if (freeMemory) { 00131 bowtieParser_freeQuery (currBowtieQuery); 00132 currBowtieQuery = NULL; 00133 } 00134 AllocVar (currBowtieQuery); 00135 currBowtieQuery->entries = arrayCreate (5,BowtieEntry); 00136 first = 1; 00137 while (line = ls_nextLine (ls)) { 00138 if (line[0] == '\0') { 00139 continue; 00140 } 00141 pos = strchr (line,'\t'); 00142 *pos = '\0'; 00143 strReplace (&queryName,line); 00144 if (first == 1 || strEqual (prevBowtieQueryName,queryName)) { 00145 bowtieParser_processLine (pos + 1,currBowtieQuery); 00146 } 00147 else { 00148 ls_back (ls,1); 00149 return currBowtieQuery; 00150 } 00151 if (first == 1) { 00152 currBowtieQuery->sequenceName = hlr_strdup (queryName); 00153 first = 0; 00154 } 00155 strReplace(&prevBowtieQueryName,queryName); 00156 } 00157 if (first == 1) { 00158 return NULL; 00159 } 00160 else { 00161 return currBowtieQuery; 00162 } 00163 } 00164 if (freeMemory) { 00165 bowtieParser_freeQuery (currBowtieQuery); 00166 currBowtieQuery = NULL; 00167 } 00168 return NULL; 00169 } 00170 00171 00172 00225 BowtieQuery* bowtieParser_nextQuery (void) 00226 { 00227 return bowtieParser_processNextQuery (1); 00228 } 00229 00230 00231 00236 Array bowtieParser_getAllQueries () 00237 { 00238 Array bowtieQueries; 00239 BowtieQuery *currBowtieQuery; 00240 00241 bowtieQueries = arrayCreate (100000,BowtieQuery); 00242 while (currBowtieQuery = bowtieParser_processNextQuery (0)) { 00243 array (bowtieQueries,arrayMax (bowtieQueries),BowtieQuery) = *currBowtieQuery; 00244 freeMem (currBowtieQuery); 00245 } 00246 return bowtieQueries; 00247 }