00001 #include "format.h"
00002 #include "log.h"
00003 #include "linestream.h"
00004 #include "common.h"
00005 #include "bowtieParser.h"
00006
00007
00008
/* Module-level line stream shared by all bowtieParser functions: assigned by
   bowtieParser_initFromFile / bowtieParser_initFromPipe, read by the query
   reader, and released by bowtieParser_deInit. NULL until one of the init
   functions has been called. */
00016 static LineStream ls = NULL;
00017
00018
00019
00024 void bowtieParser_initFromFile (char *fileName)
00025 {
00026 ls = ls_createFromFile (fileName);
00027 ls_bufferSet (ls,1);
00028 }
00029
00030
00031
00036 void bowtieParser_initFromPipe (char *command)
00037 {
00038 ls = ls_createFromPipe (command);
00039 ls_bufferSet (ls,1);
00040 }
00041
00042
00043
00047 void bowtieParser_deInit (void)
00048 {
00049 ls_destroy (ls);
00050 }
00051
00052
00053
00054 static void bowtieParser_freeQuery (BowtieQuery *currBowtieQuery)
00055 {
00056 int i;
00057 BowtieEntry *currBowtieEntry;
00058
00059 if (currBowtieQuery == NULL) {
00060 return;
00061 }
00062 hlr_free (currBowtieQuery->sequenceName);
00063 if (currBowtieQuery->entries != NULL) {
00064 for (i = 0; i < arrayMax (currBowtieQuery->entries); i++) {
00065 currBowtieEntry = arrp (currBowtieQuery->entries,i,BowtieEntry);
00066 hlr_free (currBowtieEntry->chromosome);
00067 hlr_free (currBowtieEntry->sequence);
00068 hlr_free (currBowtieEntry->quality);
00069 arrayDestroy (currBowtieEntry->mismatches);
00070 }
00071 arrayDestroy (currBowtieQuery->entries);
00072 }
00073 freeMem (currBowtieQuery);
00074 }
00075
00076
00077
00078 static void bowtieParser_processMismatches (BowtieEntry *currEntry, char* token)
00079 {
00080 WordIter w;
00081 BowtieMismatch *currBowtieMismatch;
00082 char *item,*pos;
00083
00084 currEntry->mismatches = arrayCreate (3,BowtieMismatch);
00085 if (token[0] == '\0') {
00086 return;
00087 }
00088 w = wordIterCreate (token,",",0);
00089 while (item = wordNext (w)) {
00090 currBowtieMismatch = arrayp (currEntry->mismatches,arrayMax (currEntry->mismatches),BowtieMismatch);
00091 pos = strchr (item,':');
00092 *pos = '\0';
00093 currBowtieMismatch->offset = atoi (item);
00094 currBowtieMismatch->referenceBase = *(pos + 1);
00095 currBowtieMismatch->readBase = *(pos + 3);
00096 }
00097 wordIterDestroy (w);
00098 }
00099
00100
00101
00102 static void bowtieParser_processLine (char* line, BowtieQuery* currBowtieQuery)
00103 {
00104 WordIter w;
00105 BowtieEntry *currEntry;
00106
00107 currEntry = arrayp (currBowtieQuery->entries,arrayMax (currBowtieQuery->entries),BowtieEntry);
00108 w = wordIterCreate (line,"\t",0);
00109 currEntry->strand = (wordNext (w))[0];
00110 currEntry->chromosome = hlr_strdup (wordNext (w));
00111 currEntry->position = atoi (wordNext (w));
00112 currEntry->sequence = hlr_strdup (wordNext (w));
00113 currEntry->quality = hlr_strdup (wordNext (w));
00114 wordNext (w);
00115 bowtieParser_processMismatches (currEntry,wordNext (w));
00116 wordIterDestroy (w);
00117 }
00118
00119
00120
00121 static BowtieQuery* bowtieParser_processNextQuery (int freeMemory)
00122 {
00123 char *line,*pos;
00124 static char *queryName = NULL;
00125 static char *prevBowtieQueryName = NULL;
00126 static BowtieQuery *currBowtieQuery = NULL;
00127 int first;
00128
00129 if (!ls_isEof (ls)) {
00130 if (freeMemory) {
00131 bowtieParser_freeQuery (currBowtieQuery);
00132 currBowtieQuery = NULL;
00133 }
00134 AllocVar (currBowtieQuery);
00135 currBowtieQuery->entries = arrayCreate (5,BowtieEntry);
00136 first = 1;
00137 while (line = ls_nextLine (ls)) {
00138 if (line[0] == '\0') {
00139 continue;
00140 }
00141 pos = strchr (line,'\t');
00142 *pos = '\0';
00143 strReplace (&queryName,line);
00144 if (first == 1 || strEqual (prevBowtieQueryName,queryName)) {
00145 bowtieParser_processLine (pos + 1,currBowtieQuery);
00146 }
00147 else {
00148 ls_back (ls,1);
00149 return currBowtieQuery;
00150 }
00151 if (first == 1) {
00152 currBowtieQuery->sequenceName = hlr_strdup (queryName);
00153 first = 0;
00154 }
00155 strReplace(&prevBowtieQueryName,queryName);
00156 }
00157 if (first == 1) {
00158 return NULL;
00159 }
00160 else {
00161 return currBowtieQuery;
00162 }
00163 }
00164 if (freeMemory) {
00165 bowtieParser_freeQuery (currBowtieQuery);
00166 currBowtieQuery = NULL;
00167 }
00168 return NULL;
00169 }
00170
00171
00172
00225 BowtieQuery* bowtieParser_nextQuery (void)
00226 {
00227 return bowtieParser_processNextQuery (1);
00228 }
00229
00230
00231
00236 Array bowtieParser_getAllQueries ()
00237 {
00238 Array bowtieQueries;
00239 BowtieQuery *currBowtieQuery;
00240
00241 bowtieQueries = arrayCreate (100000,BowtieQuery);
00242 while (currBowtieQuery = bowtieParser_processNextQuery (0)) {
00243 array (bowtieQueries,arrayMax (bowtieQueries),BowtieQuery) = *currBowtieQuery;
00244 freeMem (currBowtieQuery);
00245 }
00246 return bowtieQueries;
00247 }