00001 #include "format.h"
00002 #include "log.h"
00003 #include "linestream.h"
00004 #include "common.h"
00005 #include "elandMultiParser.h"
00006
00007
00008
00016 static LineStream ls = NULL;
00017
00018
00019
00024 void elandMultiParser_init (char *fileName)
00025 {
00026 ls = ls_createFromFile (fileName);
00027 }
00028
00029
00030
00034 void elandMultiParser_deInit (void)
00035 {
00036 ls_destroy (ls);
00037 }
00038
00039
00040
00041 static void elandMultiParser_freeQuery (ElandMultiQuery *currElandMultiQuery)
00042 {
00043 int i;
00044 ElandMultiEntry *currElandMultiEntry;
00045
00046 if (currElandMultiQuery == NULL) {
00047 return;
00048 }
00049 hlr_free (currElandMultiQuery->sequenceName);
00050 hlr_free (currElandMultiQuery->sequence);
00051 if (currElandMultiQuery->entries != NULL) {
00052 for (i = 0; i < arrayMax (currElandMultiQuery->entries); i++) {
00053 currElandMultiEntry = arrp (currElandMultiQuery->entries,i,ElandMultiEntry);
00054 hlr_free (currElandMultiEntry->chromosome);
00055 }
00056 arrayDestroy (currElandMultiQuery->entries);
00057 }
00058 freeMem (currElandMultiQuery);
00059 }
00060
00061
00062
00073 ElandMultiQuery* elandMultiParser_nextQuery (void)
00074 {
00075 WordIter w1,w2;
00076 char *line,*token,*firstColon,*lastColon,*pos1,*pos2;
00077 static char* chromosome = NULL;
00078 int lengthToken;
00079 static ElandMultiQuery *currElandMultiQuery = NULL;
00080 ElandMultiEntry *currElandMultiEntry;
00081
00082 while (line = ls_nextLine (ls)) {
00083 if (line[0] == '\0') {
00084 continue;
00085 }
00086 elandMultiParser_freeQuery (currElandMultiQuery);
00087 currElandMultiQuery = NULL;
00088 AllocVar (currElandMultiQuery);
00089 w1 = wordIterCreate (line,"\t",0);
00090 currElandMultiQuery->sequenceName = hlr_strdup (wordNext (w1) + 1);
00091 currElandMultiQuery->sequence = hlr_strdup (wordNext (w1));
00092 token = wordNext (w1);
00093 if (strEqual (token,"NM") || strEqual (token,"QC") || strEqual (token,"RM")) {
00094 wordIterDestroy (w1);
00095 return currElandMultiQuery;
00096 }
00097 firstColon = strchr (token,':');
00098 lastColon = strrchr (token,':');
00099 if (firstColon == NULL || lastColon == NULL) {
00100 die ("Expected the following format: x:y:z");
00101 }
00102 *firstColon = '\0';
00103 *lastColon = '\0';
00104 currElandMultiQuery->exactMatches = atoi (token);
00105 currElandMultiQuery->oneErrorMatches = atoi (firstColon + 1);
00106 currElandMultiQuery->twoErrorMatches = atoi (lastColon + 1);
00107 token = wordNext (w1);
00108 if (token == NULL) {
00109 wordIterDestroy (w1);
00110 return currElandMultiQuery;
00111 }
00112 w2 = wordIterCreate (token,",",0);
00113 currElandMultiQuery->entries = arrayCreate (5,ElandMultiEntry);
00114 while (token = wordNext (w2)) {
00115 currElandMultiEntry = arrayp (currElandMultiQuery->entries,arrayMax (currElandMultiQuery->entries),ElandMultiEntry);
00116 lengthToken = strlen (token);
00117 if (token[lengthToken - 2] == 'F') {
00118 currElandMultiEntry->strand = '+';
00119 }
00120 else if (token[lengthToken - 2] == 'R') {
00121 currElandMultiEntry->strand = '-';
00122 }
00123 else {
00124 die ("Unexpected strand: %s",token);
00125 }
00126 currElandMultiEntry->numErrors = atoi (token + lengthToken - 1);
00127 token[lengthToken - 2] = '\0';
00128 if (pos1 = strchr (token,':')) {
00129 pos2 = strchr (token,'.');
00130 *pos2 = '\0';
00131 strReplace (&chromosome,token);
00132 token = pos1 + 1;
00133 }
00134 currElandMultiEntry->position = atoi (token);
00135 currElandMultiEntry->chromosome = hlr_strdup (chromosome);
00136 }
00137 wordIterDestroy (w2);
00138 wordIterDestroy (w1);
00139 return currElandMultiQuery;
00140 }
00141 elandMultiParser_freeQuery (currElandMultiQuery);
00142 currElandMultiQuery = NULL;
00143 return NULL;
00144 }