00001 00007 #include "log.h" 00008 #include "format.h" 00009 #include "linestream.h" 00010 #include "stringUtil.h" 00011 #include "common.h" 00012 #include "fasta.h" 00013 00014 00015 00016 #define NUM_CHARACTRS_PER_LINE 60 00017 00018 00019 static LineStream lsFasta = NULL; 00020 00021 00022 00028 void fasta_initFromFile (char* fileName) 00029 { 00030 lsFasta = ls_createFromFile (fileName); 00031 ls_bufferSet (lsFasta,1); 00032 } 00033 00034 00035 00039 void fasta_deInit (void) 00040 { 00041 ls_destroy (lsFasta); 00042 } 00043 00044 00045 00050 void fasta_initFromPipe (char* command) 00051 { 00052 lsFasta = ls_createFromPipe (command); 00053 ls_bufferSet (lsFasta,1); 00054 } 00055 00056 00057 00058 static void fasta_freeSeq (Seq* currSeq) 00059 { 00060 if (currSeq == NULL) { 00061 return; 00062 } 00063 hlr_free (currSeq->name); 00064 hlr_free (currSeq->sequence); 00065 freeMem (currSeq); 00066 currSeq = NULL; 00067 } 00068 00069 00070 00071 static Seq* fasta_processNextSequence (int freeMemory, int truncateName) 00072 { 00073 char *line; 00074 static Stringa buffer = NULL; 00075 static Seq* currSeq = NULL; 00076 int count; 00077 00078 if (ls_isEof (lsFasta)) { 00079 if (freeMemory) { 00080 fasta_freeSeq (currSeq); 00081 } 00082 return NULL; 00083 } 00084 count = 0; 00085 stringCreateClear (buffer,1000); 00086 while (line = ls_nextLine (lsFasta)) { 00087 if (line[0] == '\0') { 00088 continue; 00089 } 00090 if (line[0] == '>') { 00091 count++; 00092 if (count == 1) { 00093 if (freeMemory) { 00094 fasta_freeSeq (currSeq); 00095 } 00096 AllocVar (currSeq); 00097 currSeq->name = hlr_strdup (line + 1); 00098 if (truncateName) { 00099 currSeq->name = firstWordInLine (skipLeadingSpaces (currSeq->name)); 00100 } 00101 continue; 00102 } 00103 else if (count == 2) { 00104 currSeq->sequence = hlr_strdup (string (buffer)); 00105 currSeq->size = stringLen (buffer); 00106 ls_back (lsFasta,1); 00107 return currSeq; 00108 } 00109 } 00110 stringCat (buffer,line); 00111 } 00112 currSeq->sequence = hlr_strdup (string (buffer)); 00113 currSeq->size = stringLen (buffer); 00114 return currSeq; 00115 } 00116 00117 00118 00124 Seq* fasta_nextSequence (int truncateName) 00125 { 00126 return fasta_processNextSequence (1,truncateName); 00127 } 00128 00129 00130 00136 Array fasta_readAllSequences (int truncateName) 00137 { 00138 Array seqs; 00139 Seq *currSeq; 00140 00141 seqs = arrayCreate (100000,Seq); 00142 while (currSeq = fasta_processNextSequence (0,truncateName)) { 00143 array (seqs,arrayMax (seqs),Seq) = *currSeq; 00144 freeMem (currSeq); 00145 } 00146 return seqs; 00147 } 00148 00149 00150 00154 void fasta_printOneSequence (Seq* currSeq) 00155 { 00156 char *seq; 00157 00158 seq = insertWordEveryNthPosition (currSeq->sequence,"\n",NUM_CHARACTRS_PER_LINE); 00159 printf(">%s\n%s\n",currSeq->name,seq); 00160 } 00161 00162 00163 00167 void fasta_printSequences (Array seqs) 00168 { 00169 int i; 00170 Seq *currSeq; 00171 00172 for (i = 0; i < arrayMax (seqs); i++) { 00173 currSeq = arrp (seqs,i,Seq); 00174 fasta_printOneSequence (currSeq); 00175 } 00176 }