00001
00007 #include "log.h"
00008 #include "format.h"
00009 #include "linestream.h"
00010 #include "stringUtil.h"
00011 #include "common.h"
00012 #include "fasta.h"
00013
00014
00015
00016 #define NUM_CHARACTRS_PER_LINE 60
00017
00018
00019 static LineStream lsFasta = NULL;
00020
00021
00022
00028 void fasta_initFromFile (char* fileName)
00029 {
00030 lsFasta = ls_createFromFile (fileName);
00031 ls_bufferSet (lsFasta,1);
00032 }
00033
00034
00035
00039 void fasta_deInit (void)
00040 {
00041 ls_destroy (lsFasta);
00042 }
00043
00044
00045
00050 void fasta_initFromPipe (char* command)
00051 {
00052 lsFasta = ls_createFromPipe (command);
00053 ls_bufferSet (lsFasta,1);
00054 }
00055
00056
00057
00058 static void fasta_freeSeq (Seq* currSeq)
00059 {
00060 if (currSeq == NULL) {
00061 return;
00062 }
00063 hlr_free (currSeq->name);
00064 hlr_free (currSeq->sequence);
00065 freeMem (currSeq);
00066 currSeq = NULL;
00067 }
00068
00069
00070
00071 static Seq* fasta_processNextSequence (int freeMemory, int truncateName)
00072 {
00073 char *line;
00074 static Stringa buffer = NULL;
00075 static Seq* currSeq = NULL;
00076 int count;
00077
00078 if (ls_isEof (lsFasta)) {
00079 if (freeMemory) {
00080 fasta_freeSeq (currSeq);
00081 }
00082 return NULL;
00083 }
00084 count = 0;
00085 stringCreateClear (buffer,1000);
00086 while (line = ls_nextLine (lsFasta)) {
00087 if (line[0] == '\0') {
00088 continue;
00089 }
00090 if (line[0] == '>') {
00091 count++;
00092 if (count == 1) {
00093 if (freeMemory) {
00094 fasta_freeSeq (currSeq);
00095 }
00096 AllocVar (currSeq);
00097 currSeq->name = hlr_strdup (line + 1);
00098 if (truncateName) {
00099 currSeq->name = firstWordInLine (skipLeadingSpaces (currSeq->name));
00100 }
00101 continue;
00102 }
00103 else if (count == 2) {
00104 currSeq->sequence = hlr_strdup (string (buffer));
00105 currSeq->size = stringLen (buffer);
00106 ls_back (lsFasta,1);
00107 return currSeq;
00108 }
00109 }
00110 stringCat (buffer,line);
00111 }
00112 currSeq->sequence = hlr_strdup (string (buffer));
00113 currSeq->size = stringLen (buffer);
00114 return currSeq;
00115 }
00116
00117
00118
00124 Seq* fasta_nextSequence (int truncateName)
00125 {
00126 return fasta_processNextSequence (1,truncateName);
00127 }
00128
00129
00130
00136 Array fasta_readAllSequences (int truncateName)
00137 {
00138 Array seqs;
00139 Seq *currSeq;
00140
00141 seqs = arrayCreate (100000,Seq);
00142 while (currSeq = fasta_processNextSequence (0,truncateName)) {
00143 array (seqs,arrayMax (seqs),Seq) = *currSeq;
00144 freeMem (currSeq);
00145 }
00146 return seqs;
00147 }
00148
00149
00150
00154 void fasta_printOneSequence (Seq* currSeq)
00155 {
00156 char *seq;
00157
00158 seq = insertWordEveryNthPosition (currSeq->sequence,"\n",NUM_CHARACTRS_PER_LINE);
00159 printf(">%s\n%s\n",currSeq->name,seq);
00160 }
00161
00162
00163
00167 void fasta_printSequences (Array seqs)
00168 {
00169 int i;
00170 Seq *currSeq;
00171
00172 for (i = 0; i < arrayMax (seqs); i++) {
00173 currSeq = arrp (seqs,i,Seq);
00174 fasta_printOneSequence (currSeq);
00175 }
00176 }