Take a look at readseq if you like -- ftp.bio.indiana.edu:/molbio/readseq
for the source.
Here is a snippet of its format identification routine:
while ( !done ) {
ReadOneLine(sp);
/* check for mailer head & skip past if found */
if (nlines < 4 && !done) {
if ((strstr(sp,"From ") == sp) || (strstr(sp,"Received:") == sp)) {
do {
/* skip all lines until find one blank line */
ReadOneLine(sp);
if (!done) for (k=0; (k<splen) && (sp[k]==' '); k++) ;
} while ((!done) && (k < splen));
*skiplines = nlines; /* !? do we want #lines or #bytes ?? */
}
}
if (sp==NULL || *sp==0)
; /* nada */
/* high probability identities: */
else if ( strstr(sp,"MSF:") && strstr(sp,"Type:") && strstr(sp,"Check:") )
gotMSF= true;
else if ((strstr(sp,"..") != NULL) && (strstr(sp,"Check:") != NULL))
gotuw= true;
else if (strstr(sp,"identity: Data:") != NULL)
gotolsen= true;
// and so on for more format checks...
--
-- d.gilbert--biocomputing--indiana u--bloomington--gilbertd at bio.indiana.edu