Btab for blast2
Nathan O. Siemers
nathan at dna.bms.com
Thu Sep 4 20:37:35 EST 1997
ynakamu at kazusa.or.jp (Yasukazu Nakamura) writes:
> Hi,
>
> Have anyone written Btab (like) program for BLAST2 output?
>
> Thanks in advance.
>
> Yaskaz
>
Just another Perl Hacker. Baby Perl, at that.
nathan
| Nathan Siemers - Department of Applied Genomics - Bristol-Myers |
| Squibb Pharmaceutical Research Institute - K23-05, PO Box 4000, |
| Princeton, NJ 08543-4000 - (609) 252-6568 - siemers at bms.com |
blast22table.pl:
#!/usr/local/bin/perl5
# blast22table.pl
# make tab delimited table of blast2 results
# Nathan Siemers
# just to be nice:
$intorecord = 0;
$count = 0;
$defcount = 0;
$line = 0;
while (<>) {
# lines we wish were not there...
if (/^SCORE_MISMATCH/) {
next;
}
@words = split(" ");
$line++;
if ($line == 1) {
$search = $words[0];
}
if (/^Query=/){
$query = $words[1];
}
if (/^ +\(.* letters\)/) {
$words[0] =~ s/\(//;
$querylength = $words[0];
$querylength =~ s/\,//g;
}
if (/^Database\:/) {
$db = $words[1];
}
if (/^ *Length =/) {
if ($search eq "TBLASTN" || $search eq "BLASTX" || $search eq "BLASTN") {
$lengthline = $intorecord;
$scoreline = $lengthline + 4;
$idline = $scoreline + 1;
$qalignline = $idline + 2;
$salignline = $qalignline + 2;
}
if ($search eq "BLASTP") {
$lengthline = $intorecord;
$scoreline = $lengthline + 2;
$idline = $scoreline + 1;
$qalignline = $idline + 2;
$salignline = $qalignline + 2;
}
$hitgenelength[$count] = $words[2];
$hitgenelength[$count] =~ s/\,//g;
}
if ( $intorecord == $scoreline) {
$score[$count] = $words[2];
$expect[$count] = $words[7];
$expect[$count] =~ s/\,//;
}
if ($intorecord == $idline) {
$t = $words[2];
$t =~ s/.*\///g;
$hitlength[$count] = $t;
$t = $words[3];
$t =~ s/[\(\)\%\,]//g;
$id[$count] = $t;
$frame[$count] = $words[10];
}
if ($intorecord == $qalignline) {
$qstart[$count] = $words[1];
}
if ($intorecord == $salignline) {
$sstart[$count] = $words[1];
}
#scan for first line of annotation
if (/^\>/) {
$intorecord = 1;
$count++;
$accession[$count] = $words[0];
$accession[$count] =~ s/^>//;
shift @words;
$description[$count] = join " ", @words;
}
$intorecord++ if ($intorecord);
}
# print the suckers out
#ignore identical entries!
for ($i = 1; $i <= $count; $i++) {
if ($query ne $accession[$i]) {
print "$query\t$querylength\t$search\t$db\t$accession[$i]\t",
"$description[$i]\t$hitgenelength[$i]",
"\t$frame[$i]\t$score[$i]\t$expect[$i]\t$id[$i]\t$hitlength[$i]\t$qstart[$i]\t$sstart[$i]\n";
}
}
More information about the Bio-soft
mailing list
Send comments to us at biosci-help [At] net.bio.net