In article <7de96a$5qg$1 at nnrp1.dejanews.com>, <gmei at my-dejanews.com> wrote:
>Hi:
>
>I am looking for "gb2fasta" program running on Digital Unix platform. I have
>downloaded a linux version from ftp://ulrec3.unil.ch but we have bought a new
>Digital Alpha 4100 recently and I need to install that program. Or is there
>any other similar program out there that I can use (on Digital Unix) to
>convert Genbank files (downloaded from NCBI) to fasta format?
>
>If you reply, please also send a copy to zlmei at hotmail.com
I use the following perl script. I hope you find it useful.
Regards,
Tim.
#!/usr/local/bin/perl -w
use strict;
# Convert GenBank flat-file records (read from the files named on the command
# line, or from standard input) into FASTA records on standard output.
#
# Each GenBank record is terminated by a line consisting solely of "//", so
# that terminator is used as the input record separator: every iteration of
# the loop below sees exactly one complete record in $_.
local $/ = "\n//\n";

while (<>)
{
    my ($id, $def, $acc, $sv);

    # True while the lines being scanned are (possible) continuation lines
    # of a DEFINITION field that wraps over several lines.
    my $in_definition = 0;

    foreach my $line (split(/\n/, $_))
    {
        # Stop scanning once every header field has been collected, but
        # never in the middle of a wrapped DEFINITION.
        last if ($id && $acc && $def && $sv && !$in_definition);

        if ($in_definition)
        {
            # Continuation lines are indented; anything else ends the field.
            if ($line =~ /^\s+(\S.*?)\s*$/)
            {
                $def = length($def) ? "$def $1" : $1;
                next;
            }
            $in_definition = 0;
        }

        if ($line =~ /^LOCUS\s+(\S+)/)
        {
            $id = $1;
        }
        elsif ($line =~ /^DEFINITION\s+(.*?)\s*$/)
        {
            # Trailing blanks are dropped so that joined continuation lines
            # are separated by exactly one space.
            $def = $1;
            $in_definition = 1;
        }
        elsif ($line =~ /^ACCESSION\s+(\S.*)$/)
        {
            $acc = $1;
        }
        elsif ($line =~ /^VERSION\s+(\S+)/)
        {
            $sv = $1;
        }
    }

    # The sequence is everything between the ORIGIN line and the closing
    # "//".  The rest of the ORIGIN line may be empty, so it is matched with
    # "*" rather than "+"; otherwise records with a bare "ORIGIN" line would
    # be skipped.  Records without an ORIGIN section produce no output.
    my ($sequence) = m:.*\nORIGIN[^\n]*\s+(.*)//:s;
    next unless defined $sequence;

    # Strip the base-position numbers; Fasta() removes the whitespace.
    $sequence =~ tr/0-9//d;

    # Header fields missing from the record become empty strings so that
    # building the title does not warn about uninitialized values.
    foreach my $field ($id, $acc, $def)
    {
        $field = '' unless defined $field;
    }

    my $title = ">$id $acc ";
    $title .= "($sv) " if ($sv);
    $title .= $def;

    # Fasta is defined later in this file; the "&" call form keeps this call
    # free of "called too early to check prototype" warnings.
    print &Fasta($title, 60, $sequence);
}
# Fasta(TITLE, [LENGTH,] SEQUENCE, ...)
#
# Format a sequence as one FASTA record and return it as a single string.
#
#   TITLE     Header text.  A leading ">" is added if it is missing, and any
#             newlines inside the title are removed.
#   LENGTH    Optional number of residues per output line.  When it is
#             undefined or not a string of digits, 60 is used and a
#             non-numeric value is treated as the first piece of sequence.
#             A LENGTH of 0 puts the whole sequence on one line.
#   SEQUENCE  One or more strings; they are concatenated and all whitespace
#             is removed before the sequence is wrapped.
#
# Returns the title line followed by the wrapped sequence lines, each
# terminated by a newline.
#
# No prototype is declared: a ($$@) prototype forces scalar context on the
# second argument, so Fasta($title, @pieces) would pass the number of pieces
# as LENGTH and drop the sequence.
sub Fasta
{
    my ($title, $length, @pieces) = @_;

    $title = '' unless defined $title;

    # If the length item is omitted, assume 60.
    if (!defined $length)
    {
        $length = 60;
    }
    elsif ($length !~ /^\d+$/)
    {
        # Not a number, so it is really the first piece of sequence.
        unshift(@pieces, $length);
        $length = 60;
    }

    # If the caller forgot to put a > on the title, put one there.
    $title = ">$title" unless ($title =~ /^>/);

    # Remove any newlines from the title.
    $title =~ tr/\n//d;

    # Concatenate the input sequences and remove any whitespace.
    my $seq = join('', grep { defined } @pieces);
    $seq =~ s/\s+//g;

    my @result = ("$title\n");

    if ($length > 0)
    {
        my $seq_length = length($seq);
        for (my $offset = 0; $offset < $seq_length; $offset += $length)
        {
            push(@result, substr($seq, $offset, $length), "\n");
        }
    }
    else
    {
        # A length of zero means: output the sequence on one line.
        push(@result, "$seq\n");
    }

    return join('', @result);
}