in article 50f9e4f.0106130946.29302695 at posting.google.com, Bin Li at
bli at ualberta.ca wrote on 14/6/01 9:58 AM:
> I am new in computational biology. I am writing a simple program to
> translate a DNA sequence to encoded amino acid sequence. I guess there is
> probably a good hash function out there that hashes the 64 codons, which I
> assume is the fastest way to do translation. Anybody knows about it?
I use a combination of look-up tables and hashing:
//: Lookup table from a 7-bit ASCII character to a nucleotide state code.
// T/t and U/u map to 0, C/c to 1, A/a to 2 and G/g to 3, i.e. the letters
// AaCcGgTtUu map to 2211330000. Every other character maps to 4.
// Only indices 0-127 exist; callers must not index with values outside
// that range.
const int kNucleotideNumbers[128] = {
    4, 4, 4, 4, 4, 4, 4, 4,    //   0 -   7
    4, 4, 4, 4, 4, 4, 4, 4,    //   8 -  15
    4, 4, 4, 4, 4, 4, 4, 4,    //  16 -  23
    4, 4, 4, 4, 4, 4, 4, 4,    //  24 -  31
    4, 4, 4, 4, 4, 4, 4, 4,    //  32 -  39
    4, 4, 4, 4, 4, 4, 4, 4,    //  40 -  47
    4, 4, 4, 4, 4, 4, 4, 4,    //  48 -  55
    4, 4, 4, 4, 4, 4, 4, 4,    //  56 -  63
    4, 2, 4, 1, 4, 4, 4, 3,    //  64 -  71: 'A' = 2, 'C' = 1, 'G' = 3
    4, 4, 4, 4, 4, 4, 4, 4,    //  72 -  79
    4, 4, 4, 4, 0, 0, 4, 4,    //  80 -  87: 'T' = 0, 'U' = 0
    4, 4, 4, 4, 4, 4, 4, 4,    //  88 -  95
    4, 2, 4, 1, 4, 4, 4, 3,    //  96 - 103: 'a' = 2, 'c' = 1, 'g' = 3
    4, 4, 4, 4, 4, 4, 4, 4,    // 104 - 111
    4, 4, 4, 4, 0, 0, 4, 4,    // 112 - 119: 't' = 0, 'u' = 0
    4, 4, 4, 4, 4, 4, 4, 4     // 120 - 127
};
// Identifiers for the supported genetic codes.
// kNonCoding (-1) is a sentinel that does not correspond to any table row.
// The remaining named codes are numbered consecutively from 0 so they can
// be used directly as the first index of kCodeTable; numGeneticCodes is
// left implicit so that it always equals the number of codes listed.
enum GeneticCode {
    kNonCoding           = -1,
    kUniversalCode       = 0,
    kVertebrateMtCode    = 1,
    kYeastCode           = 2,
    kMoldProtozoanMtCode = 3,
    kMycoplasmaCode      = 4,
    kInvertebrateMtCode  = 5,
    kCiliateCode         = 6,
    kEchinodermMtCode    = 7,
    kEuplotidNucCode     = 8,
    kBacterialCode       = 9,
    kAltYeastCode        = 10,
    kAscidianMtCode      = 11,
    kFlatwormMtCode      = 12,
    kBlepharismaNucCode  = 13,
    numGeneticCodes      // count of real codes above (one past the last)
};
//: Amino-acid translation tables, one row per genetic code.
// Rows are in the same order as the GeneticCode enumerators, starting at
// kUniversalCode. Each row holds 64 one-letter amino-acid codes ('*' marks
// a stop codon) plus the string's terminating NUL, hence 65 chars.
// Codons are ordered with nucleotides going T, C, A, G (the order given by
// the Genbank web site) and the first codon position most significant,
// i.e. TTT, TTC, TTA, TTG, TCT, etc. Each row is written below as four
// adjacent string literals, one per first base; the compiler concatenates
// them into a single 64-character string.
const char kCodeTable[numGeneticCodes][65] = {
    // Universal
    "FFLLSSSSYY**CC*W"    // T..
    "LLLLPPPPHHQQRRRR"    // C..
    "IIIMTTTTNNKKSSRR"    // A..
    "VVVVAAAADDEEGGGG",   // G..
    // Vertebrate Mitochondrial
    "FFLLSSSSYY**CCWW"
    "LLLLPPPPHHQQRRRR"
    "IIMMTTTTNNKKSS**"
    "VVVVAAAADDEEGGGG",
    // Yeast
    "FFLLSSSSYY**CCWW"
    "TTTTPPPPHHQQRRRR"
    "IIMMTTTTNNKKSSRR"
    "VVVVAAAADDEEGGGG",
    // Mold Protozoan Mitochondrial
    "FFLLSSSSYY**CCWW"
    "LLLLPPPPHHQQRRRR"
    "IIIMTTTTNNKKSSRR"
    "VVVVAAAADDEEGGGG",
    // Mycoplasma
    "FFLLSSSSYY**CCWW"
    "LLLLPPPPHHQQRRRR"
    "IIIMTTTTNNKKSSRR"
    "VVVVAAAADDEEGGGG",
    // Invertebrate Mitochondrial
    "FFLLSSSSYY**CCWW"
    "LLLLPPPPHHQQRRRR"
    "IIMMTTTTNNKKSSSS"
    "VVVVAAAADDEEGGGG",
    // Ciliate
    "FFLLSSSSYYQQCC*W"
    "LLLLPPPPHHQQRRRR"
    "IIIMTTTTNNKKSSRR"
    "VVVVAAAADDEEGGGG",
    // Echinoderm Mitochondrial
    "FFLLSSSSYY**CCWW"
    "LLLLPPPPHHQQRRRR"
    "IIIMTTTTNNNKSSSS"
    "VVVVAAAADDEEGGGG",
    // Euplotid Nuclear
    "FFLLSSSSYY**CCCW"
    "LLLLPPPPHHQQRRRR"
    "IIIMTTTTNNKKSSRR"
    "VVVVAAAADDEEGGGG",
    // Bacterial
    "FFLLSSSSYY**CC*W"
    "LLLLPPPPHHQQRRRR"
    "IIIMTTTTNNKKSSRR"
    "VVVVAAAADDEEGGGG",
    // Alternative Yeast
    "FFLLSSSSYY**CC*W"
    "LLLSPPPPHHQQRRRR"
    "IIIMTTTTNNKKSSRR"
    "VVVVAAAADDEEGGGG",
    // Ascidian Mitochondrial
    "FFLLSSSSYY**CCWW"
    "LLLLPPPPHHQQRRRR"
    "IIMMTTTTNNKKSSGG"
    "VVVVAAAADDEEGGGG",
    // Flatworm Mitochondrial
    "FFLLSSSSYYY*CCWW"
    "LLLLPPPPHHQQRRRR"
    "IIIMTTTTNNNKSSSS"
    "VVVVAAAADDEEGGGG",
    // Blepharisma Nuclear
    "FFLLSSSSYY*QCC*W"
    "LLLLPPPPHHQQRRRR"
    "IIIMTTTTNNKKSSRR"
    "VVVVAAAADDEEGGGG"
};
// This function takes the first, second and third nucleotides in the
// codon and returns the amino acid..
char getCodonNumber(char first, char second, char third)
{
int codonNumber = (kNucleotideNumbers[first] << 4) +
(kNucleotideNumbers[second] << 2) +
kNucleotideNumbers[third];
return kCodeTable[geneticCode][codonNumber];
}
--
===================================================================
Andrew Rambaut, EMAIL - andrew.rambaut at zoo.ox.ac.uk
Zoology Department, WWW - http://evolve.zoo.ox.ac.uk/
University of Oxford, TEL - +44 1865 271261
South Parks Road, Oxford, UK FAX - +44 1865 271249
===================================================================