translation source code
francis at NCBI.NLM.NIH.GOV
francis at NCBI.NLM.NIH.GOV
Mon Aug 12 07:57:29 EST 1996
A problem with this code is that it does not take into account the
various genetic codes which exist.
The Standard Code
The Vertebrate Mitochondrial Code
The Yeast Mitochondrial Code
The Mold, Protozoan, and Coelenterate Mitochondrial Code and the
Mycoplasma/Spiroplasma Code
The Invertebrate Mitochondrial Code
The Ciliate, Dasycladacean and Hexamita Nuclear Code
The Echinoderm Mitochondrial Code
The Euplotid Nuclear Code
The Bacterial "Code"
The Alternative Yeast Nuclear Code
The Ascidian Mitochondrial Code
The Flatworm Mitochondrial Code
The Blepharisma Nuclear Code
To see a description of these codes, have a look at:
http://www3.ncbi.nlm.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c
We have something in our toolkit for translations (using the various
genetic codes as an argument), but I will need to look into it a
little, and tease out what you may want. On the other hand, the code
below may be all you need as far as translations are concerned, if you
don't care or worry about alternative codes.
cheers,
f.
--
| B.F. Francis Ouellette
| GenBank
|
| francis at ncbi.nlm.nih.gov
> From roach at u.washington.edu Mon Aug 12 05:04:21 1996
> To: bio-soft at net.bio.net
> From: roach at u.washington.edu (Jared Roach)
> Subject: Re: translation source code
> Date: 11 Aug 1996 23:20:04 GMT
> NNTP-Posting-Host: saul3.u.washington.edu
> NNTP-Posting-User: roach
>
> Well, here's C translation source code I wrote as an XFCN for HyperCard
> for the Macintosh, so you would be able to dispense with most of it for
> whatever you wanted it for, most likely. I include the whole code just to
> put it into context. Please don't hesitate to ask for clarification.
> Note that my elegance and experience in programming C probably falls
> somewhere in the middle of the lowest percentile, so be warned!
> The XFCN parts of the code are shareware by Mark Hanrek.
>
> // DNA2Prot XFCN by Jared Roach © August 1996
> // This program translates DNA sequences
> // The XFCN shell was downloaded from the Web and is © 1992 Mark Hanrek
>
>
> //***************************************************************************************
> // Hanrek XCMD Shell 1.2
> //
> // ©1992 Mark Hanrek & The Information Workshop. All Rights Reserved.
> //
> // Note: Do all your programming between the bold black lines below.
> // Put additional functions you create into the "Support Functions"
> // section below that. Put function prototypes into ExampleXFCN.h.
> //
>
> /******************************************************************* Includes ********/
>
> #include "SetUpA4.h"
> #include "HyperXCmd.h"
> #include "SuperCard.h"
> #include "StandardFunctions.h"
> #include "string.h"
>
> /******************************************************************* Main Entry ******/
>
> pascal void main( XCmdPtr paramPtr ) // XFCN entry point: fixed shell boilerplate wrapped around ExternalHandler(). No need to ever change any of this...
> {
> RememberA0(); // from the shell headers; presumably records A0 for SetUpA4() -- TODO confirm against SetUpA4.h
> SetUpA4(); // presumably makes this code resource's globals reachable -- TODO confirm against SetUpA4.h
> InitializeReturnInfo( paramPtr ); // shell helper defined elsewhere; called before the handler appends its result
> ExternalHandler( paramPtr ); // the XFCN-specific work (DNA translation) is done here
> RestoreA4(); // counterpart of the SetUpA4() call above
> }
>
>
>
>
> //••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••
> //
> // DNA2ProtXFCN HyperTalk Syntax:
> // put 1 into frame (should be 1,2, or 3)
> // put 1 into flag
> //
> // put cd fld "Sequence" into seq
> //
> // put DNA2ProtXFCN( seq, frame, flag ) into cd fld "output"
> //
> //
>
> #include "DNA2ProtXFCN.h"
>
> // Map a nucleotide letter to a table index: A=0, C=1, G=2, T=3.
> // Returns -1 for anything else (N, lowercase letters, gaps, ...).
> static short BaseIndex( char cBase )
> {
>     switch ( cBase ) {
>         case 'A': return 0;
>         case 'C': return 1;
>         case 'G': return 2;
>         case 'T': return 3;
>         default:  return -1;
>     }
> }
>
> // Translate one codon with the standard genetic code.
> //   - stop codons are returned as ' '
> //   - an unrecognised 1st or 2nd base gives 'X'
> //   - an unrecognised 3rd base gives the residue when all four codons of
> //     the box agree (e.g. ACx is always threonine), 'Y' for TAx
> //     (uncertainty between tyrosine and stop is called tyrosine),
> //     and 'X' otherwise
> static char TranslateCodon( char cFirst, char cSecond, char cThird )
> {
>     // Indexed [1st base][2nd base][3rd base], each in A, C, G, T order
>     static const char kCodonTable[4][4][5] = {
>         { "KNKN", "TTTT", "RSRS", "IIMI" },   // AAx ACx AGx ATx
>         { "QHQH", "PPPP", "RRRR", "LLLL" },   // CAx CCx CGx CTx
>         { "EDED", "AAAA", "GGGG", "VVVV" },   // GAx GCx GGx GTx
>         { " Y Y", "SSSS", " CWC", "LFLF" }    // TAx TCx TGx TTx
>     };
>     short nFirst  = BaseIndex( cFirst );
>     short nSecond = BaseIndex( cSecond );
>     short nThird  = BaseIndex( cThird );
>     const char *pBox;
>
>     if ( nFirst < 0 || nSecond < 0 )
>         return 'X';
>
>     pBox = kCodonTable[nFirst][nSecond];
>     if ( nThird >= 0 )
>         return pBox[nThird];
>
>     // 3rd base unknown: still unambiguous if the whole box is one residue
>     if ( pBox[0] == pBox[1] && pBox[1] == pBox[2] && pBox[2] == pBox[3] )
>         return pBox[0];
>
>     if ( cFirst == 'T' && cSecond == 'A' )
>         return 'Y'; // I call uncertainty between tyrosine and stop: tyrosine
>
>     return 'X';
> }
>
> // DNA2Prot XFCN handler.
> //
> // Parameters (read through the shell's ParamTo... helpers):
> //   0  the DNA sequence (uppercase A, C, G, T; anything else is "unknown")
> //   1  the reading frame, 1-based (should be 1, 2, or 3)
> //   2  a flag; when it is 1 a copyright line is appended to the result
> //
> // Result: one character per complete codon starting at the frame offset,
> // followed by "\r". Trailing bases that do not form a whole codon are
> // ignored. Nothing is returned for an empty sequence or a frame below 1.
> void ExternalHandler( XCmdPtr paramPtr )
> {
>     short nFrame = 1;          // defaults to frame 1 if the helper leaves it untouched
>     short nFlagVariable = 0;
>     char pSeq[3000];
>     char pProtSeq[1000];
>     size_t nSeqLength;
>     size_t nOffset;
>     size_t nCodons;
>     size_t i;
>
>     ParamToCString( 0, pSeq ); // the first parameter is the sequence
>     ParamToShort( 1, &nFrame );
>     ParamToShort( 2, &nFlagVariable );
>
>     nSeqLength = strlen( pSeq );
>     if ( nSeqLength < 1 )
>         return;
>
>     // A frame below 1 would index before the start of pSeq
>     if ( nFrame < 1 )
>         return;
>
>     // Count whole codons without letting the unsigned subtraction wrap
>     // when the frame offset lies beyond the end of the sequence.
>     nOffset = (size_t)( nFrame - 1 );
>     nCodons = ( nOffset < nSeqLength ) ? ( nSeqLength - nOffset ) / 3 : 0;
>
>     // Never write past the protein buffer
>     if ( nCodons > sizeof pProtSeq )
>         nCodons = sizeof pProtSeq;
>
>     for ( i = 0; i < nCodons; ++i ) {
>         const char *pCodon = pSeq + nOffset + 3 * i;
>         pProtSeq[i] = TranslateCodon( pCodon[0], pCodon[1], pCodon[2] );
>     }
>
>     //Return Result
>     for ( i = 0; i < nCodons; ++i )
>         AppendReturnInfo( kResult, "|b", pProtSeq[i] );
>     AppendReturnInfo( kResult, "\r");
>
>     if ( nFlagVariable == 1 )
>         AppendReturnInfo( kResult, "\r DNA2Prot XFCN copyright Jared Roach 8/8/96 v0.01");
> }
>
>
>
>
>
>
>
More information about the Bio-soft
mailing list
Send comments to us at biosci-help [At] net.bio.net