I've now sent corrected sifi.c code to about twenty people. I'm
putting it here in the hope that someone who sees it will play with
it to make it into yet another one of those too-general-to-use
packages. Obviously there are plenty of folks out there who could
use a fairly generic set of routines to manipulate pdb files. This
program should provide enough of an example for any who thusfar have
been afraid to dip their fingers into C programming.
My code is not robust, but I'd be thrilled if anyone who adds to
it or improves it would pass the code along.
Thanks!
Rick
---> Since sending this out the first time, I've found that
it fails under some compilers. There is a
commented line in the read_atom_record() subroutine which
should help anyone who wants to fix it.
--------------------------------------------------------------------
The short C program that follows calculates phi-psi and alpha-carbon
bend and torsion angles for Brookhaven protein data files. The
people who manage the data bank also have FORTRAN routines to do
this.
I have run it on SGI 4D and IBM RS6000's, but you may have to fiddle
with your local math library routines.
To Build: cc sifi.c -o sifi -lm
Usage: sifi <pdbfile>
This routine is not particularly robust, and will fail miserably
if fed anything remotely unfamiliar.
/********************* CUT HERE ********************************/
/***********************************************************************
This program was written by
Rick MacDonald
was: University of Virginia Biophysics
now: MIT Health Sciences and Technology
rbm8p at virginia.edu
This program will print for each amino acid residue in a pdb file:
psi
phi
alpha carbon bend
alpha carbon dihedral
To Build: cc sifi.c -o sifi -lm
Usage: sifi <pdbfile>
***********************************************************************/
#include <stdio.h>
#include <math.h>
#define TRUE 1
#define FALSE 0
#define LINELEN 100
#define MAXATOMS 10000
#define MAXRES 1000
typedef struct {
int num;
char a_type[6];
char r_type[4];
char subunit;
int resnum;
float p[3]; /* position x,y,z */
} ATOM;
typedef struct {
int nitrog;
int alphac;
int carbon;
int oxygen;
char *r_type; /* 3 letter code */
char res_type; /* 1 letter code */
char subunit;
float phi;
float psi;
float cab;
float cad;
int resnum;
} AMINO;
ATOM atom[MAXATOMS];
AMINO res[MAXRES];
int err;
main(int argc, char **argv)
{
char *strstr(), *strncpy(), *strcat();
char *next_str(char *string);
float angle(float *v1, float *v2);
float dot(float *v1, float *v2);
float dihedral(float *p1, float *p2, float *p3, float *p4);
void cross(float *v1, float *v2, float *p);
float len(float *v1);
int cmpstr(char *s1, char *s2);
void read_atom_record(char *rec, ATOM *atom1);
char single_letter_code(char *string);
register i;
FILE *Fpdb;
char tname[80], fname[80], line[LINELEN], *temp, WAS, WUZ, WLB;
float v1[3], v2[3];
int ndx, n_atoms, n_res, rno, is, was;
/*-----------------------------------------------------------------------
Open file specified by command line, if it exists
-----------------------------------------------------------------------*/
if(argc<2) err = printf("Usage: sifi <pdbfile>\n");
if(argc!=2) exit(1);
temp = strncpy(fname,argv[argc-1],sizeof(tname));
if ((Fpdb = fopen(fname,"r"))==NULL)
err = printf("Could not open file %s\n",fname);
/*-----------------------------------------------------------------------
Read through the PDB format file to extract ATOM records
-----------------------------------------------------------------------*/
ndx = 0;
while (fgets(line,LINELEN,Fpdb)!=0) {
if (strstr(line,"ATOM ")!=NULL) {
read_atom_record(line,&atom[ndx]);
ndx++;
}
}
n_atoms = ndx;
/*-----------------------------------------------------------------------
Now we've got the ATOMs. Re-order them into residues.
-----------------------------------------------------------------------*/
rno = 0;
was = atom[0].resnum;
for (ndx = 0; ndx < n_atoms; ndx++) {
is = atom[ndx].resnum;
if (is != was) rno++;
if (cmpstr(atom[ndx].a_type,"CA")!=NULL) {
res[rno].resnum = atom[ndx].resnum;
res[rno].alphac = ndx;
res[rno].r_type = atom[ndx].r_type;
res[rno].subunit= atom[ndx].subunit;
}
else if (cmpstr(atom[ndx].a_type,"N")!=NULL)
res[rno].nitrog = ndx;
else if (cmpstr(atom[ndx].a_type,"C")!=NULL)
res[rno].carbon = ndx;
else if (cmpstr(atom[ndx].a_type,"O")!=NULL)
res[rno].oxygen = ndx;
was = is;
}
n_res = rno;
/*-----------------------------------------------------------------------
Find for each residue:
single letter residue code
phi angle
psi angle
cab angle = bend angle between adjacent alpha carbons
cad angle = is the dihedral() or torsion angle
-----------------------------------------------------------------------*/
res[0].res_type = single_letter_code(res[0].r_type);
res[0].psi = -(dihedral(atom[res[1].nitrog].p,
atom[res[1].alphac].p,
atom[res[1].carbon].p,
atom[res[2].nitrog].p));
res[1].res_type = single_letter_code(res[1].r_type);
res[1].phi = -(dihedral(atom[res[0].carbon].p,
atom[res[1].nitrog].p,
atom[res[1].alphac].p,
atom[res[1].carbon].p));
res[1].psi = -(dihedral(atom[res[1].nitrog].p,
atom[res[1].alphac].p,
atom[res[1].carbon].p,
atom[res[2].nitrog].p));
for (rno = 2; rno <= n_res; rno++) {
res[rno].res_type = single_letter_code(res[rno].r_type);
res[rno].phi = -(dihedral(atom[res[rno-1].carbon].p,
atom[res[rno ].nitrog].p,
atom[res[rno ].alphac].p,
atom[res[rno ].carbon].p));
res[rno].psi = -(dihedral(atom[res[rno ].nitrog].p,
atom[res[rno ].alphac].p,
atom[res[rno ].carbon].p,
atom[res[rno+1].nitrog].p));
res[rno].cad = dihedral( atom[res[rno-2].alphac].p,
atom[res[rno-1].alphac].p,
atom[res[rno ].alphac].p,
atom[res[rno+1].alphac].p);
for (i=0;i<=2;i++) {
v1[i] = atom[res[rno-1].alphac].p[i] - atom[res[rno].alphac].p[i];
v2[i] = atom[res[rno+1].alphac].p[i] - atom[res[rno].alphac].p[i];
}
res[rno].cab = angle(v1, v2);
}
/*-----------------------------------------------------------------------
Print this stuff out as a table.
-----------------------------------------------------------------------*/
printf("%s\n", fname);
printf(" # resid phi psi Cab Cat\n");
WAS = 'x';
WUZ = 'x';
WLB = res[0].subunit;
res[n_res+1].subunit = 'x';
for (rno = 0; rno <= n_res; rno++) {
if (res[rno].subunit != WAS) { /* This gibberish flags the meaningless */
res[rno].phi = 999.; /* values at the ends of each subunit as 999 */
}
if (res[rno].subunit != WUZ) {
res[rno].cab = 999.;
res[rno].cad = 999.;
}
WUZ = WAS;
WAS = res[rno].subunit;
WLB = res[rno+1].subunit;
if (res[rno].subunit != WLB) {
res[rno].psi = 999.;
res[rno].cab = 999.;
res[rno].cad = 999.;
}
printf("%4d %c %3s %c ",
res[rno].resnum, res[rno].subunit, res[rno].r_type, res[rno].res_type);
printf("%5.0f ", res[rno].phi);
printf("%5.0f ", res[rno].psi);
printf("%5.0f ", res[rno].cab);
printf("%5.0f", res[rno].cad);
printf("\n");
}
} /* end main */
/***********************************************************************
***********************************************************************/
void read_atom_record(char *rec, ATOM *atom1)
{
char *next_str(char *string);
char *ptr;
float fl;
ptr = rec;
err = sscanf( (ptr = next_str(ptr)), "%d", &atom1->num );
err = sscanf( (ptr = next_str(ptr)), "%3s", atom1->a_type );
err = sscanf( (ptr = next_str(ptr)), "%3s", atom1->r_type );
ptr = ptr + 4;
atom1->subunit = *ptr;
err = sscanf( (ptr = next_str(ptr)), "%d", &atom1->resnum );
/* Remove this comment to print atom records as read from pdb
printf("\n%5d %4s %s %c %d ", atom1->num, atom1->a_type, atom1->r_type, atom1->subunit, atom1->resnum);
*/
/* This is a silly patch --- but I can't make %Lf read a float! */
err = sscanf( (ptr = next_str(ptr)), "%f", &fl );
atom1->p[0] = fl;
err = sscanf( (ptr = next_str(ptr)), "%f", &fl );
atom1->p[1] = fl;
err = sscanf( (ptr = next_str(ptr)), "%f", &fl );
atom1->p[2] = fl;
return;
}
/***********************************************************************
***********************************************************************/
char single_letter_code(char *string)
{
if (cmpstr(string,"CYS")!=NULL) return 'C';
else if (cmpstr(string,"HIS")!=NULL) return 'H';
else if (cmpstr(string,"ILE")!=NULL) return 'I';
else if (cmpstr(string,"MET")!=NULL) return 'M';
else if (cmpstr(string,"SER")!=NULL) return 'S';
else if (cmpstr(string,"VAL")!=NULL) return 'V';
else if (cmpstr(string,"ALA")!=NULL) return 'A';
else if (cmpstr(string,"GLY")!=NULL) return 'G';
else if (cmpstr(string,"LEU")!=NULL) return 'L';
else if (cmpstr(string,"PRO")!=NULL) return 'P';
else if (cmpstr(string,"THR")!=NULL) return 'T';
else if (cmpstr(string,"PHE")!=NULL) return 'F';
else if (cmpstr(string,"ARG")!=NULL) return 'R';
else if (cmpstr(string,"TYR")!=NULL) return 'Y';
else if (cmpstr(string,"TRP")!=NULL) return 'W';
else if (cmpstr(string,"ASN")!=NULL) return 'N';
else if (cmpstr(string,"GLU")!=NULL) return 'E';
else if (cmpstr(string,"GLN")!=NULL) return 'Q';
else if (cmpstr(string,"LYS")!=NULL) return 'K';
else if (cmpstr(string,"ASP")!=NULL) return 'D';
return '*';
}
/***********************************************************************
cmpstr does a simple compare of a string s1 with string s2 and returns a
pointer to the beginning of s1 if same or NULL if not same
***********************************************************************/
int cmpstr(char *s1, char *s2)
{
loop: if(*s1 != *s2) return(0);
if(*s1 == '\0') return(1);
s1++; s2++;
goto loop;
}
/***********************************************************************
***********************************************************************/
char *next_str(char *string)
{
char c;
while (((c = *string) != ' ') && (c != '\t')) string++;
while (((c = *string) == ' ') || (c == '\t')) string++;
return string;
}
/***********************************************************************
Calculate the dihedral angle between two vectors given endpoints.
***********************************************************************/
float dihedral(float *p1, float *p2, float *p3, float *p4)
{
void cross(float *v1, float *v2, float *p);
float dot(float *v1, float *v2);
float angle(float *v1, float *v2);
float v1[3], v2[3], v3[3], p[3], q[3], r[3], theta;
register i;
for (i = 0; (i < 3); i++) {
v1[i] = (p2[i] - p1[i]);
v2[i] = (p2[i] - p3[i]);
v3[i] = (p4[i] - p3[i]);
}
cross(v1,v2,p);
cross(v2,v3,q);
theta = angle(p,q);
cross(p,q,r);
if(dot(v2,r) < 0) theta = -theta;
return theta;
}
/***********************************************************************
The vector cross product is returned as the third parameter
***********************************************************************/
void cross(float *v1, float *v2, float *p)
{
p[0] = (v1[1] * v2[2]) - (v1[2] * v2[1]);
p[1] = (v1[2] * v2[0]) - (v1[0] * v2[2]);
p[2] = (v1[0] * v2[1]) - (v1[1] * v2[0]);
}
/***********************************************************************
Return angle (degrees) between two vectors
***********************************************************************/
float angle(float *v1, float *v2)
{
float len(float *v1);
float dot(float *v1, float *v2);
float cos_val, sin_val, sinsq;
cos_val = dot(v1,v2)/(len(v1)*len(v2));
sinsq = 1.0 - cos_val*cos_val;
if( sinsq < 0.0 ) sinsq = 0.0;
sin_val = sqrt(sinsq);
return(atan2(sin_val,cos_val) * 57.29578); /* pi radians */
}
/***********************************************************************
Return vector dot product
***********************************************************************/
float dot(float *v1, float *v2)
{
int i;
float sum;
sum = 0;
for (i = 0; (i <= 2); i++) {
sum = sum + (v1[i] * v2[i]);
}
return sum;
}
/***********************************************************************
Return the length of a vector
***********************************************************************/
float len (float *v1)
{
/* This can be faster sometimes.
return ( fexp(0.5*flog ( (v1[0]*v1[0]) + (v1[1]*v1[1]) + (v1[2]*v1[2]) )) );
*/
return ( sqrt ( (v1[0]*v1[0]) + (v1[1]*v1[1]) + (v1[2]*v1[2]) ) );
}