Here is a program that demonstrates the dynamic programming algorithm
for global sequence alignments that I use in my class. It works with
two strings on a command line, and simply scores +1 for a match; -1
for a mismatch, and -2 for a gap.
It would be easy to modify the program to read in different
match/mismatch/deletion penalties. Also, by modifying the max(max())
line and adding a 0, you produce local similarity scores and
alignments.
Bill Pearson
================
/* ademo.c a demonstration of the scoring values
considered by the dynamic programming algorithm
when aligning two strings
This program uses +1 for a match, -1 for a mismatch,
and -2 for a gap (penalty per residue, not affine).
copyright (c) 1989,1996 William R. Pearson. Available under
the GNU Public license.
*/
#include <stdio.h>
#define MATCH 1
#define MISMATCH -1
#define DELETE 2
#define max(a,b) ((a)>(b) ? (a) : (b))
char aa0[21], aa1[21];
int n0, n1;
int row0[21], row1[21], *rowp, *rowc, *rtmp;
char ld[21], l0[21], l1[21];
main(argc,argv)
int argc; char **argv;
{
char *bp, *strchr();
int dval, i0val, i1val, cval;
int i, j;
if (argc<3) {
fprintf(stderr,
"ademo shows the scoring used to align two strings\n");
fprintf(stderr,
"enter the first string (<=15 characters): ");
if (fgets(aa0,sizeof(aa0),stdin)==NULL) exit(1);
if ((bp=strchr(aa0,'\n'))!=NULL) *bp='\0';
n0 = strlen(aa0);
fprintf(stderr,
"enter the second string (<=15 characters): ");
if (fgets(aa1,sizeof(aa1),stdin)==NULL) exit(1);
if ((bp=strchr(aa1,'\n'))!=NULL) *bp='\0';
n1 = strlen(aa1);
}
else {
strncpy(aa0,argv[1],sizeof(aa0));
n0 = strlen(aa0);
strncpy(aa1,argv[2],sizeof(aa1));
n1 = strlen(aa1);
}
rowp = row0;
rowc = row1;
*rowp = *rowc = 0;
printf(" ");
for (i=0; i<n1; i++) {
rowp[i]=rowc[i]=0;
printf(" %c ",aa1[i]);
}
printf("\n");
rowp++; rowc++;
for (j=0; j<n0; j++) {
for (i=0; i<n1; i++) {
dval = rowp[i-1] + ((aa0[j]==aa1[i]) ? MATCH : MISMATCH);
i0val = rowp[i] - DELETE;
i1val = rowc[i-1] - DELETE;
cval = max(dval,max(i0val,i1val));
rowc[i] = cval;
ld[i] = (cval==dval) ? '\\' : ' ';
l0[i] = (cval==i0val) ? '!' : ' ';
l1[i] = (cval==i1val) ? '_' : ' ';
}
printf(" ");
for (i=0; i<n1; i++) printf(" %c %c",ld[i],l0[i]);
printf("\n%c ",aa0[j]);
for (i=0; i<n1; i++) printf(" %c%2d",l1[i],rowc[i]);
printf("\n");
rtmp = rowp;
rowp = rowc;
rowc = rtmp;
}
}
================= sample output ================
81% ademo
ademo shows the scoring used to align two strings
enter the first string (<=15 characters): ABCCDEFFG
enter the second string (<=15 characters): ABDDEFFG
A B D D E F F G
\ \ \ \ \ \ \ \
A 1 _-1 -1 -1 -1 -1 -1 -1
\ ! \ \ \ \ \ \
B -1 2 _ 0 _-2 -2 -2 -2 -2
\ ! \ \ \ \ \ \
C -1 0 1 _-1 _-3 -3 -3 -3
\ \ ! \ ! \ \ \ \ \
C -1 -2 -1 0 _-2 _-4 -4 -4
\ \ \ \ \ \ \ \
D -1 -2 -1 0 -1 _-3 _-5 -5
\ \ \ ! \ ! \
E -1 -2 -3 -2 1 _-1 _-3 _-5
\ \ \ \ ! ! \ \
F -1 -2 -3 -4 -1 2 _ 0 _-2
\ \ \ \ ! \ ! \
F -1 -2 -3 -4 -3 0 3 _ 1
\ \ \ \ \ ! ! ! \
G -1 -2 -3 -4 -5 -2 1 4