IUBio

PDBtoGCG (for VMS)

David Mathog mathog at seqvax.caltech.edu
Thu Dec 2 17:46:00 EST 1993


Sometimes users insist on having the sequences out of PDB files (even
though most of these can be retrieved from other databases).  It's a bit
of a pain to do manually, so here are a short script and a program
it uses that carry out this conversion automatically.  It only works
on protein sequences, and then only if the sequence(s) are properly
recorded in the SEQRES records (so sue me).  Each strand's sequence
is placed in a separate file.

The script is DCL, but the equivalent sh or csh shouldn't be too tough.
(Yes, the GCG conversion code could be put into hackem.for - feel free if
you think that it's worth the effort.) 

No guarantees, warranties, or etc :-)! 

David Mathog
mathog at seqvax.bio.caltech.edu
Manager, sequence analysis facility, biology division, Caltech 

*******  PDBtoGCG.COM  *****************************************************
$! ***************************************************************
$! PDBTOGCG.COM
$!
$! Command procedure to convert PDB sequence(s) to GCG sequence(s).
$! It will accept the name of the PDB file as a parameter, otherwise
$! it prompts for it.
$! This only works for PROTEIN sequences!
$!
$! Be sure to modify the RUN HACKEM line to match your site!
$!
$! David Mathog, CalTech Biology, 1-DEC-1993
$! ************************************************************
$ 
$       on control_y then goto terminate
$       ws := "write sys$output"
$       iq := inquire/nopunctuation
$
$ if (P1 .eqs."")
$then
$  iq pdbfile "Name of the PDB file to convert to GCG sequence format "
$  if pdbfile .nes. "" then goto okpdbname
$  ws "Message from PDBtoGCG:"
$  ws "Cannot convert unless you supply the name of a PDB file!"
$  ws "Conversion aborted"
$  stop
$else
$  pdbfile = P1
$endif
$okpdbname:
$!
$ open/write tfil killmepdbtogcg.com
$ write tfil "$run gcgexrun:hackem"
$ write tfil PDBFILE
$ close tfil
$ @killmepdbtogcg.com/out=nla0:
$ delete killmepdbtogcg.com.
$!
$ define/user sys$output nla0:
$ reformat/threeintoone/protein/default hackem*.pep
$ purge hackem*.pep
$!
$! now rename them
$!
$! extract the root part of the name for constructing the final output 
$! file's name
$!
$ file = F$SEARCH(PDBFILE)
$ root = f$parse(FILE,,,"NAME") ! root name of sequence
$!
$! rename any and all output files
$!
$count = 0
$TOP:
$ file = F$SEARCH("HACKEM*.PEP")
$ if(file .eqs. "")then goto byebye
$ count = count + 1
$ chunk = F$PARSE(FILE,,,"NAME")
$ chunk = chunk - "HACKEM"
$ chunk := "''ROOT'''chunk'.PEP"
$ rename 'file' 'chunk'
$ ws "Sequence ''count' in file ''chunk'"
$ goto TOP
$!
$BYEBYE:
$ delete hackem.out.
$ if count .eq. 0
$ then 
$   ws "PDBtoGCG: WARNING: no sequences were found in ''PDBFILE'"
$ else
$   ws "PDBtoGCG: Extracted ''COUNT' sequences from ''PDBFILE'"
$ endif
$!
$terminate:
***********  hackem.for  *************************************************
C	HACKEM.FOR
C
C	1-DEC-1993, David Mathog, biology division, Caltech
C
C	*Incredibly* simple minded program that takes a PDB file
C	and spits the SEQRES records out into separate files for
C	each chain for further processing by GCG REFORMAT.
C
C	Input:  The name of the PDB file
C	Output: hackem.out
C	           Number of Chains
C	           Names of files = hackem.chain_id
C	        hackem_1.pep
C	        hackem_A.pep
C	        hackem_2.pep etc.
C
C       Note, if a chain has NO NAME there will be a space in the 12
C	position.  In this case, the single output sequence will be named
C	hackem.pep.
C
C
C	SEQRES line format is:
C	     6A1     SEQRES
C             I4     Serial # for current chain
C	      1X     space
C	      A1     Chain identifier (any symbol!)
C	      1X     space
C	      I4     Number of residues in this chain
C	      1X     space
C       13(1X,A3)    Residue names, in three letter code format
C
C	plus, sometimes,  junk on the end
C
	implicit none
	character*200  file,listof,outfile
	character*132  line
	character*1    chain
	integer*4      status,linelen,filelen,count,i,outlen
	logical        OK
c
	write(6,*)'Name of the PDB file to process: '
	read(5,'(q,a)',iostat=status)filelen,file(1:filelen)
	if(status .ne. 0 .or. filelen .eq. 0)then
	   write(6,*)'HACKEM: using default input file of HACKEMPDB'
	   file = 'HACKEMPDB'
	   filelen=9
	end if
c
c	open the input file
c
	open(unit=10,file=file(1:filelen)
	1 ,form='FORMATTED',carriagecontrol='LIST',status='OLD'
	1 ,READONLY,iostat=status)
	if(status .ne. 0)
	1  stop 'HACKEM: fatal error, could not open PDB file'
c
c	scan it for SEQRES records
c	The initial chain value should NEVER occur in a PDB file!
c
	ok    = .true.
	chain = char(1)
	count = 0
c
	do while(ok)
	   read(10,'(q,a)',iostat=status)linelen,line(1:linelen)
	   if(status .eq. 0)then
	     if(line(1:6) .eq. 'SEQRES')then
	        if(chain .ne. line(12:12))then	!new chain
	          chain = line(12:12)
	          count = count + 1
	          listof(count:count) = chain
	          if(count.gt.0)close(11,dispose='SAVE')
	          call makename(outfile,outlen,chain)
	          open(unit=11,file=outfile(1:outlen)
	1         ,form='FORMATTED',carriagecontrol='LIST',status='NEW'
	1         ,iostat=status)
	          if(status .ne. 0)
	1            stop 'HACKEM: fatal error, could not open output file'
	          write(11,'(a)')'Sequence of chain '//chain//' from '
	1         //file(1:filelen)
	          write(11,'(a)')'..'
	        end if
	        if(linelen .gt. 70)linelen=70
	        write(11,'(a)')line(20:linelen)
	     else
	        if(count .ge. 1)ok = .false.
	     end if
	   else			!possibly an error, but treat it like an EOF
	     ok = .false.
	   end if
	end do
c
c	write out hacker.out - number of chains and names of files
c
	if(count.ge.1)close(unit=11,dispose='SAVE')
	open(unit=11,file='hackem.out'
	1  ,form='FORMATTED',carriagecontrol='LIST',status='NEW'
	1  ,iostat=status)
	if(status .ne. 0)
	1  stop 'HACKEM: fatal error, could not create "hackem.out"'
	write(11,'(I4)')count
	if(count.ge.1)then
	  do i = 1, count
	    call makename(outfile,outlen,listof(i:i))
	    write(11,'(a)')outfile(1:outlen)
	  end do
	end if
c
	close(unit=11,dispose='SAVE')
c
	stop 'HACKEM: processing completed'
	end

	subroutine makename(file,flen,chain)
	implicit none
	character*200 file
	character*1  chain
	integer*4    flen
	if(chain .eq. ' ')then
	  file = 'hackem.pep'
	  flen  = 10
	else
	  file = 'hackem_'//chain//'.pep'
	  flen  = 12
	end if
	return
	end
***** that's all folks ******************************************************




More information about the Bio-soft mailing list

Send comments to us at biosci-help [At] net.bio.net