In article <3vstk0$mlv at rc1.vub.ac.be>, gbottu at ben.vub.ac.be (Guy Bottu) writes:
>The GCG distribution contains the file tfsites.dat with the TFD-Sites
>databank in GCG patterns file format. We would like to offer to our users
>the latest version of the TFD-Sites databank. Unfortunately, the file
>sites.dat that can be obtained from the NCBI anonymous ftp server is
>not in GCG format. Does anyone know about a place where one can get
>the TFD-Sites in GCG format or about a software tool that performs the
>format conversion ?
>> Dr. Guy Bottu
The program following my signature worked the last time I did that
conversion.
Regards,
David Mathog
mathog at seqaxp.bio.caltech.edu
Manager, sequence analysis facility, biology division, Caltech
C TFDTOGCG.FOR
C 9-APR-1993 David Mathog, Division of Biology, Caltech
C mathog at seqvax.bio.caltech.edu
C
C This little program takes one of David Ghosh's site.dat
C files and reformats it for GCG usage.
C
C Put the output file into the GCG system as: GENMOREDATA:TFSITES.DAT
C
C FTP to NCBI.NLM.NIH.GOV and look in repository/TFD/tfd.ascii
C for the site.dat file.
C
C This works with GCG 7.2 and VMS 5.5-2 and *may* work on other
C systems (not tested).
C
C Instructions for building an executable:
C
C $ for/nolis tfdtogcg
C $ link/nomap tfdtogcg
C
C Example session (program's prompts not shown):
C
C $ run tfdtogcg
C site.dat
C temporary.out
C 9-APR-1993, Converted TFD X.Y to GCG format
C
C $ copy temporary.out genmoredata:tfsites.dat
C $ set file/prot=w:re genmoredata:tfsites.dat
C $ delete temporary.out.
C
C There is practically NO error checking, so watch out!
C
C Revision 1.0 24-JUN-1993, David Mathog
C TFD 7.0 changes in sites file are:
C GENOME*1 removed
C TRN_UNIT*20 resized to 30
C Modified ghosh_record to reflect these changes.
C
! TFDtoGCG main program.
!
! Prompts on unit 5 for an input file name, an output file name and
! any number of comment lines, then copies every TFD site record from
! the input file to the output file in GCG layout.  The output holds,
! in order: the comment lines (each prefixed with one blank), a
! column-title record, a ".." divider record, and one GCG record per
! input record.
!
! Dialect: VAX FORTRAN.  STRUCTURE/RECORD, the Q edit descriptor and
! the ORGANIZATION / RECORDTYPE / READONLY keywords on OPEN are
! extensions of that compiler.
!
! Error handling: end-of-file on the input file ends the conversion
! normally.  Any other read failure stops the program with a message
! instead of emitting a record built from a stale buffer.
!
      program tfdtogcg
      implicit none
      character*2048 inline,infile,outfile
      integer*4 inlen,istat,nrec
      logical ok
!
! Ghosh lays out site records like this in TFD 7.0
!
      structure /ghosh_record/
        character SITE_ID*6
        character FAC_NAME*25
        character SEQ_NAME*30
        character NA_SEQ*45
        character SEQ_TYPE*1
        character SYSTEM*10
        character TRN_UNIT*30
        character COMMENTS*80
        character MAIN_REF*60
        character FAC_SOURCE*16
        character LOCAT_REF*20
        character LOCATION*20
        character METHOD*11
        character N_PROB*8
        character REF_N*8
        character STRAND*1
        character BINDING*1
      end structure
!
! GCG lays out TFSITES.DAT records like this
!
      structure /GCG_RECORD/
        character SEQ_NAME*31
        character SPACER1*2
        character NA_SEQ*45
        character SPACER2*5
        character FAC_NAME*25
        character SPACER3*1
        character MAIN_REF*60
      end structure
!
      record /ghosh_record/ ghosh
      record /gcg_record/ gcg
!
! Init the spacers for the GCG record.  They are set once and reused
! unchanged for the title record and for every data record.
!
      gcg.spacer1 = '0 '
      gcg.spacer2 = ' 0 ! '
      gcg.spacer3 = ' '
!
      write(6,*)'TFDtoGCG'
      write(6,*)'This program converts one of David Ghosh''s site'
      write(6,*)' files to GCG''s format'
!
! The Q descriptor returns the number of characters left in the input
! record, which is then used as the length of the text to read.
!
      write(6,*)'Input the name of the file to process'
      read(5,'(q,a)')inlen,infile(1:inlen)
!
      open(unit=10,file=infile(1:inlen)
     1 ,form='UNFORMATTED',organization='SEQUENTIAL',status='OLD'
     1 ,recordtype='VARIABLE', READONLY)
!
      write(6,*)'Input the name of the output file'
      read(5,'(q,a)')inlen,outfile(1:inlen)
!
      open(unit=11,file=outfile(1:inlen)
     1 ,form='UNFORMATTED',status='NEW',organization='SEQUENTIAL'
     2 ,recordtype='STREAM_LF',recl = 255)
!
! Get the comments.  An empty line ends the block; so does
! end-of-file (or any other read failure) on unit 5.
!
      write(6,*)'Enter as many lines of comments as you would like'
      write(6,*)' End each line with a <return>'
      write(6,*)' End the last line with <return><return>'
      ok = .true.
      do while (ok)
        read(5,'(q,a)',iostat=istat)inlen,inline(1:inlen)
        if(istat.ne.0)then
          ok = .false.
        else if(inlen.eq.0)then
          ok = .false.
        else
          write(11)' '//inline(1:inlen)
        end if
      end do
      write(6,*)'Working ...'
!
! Write the title line
!
      gcg.seq_name = 'NAME'
      gcg.fac_name = 'FACTOR'
      gcg.na_seq   = 'SEQUENCE'
      gcg.main_ref = 'REFERENCE'
      write(11)gcg
!
! Now write the divider
!
      write(11)'..'
!
! Convert records until the first non-zero I/O status.  A negative
! status is end-of-file; a positive one is a read error.
!
      nrec = 0
      istat = 0
      do while(istat.eq.0)
        read(10,iostat=istat)ghosh
        if(istat.eq.0)then
          nrec = nrec + 1
!         SEQ_NAME is one character wider on the GCG side; the pad
!         leaves room for the ';' that fixseqname may prepend.
          gcg.seq_name = ghosh.seq_name//' '
          gcg.fac_name = ghosh.fac_name
          gcg.na_seq   = ghosh.na_seq
          gcg.main_ref = ghosh.main_ref
          call fixseqname(gcg.seq_name,ghosh.n_prob)
          write(11)gcg
        end if
      end do
      close(unit=10)
      close(unit=11)
      if(istat.gt.0)then
        write(6,*)'TFDtoGCG: read error, iostat =',istat
        write(6,*)' after',nrec,' good records; output is incomplete'
        stop 'TFDtoGCG: aborted on input error'
      end if
      stop 'TFDtoGCG: normal completion'
      end
! FIXSEQNAME - rewrite a sequence name in place as a GCG pattern name.
!
! Arguments:
!   NAME    (in/out) character*(*)  name field, edited in place
!   N_PROB  (in)     character*(*)  probability field, as text
!
! Steps:
!   1. N_PROB is decoded with an F edit descriptor as wide as the
!      field and with 2 implied decimals.  If the value is greater
!      than LIMIT (5.0e-4) the motif is flagged as frequent: NAME is
!      shifted right one place (its last character is dropped) and a
!      ';' is put in column 1.  A field that cannot be decoded is
!      treated as "not frequent" rather than aborting the run.
!   2. Trailing blanks and underscores are blanked out; every blank
!      before the last significant character becomes '_'.
!   3. A name with no significant character becomes 'UNKNOWN', or
!      ';UNKNOWN' when the frequent flag applies.
!
! Lengths are taken from the arguments, so the routine keeps working
! if the caller changes the size of either field.
!
      subroutine fixseqname(NAME,N_PROB)
      implicit none
      character name*(*)
      character n_prob*(*)
      integer i,first,last,inlen,nlen,ios
      logical frequent
      real limit,value
      character*16 fmt
      parameter (limit = 5.0e-4)
!
      inlen = len(name)
      nlen  = len(n_prob)
      if (inlen.le.0) return
!
! Decode N_PROB.  The format is built at run time so that its width
! follows the actual field length; blanks inside a format
! specification are not significant.
!
      frequent = .false.
      if (nlen.gt.0) then
        write(fmt,'(a,i6,a)') '(F', nlen, '.2)'
        read(n_prob,fmt,iostat=ios) value
        if (ios.eq.0) frequent = value.gt.limit
      end if
!
! Prepend the ';' flag.  The shift runs right to left, one character
! at a time, so no position is read after it has been overwritten.
!
      first = 1
      if (frequent) then
        do i = inlen, 2, -1
          name(i:i) = name(i-1:i-1)
        end do
        name(1:1) = ';'
        first = 2
      end if
!
! Find the last significant character, ignoring the ';' flag.
!
      last = 0
      do i = first, inlen
        if (name(i:i).ne.' ' .and. name(i:i).ne.'_') last = i
      end do
!
      if (last.eq.0) then
        if (frequent) then
          name = ';UNKNOWN'
        else
          name = 'UNKNOWN'
        end if
      else
        do i = first, last
          if (name(i:i).eq.' ') name(i:i) = '_'
        end do
        if (last.lt.inlen) name(last+1:inlen) = ' '
      end if
!
      return
      end