IUBio

How can one keep TFD-Sites up-to-date ?

mathog at seqaxp.bio.caltech.edu mathog at seqaxp.bio.caltech.edu
Fri Aug 4 18:47:04 EST 1995


In article <3vstk0$mlv at rc1.vub.ac.be>, gbottu at ben.vub.ac.be (Guy Bottu) writes:
>The GCG distribution contains the file tfsites.dat with the TFD-Sites
>databank in GCG patterns file format. We would like to offer to our users
>the latest version of the TFD-Sites databank. Unfortunately, the file
>sites.dat that can be obtained from the NCBI anonymous ftp server is
>not in GCG format. Does anyone know about a place where one can get
>the TFD-Sites in GCG format or about a software tool that performs the
>format conversion ?
>
>	Dr. Guy Bottu

The program following my signature worked the last time I did that
conversion. 

Regards,

David Mathog
mathog at seqaxp.bio.caltech.edu
Manager, sequence analysis facility, biology division, Caltech 


C	TFDTOGCG.FOR
C	9-APR-1993 David Mathog, Division of Biology, Caltech
C	mathog at seqvax.bio.caltech.edu
C
C	This little program takes one of David Ghosh's site.dat
C	files and reformats it for GCG usage.
C
C	Put the output file into the GCG system as: GENMOREDATA:TFSITES.DAT
C
C	FTP to NCBI.NLM.NIH.GOV and look in repository/TFD/tfd.ascii
C	for the site.dat file.
C
C	This works with GCG 7.2 and VMS 5.5-2 and *may* work on other 
C	systems (not tested).
C
C	Instructions for building an executable:
C
C	   $ for/nolis  tfdtogcg
C	   $ link/nomap tfdtogcg
C
C	Example session (program's prompts not shown):
C
C	   $ run tfdtogcg
C          site.dat
C	   temporary.out
C	   9-APR-1993, Converted TFD X.Y to GCG format
C
C	   $ copy temporary.out genmoredata:tfsites.dat
C	   $ set file/prot=w:re genmoredata:tfsites.dat
C	   $ delete temporary.out.
C
C	There is practically NO error checking, so watch out!
C
C	Revision 1.0 24-JUN-1993, David Mathog
C	  TFD 7.0 changes in sites file are:
C	    GENOME*1       removed
C	    TRN_UNIT*20    resized to 30
C	  Modified ghosh_record to reflect these changes.
C
	implicit none
c
c  TFDtoGCG main program.
c
c  Prompts on units 6/5 for an input file name, an output file
c  name and any number of comment lines.  It then copies every
c  record of the TFD sites file (unit 10) to the GCG patterns
c  file (unit 11), keeping only the name, sequence, factor and
c  reference fields.
c
c  This uses VAX FORTRAN extensions: STRUCTURE/RECORD, the Q edit
c  descriptor, and the ORGANIZATION, RECORDTYPE and READONLY
c  keywords of OPEN.
c
	character*2048 inline,infile,outfile
	integer*4 inlen,istat
	logical   ok
c
c  Ghosh lays out site records like this in TFD 7.0
c
	structure /ghosh_record/
	  character SITE_ID*6
	  character FAC_NAME*25
	  character SEQ_NAME*30
	  character NA_SEQ*45
	  character SEQ_TYPE*1
	  character SYSTEM*10
	  character TRN_UNIT*30
	  character COMMENTS*80
	  character MAIN_REF*60
	  character FAC_SOURCE*16
	  character LOCAT_REF*20
	  character LOCATION*20
	  character METHOD*11
	  character N_PROB*8
	  character REF_N*8
	  character STRAND*1
	  character BINDING*1
	end structure
c
c  GCG lays out TFSITES.DAT records like this
c
	structure /GCG_RECORD/
	  character SEQ_NAME*31
	  character SPACER1*2
	  character NA_SEQ*45
	  character SPACER2*5
	  character FAC_NAME*25
	  character SPACER3*1
	  character MAIN_REF*60
	end structure
c
	record /ghosh_record/ ghosh
	record /gcg_record/   gcg
c
c	Init the spacers for the GCG record.  They are set once and
c	then written unchanged with every output record.
c
	gcg.spacer1 = '0 '
	gcg.spacer2 = ' 0 ! '
	gcg.spacer3 = ' '
c
	write(6,*)'TFDtoGCG'
	write(6,*)'This program converts one of David Ghosh''s site'
	write(6,*)'  files to GCG''s format'
c
c	The Q edit descriptor returns the number of characters left
c	in the input record, so inlen is set before the substring
c	(1:inlen) that follows it in the I/O list is filled.
c
	write(6,*)'Input the name of the file to process'
	read(5,1000)inlen,infile(1:inlen)
1000	format(q,a)
c
c	The input is read unformatted so that each record is copied
c	byte for byte into the ghosh structure.
c
	open(unit=10,file=infile(1:inlen)
	1 ,form='UNFORMATTED',organization='SEQUENTIAL',status='OLD'
	1 ,recordtype='VARIABLE', READONLY)
c
	write(6,*)'Input the name of the output file'
	read(5,1000)inlen,outfile(1:inlen)
c
c	status='NEW': the output file must not already exist.
c
	open(unit=11,file=outfile(1:inlen)
	1 ,form='UNFORMATTED',status='NEW',organization='SEQUENTIAL'
	2 ,recordtype='STREAM_LF',recl = 255)
c
c	get the comments; each one is copied to the output with a
c	leading blank, and an empty line ends the list
c
	write(6,*)'Enter as many lines of comments as you would like'
	write(6,*)'  End each line with a <return>'
	write(6,*)'  End the last line with <return><return>'
	ok = .true.
	do while (ok)
	   read(5,1000)inlen,inline(1:inlen)
	   if(inlen.eq.0)then
	       ok = .false.
	   else
	       write(11)' '//inline(1:inlen)
	   end if
	end do
	write(6,*)'Working ...'
c
c	write a title line, this one is *easy*
c
	GCG.SEQ_NAME = 'NAME'
	GCG.FAC_NAME = 'FACTOR'
	GCG.NA_SEQ   = 'SEQUENCE'
	GCG.MAIN_REF = 'REFERENCE'
	write(11)gcg
c
c	Now write the divider
c
	write(11)'..'	
c
c	Copy the records.  IOSTAT is zero for a good read, negative
c	at end of file and positive for an error.  Only a zero status
c	yields a record worth converting; anything else ends the
c	loop, so a read error can no longer re-emit the previous
c	record or spin forever.
c
	istat=0
	do while(istat.eq.0)
	    read(10,iostat=istat)ghosh
	    if(istat.eq.0)then
c
c	       The 30 character TFD name is padded to 31 so that
c	       fixseqname has room to put a ';' in front of it.
c
	       GCG.SEQ_NAME = GHOSH.SEQ_NAME//' '
	       GCG.FAC_NAME = GHOSH.FAC_NAME
	       GCG.NA_SEQ   = GHOSH.NA_SEQ
	       GCG.MAIN_REF = GHOSH.MAIN_REF
	       call fixseqname(GCG.SEQ_NAME,GHOSH.N_PROB)
	       write(11)gcg
	    end if
	end do
c
	close(unit=10)
	close(unit=11)
c
c	A positive status means the input was not read to its end:
c	say so instead of claiming a normal completion.
c
	if(istat.gt.0)then
	   write(6,*)'TFDtoGCG: error reading input, IOSTAT =',istat
	   stop 'TFDtoGCG: stopped by input error'
	end if
	stop 'TFDtoGCG: normal completion' 
	end

	subroutine fixseqname(NAME,N_PROB)
c
c	Turn a TFD sequence name into a GCG pattern name, in place.
c
c	NAME    (in/out) the name; its length is taken from the
c	        actual argument.
c	N_PROB  (in)     text of a real number.  When its value is
c	        above LIMIT a ';' is put in front of NAME to mark a
c	        frequent motif; the last character of NAME is
c	        dropped to make room.
c
c	Afterwards every blank inside the name is an underscore and
c	the trailing blanks are kept.  A name with no nonblank
c	character at all becomes 'UNKNOWN'.
c
	implicit none
	character name*(*)
	character N_prob*(*)
	character fmt*12
	integer i,last,inlen,nlen,ios
	real limit,value
	parameter (limit = 5.0e-4)
c
c	Do the length this way so that it will still work if the
c	length of NAME, N_PROB change.
c
	inlen=len(NAME)
	nlen =len(N_PROB)
c
c	first put in a ";", if needed to indicate a frequent motif
c
c	The format (Fw.2, w = length of N_PROB) is built at run time
c	in a character variable, which is standard FORTRAN 77, rather
c	than with a VAX-only variable format expression.  The blanks
c	that I6 leaves inside the format have no effect.
c
	write(fmt,1000)nlen
1000	format('(F',I6,'.2)')
	read(N_prob,fmt,iostat=ios)value
c
c	A field that is not a number used to abort the whole run.
c	Now it is reported and the name is left unflagged.
c
	if(ios.ne.0)then
	   write(6,*)'fixseqname: cannot read N_PROB "',N_prob,'"'
	   value = 0.0
	end if
	if(value.gt.limit)NAME = ';'//NAME(1:inlen-1)
c
c	now convert any internal spaces to underscores
c	if it doesn't find *any* nonspaces, the name becomes "UNKNOWN"
c	This is done in two passes.
c
c	Pass 1: every blank, trailing ones included, becomes '_'.
c
	do i = 1, inlen
	  if(name(i:i).eq.' ')name(i:i)='_'
	end do
c
c	Pass 2: walk back from the end turning '_' into blanks again
c	until a character that is not '_' is found.  LAST stays at
c	inlen+1 while nothing has been found.
c
	last=inlen+1
	i = inlen
	do while(last.eq.inlen+1 .and. i.gt.0)
	  if(name(i:i).ne.'_')then
	     last = i
	  else
	     name(i:i) = ' '
	     i = i-1
	  end if
	end do
c
	if(last.eq.inlen+1)name='UNKNOWN'
c
	return
	end




More information about the Info-gcg mailing list

Send comments to us at biosci-help [At] net.bio.net