Hi Simon and Robert,

> It sounds like the file as it is read by the script is completely on one
> line. I wonder if this is the old UNIX vs MSDOS line ending problem.

Yes, it sounds like a linefeed problem. But Python actually has universal
newline support. I made some changes to the script (quite a lot of
changes, sorry ;-)) and it can now read any of mac/unix/dos text files
using MacPyMol.

Cheers,
  Thomas

--
Thomas Holder
Group of Steffen Schmidt
Department of Biochemistry
MPI for Developmental Biology
Spemannstr. 35
D-72076 Tübingen
"""
Copyright (c) 2003 Robert L. Campbell
Copyright (c) 2010 Thomas Holder

Please read below for instructions

Contains the functions
   data2b_atom(mol='', data_file='', prop='b')
   data2b_res(mol='', data_file='', prop='b')
   data2q_atom(mol='', data_file='')
   data2q_res(mol='', data_file='')
"""

import sys, re

# Matches lines to skip when parsing data files: blank (whitespace-only)
# lines and lines whose first non-blank character is '#'.  Raw string so
# that '\s' reaches the regex engine instead of being treated as a
# (invalid) string escape.
comment = re.compile(r'^\s*$|^\s*#')

def atom_data_extract(data_file):
    """
    Read a 'by-atom' data file and return its values as nested dictionaries.

    Each data line holds whitespace-separated columns, either

        chain resi resn name data
    or
        resi resn name data

    A chain of '-' is stored as the empty string.  A four-column line
    carries no chain of its own and is filed under the chain of the most
    recent five-column line (the empty string if there has been none).
    Blank lines and lines whose first non-blank character is '#' are
    ignored.

    The file is opened with universal newline handling, so unix, dos and
    old mac line endings are all accepted, and it is closed before the
    function returns.

    Returns a dictionary such that

        result[chain][resi][name] == (data, resn)

    where chain, resi, name and resn are strings and data is a float.

    If a data line does not have four or five columns, a message is
    written to stderr and sys.exit(1) is called.  float() raises
    ValueError if the data column is not a number.
    """
    # io.open is available on Python 2.6+ and 3.x (the file() builtin is
    # Python 2 only) and translates all newline conventions by default,
    # without the 'U' mode flag that newer Python versions reject.
    import io

    bdat = {}
    chain = ''

    with io.open(data_file, 'r') as data_lines:
        for line in data_lines:
            stripped = line.strip()

            # ignore blank lines and comment lines (beginning with a '#')
            if not stripped or stripped.startswith('#'):
                continue

            words = stripped.split()

            # check number of columns of data
            if len(words) == 5:
                chain, resi, resn, name = words[:4]
                if chain == '-':
                    chain = ''
                data = float(words[4])
            elif len(words) == 4:
                # no chain column: keep whatever chain is current
                resi, resn, name = words[:3]
                data = float(words[3])
            else:
                sys.stderr.write("Error in reading data files -- check number of columns\n")
                sys.exit(1)

            bdat.setdefault(chain, {}).setdefault(resi, {})[name] = (data, resn)

    return bdat

def residue_data_extract(data_file):
    """
    Read a 'by-residue' data file and return its values as nested dictionaries.

    Each data line holds whitespace-separated columns, one of

        chain resi resn data
        resi resn data
        resi data

    A chain of '-' is stored as the empty string.  Lines without a chain
    column are filed under the chain of the most recent four-column line
    (the empty string if there has been none).  Two-column lines get an
    empty residue name.  Blank lines and lines whose first non-blank
    character is '#' are ignored.

    The file is opened with universal newline handling, so unix, dos and
    old mac line endings are all accepted, and it is closed before the
    function returns.

    Returns a dictionary such that

        result[chain][resi] == (data, resn)

    where chain, resi and resn are strings and data is a float.

    If a data line does not have two, three or four columns, a message is
    written to stderr and sys.exit(1) is called.  float() raises
    ValueError if the data column is not a number.
    """
    # io.open is available on Python 2.6+ and 3.x (the file() builtin is
    # Python 2 only) and translates all newline conventions by default,
    # without the 'U' mode flag that newer Python versions reject.
    import io

    bdat = {}
    chain = ''

    with io.open(data_file, 'r') as data_lines:
        for line in data_lines:
            stripped = line.strip()

            # ignore blank lines and comment lines (beginning with a '#')
            if not stripped or stripped.startswith('#'):
                continue

            words = stripped.split()

            # check number of columns of data
            if len(words) == 4:
                chain, resi, resn = words[:3]
                if chain == '-':
                    chain = ''
                data = float(words[3])
            elif len(words) == 3:
                # no chain column: keep whatever chain is current
                resi, resn = words[:2]
                data = float(words[2])
            elif len(words) == 2:
                resi = words[0]
                data = float(words[1])
                resn = ''
            else:
                sys.stderr.write("Error in reading data files -- check number of columns\n")
                sys.exit(1)

            bdat.setdefault(chain, {})[resi] = (data, resn)

    return bdat

###########################################################################################
# for testing purposes:
# if calling this as a program on its own, read the data file name from
# the command line and run residue_data_extract on it (does not require
# importing cmd from pymol)

if __name__ == '__main__':
    # Stand-alone test mode: parse a by-residue data file named on the
    # command line and print what was read, one line per residue.
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s <data_file>\n" % sys.argv[0])
        sys.exit(1)

    data_file = sys.argv[1]
    b_dict = residue_data_extract(data_file)
    for chain in sorted(b_dict):
        # residue numbers are strings here, so this is a lexical sort
        for resi in sorted(b_dict[chain]):
            b, resn = b_dict[chain][resi]
            # single parenthesised argument: valid as both the Python 2
            # print statement and the Python 3 print function
            print("b-factors %s %s %s %s  new B='%s'" % (data_file, chain, resn, resi, b))
    # exit here so the PyMOL-specific part of the file is never reached
    sys.exit()


###########################################################################################
# PyMOL stuff

from pymol import cmd

def data2b_atom(mol='', data_file='', prop='b'):
    """
DESCRIPTION

    Alters the B-factor data by atom.

USAGE

    data2b_atom <mol>, <data_file> [, <prop>]

    where <mol> is the molecular object whose B-factor data you wish to modify
    and <data_file> is a file containing the data (one value for each atom).
    The format of <data_file> should be:

         chain resi resn name data
    or
         resi resn name data

    (i.e. "chain" is optional if all atoms are in one chain).
    Lines beginning with '#' are ignored as comments.

    <prop> is the atom property that receives the data (default 'b';
    data2q_atom passes 'q').  Atoms that have no entry in <data_file>
    keep their current value.

SEE ALSO

    data2b_res, data2q_atom, data2q_res
    """
    # 'stored' is not imported at file level (only 'cmd' is), so bring it
    # into scope here; otherwise the assignment below raises NameError
    # unless the enclosing namespace happens to provide the name.
    from pymol import stored

    b_dict = atom_data_extract(data_file)

    def b_lookup(chain, resi, name, b):
        # return the value read from the file, or the atom's current
        # value when the file has no entry for this atom
        try:
            return b_dict[chain][resi][name][0]
        except KeyError:
            return b
    stored.b = b_lookup

    # the expression is evaluated once per atom in the selection
    cmd.alter(mol, '%s=stored.b(chain, resi, name, %s)' % (prop, prop))
    cmd.rebuild()

def data2b_res(mol='', data_file='', prop='b'):
    """
DESCRIPTION

    Alters the B-factor data by residue.

USAGE

    data2b_res <mol>, <data_file> [, <prop>]

    where <mol> is the molecular object whose B-factor data you wish to modify
    and <data_file> is a file containing the data (one value for each residue).
    The format of <data_file> should be:

         chain resi resn data
    or
         resi resn data
    or
         resi data

    (i.e. "chain" is optional). Lines beginning with '#' are ignored as comments.

    <prop> is the atom property that receives the data (default 'b';
    data2q_res passes 'q').  Every atom of a listed residue gets that
    residue's value; atoms of residues that have no entry in <data_file>
    keep their current value.

SEE ALSO

    data2b_atom, data2q_atom, data2q_res
    """
    # 'stored' is not imported at file level (only 'cmd' is), so bring it
    # into scope here; otherwise the assignment below raises NameError
    # unless the enclosing namespace happens to provide the name.
    from pymol import stored

    b_dict = residue_data_extract(data_file)

    def b_lookup(chain, resi, name, b):
        # 'name' is unused: the lookup is per residue, but the signature
        # matches the expression passed to cmd.alter below
        try:
            return b_dict[chain][resi][0]
        except KeyError:
            return b
    stored.b = b_lookup

    # the expression is evaluated once per atom in the selection
    cmd.alter(mol, '%s=stored.b(chain, resi, name, %s)' % (prop, prop))
    cmd.rebuild()

def data2q_atom(mol='', data_file=''):
    """
DESCRIPTION

    Sets the occupancy (q) of each atom from per-atom values in a data file.

USAGE

    data2q_atom <mol>, <data_file>

    Arguments and file format are the same as for data2b_atom.

SEE ALSO

    data2q_res, data2b_atom, data2b_res
    """
    # identical to data2b_atom except for the property being written
    data2b_atom(mol=mol, data_file=data_file, prop='q')

def data2q_res(mol='', data_file=''):
    """
DESCRIPTION

    Sets the occupancy (q) of each atom from per-residue values in a data file.

USAGE

    data2q_res <mol>, <data_file>

    Arguments and file format are the same as for data2b_res.

SEE ALSO

    data2q_atom, data2b_atom, data2b_res
    """
    # identical to data2b_res except for the property being written
    data2b_res(mol=mol, data_file=data_file, prop='q')

# Register each function with cmd.extend under its own name, in the same
# order as before.
for _command_name, _command_func in (
        ('data2b_res', data2b_res),
        ('data2b_atom', data2b_atom),
        ('data2q_res', data2q_res),
        ('data2q_atom', data2q_atom),
):
    cmd.extend(_command_name, _command_func)
# do not leave the loop variables behind in the module namespace
del _command_name, _command_func
------------------------------------------------------------------------------
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
_______________________________________________
PyMOL-users mailing list (PyMOL-users@lists.sourceforge.net)
Info Page: https://lists.sourceforge.net/lists/listinfo/pymol-users
Archives: http://www.mail-archive.com/pymol-users@lists.sourceforge.net

Reply via email to