Created
December 5, 2014 19:50
-
-
Save 3ki5tj/02b4916ee1cd8410c91c to your computer and use it in GitHub Desktop.
Wrap coordinates to make atoms in a cluster stay together
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
''' wrapping coordinates ''' | |
import os, sys | |
from math import * | |
fnin = "test.pdb" # input file | |
fnout = None | |
box = [0,0,0] | |
cutoff = 12.0 # angstroms | |
verbose = 0 | |
allatom = False | |
quicknb = 0 | |
fnclus = None | |
def usage(): | |
''' print usage and die ''' | |
print '''pdbwrap.py [OPTIONS] input.pdb [output.pdb] | |
Smartly wrap coordinates of a PDB file | |
Options: | |
-s, --size=: followed by the box size in angstroms (for a cubic box) | |
-X, --bx=: followed by the X dimension in angstroms | |
-Y, --by=: followed by the Y dimension in angstroms | |
-Z, --bz=: followed by the Z dimension in angstroms | |
-c: followed by the cutoff length for neighbors | |
-a, --all: include all atoms, not just alpha-carbon (CA) | |
-q: quick search (for all atoms): search only local neighbors for non-CA atoms | |
-Q: quick search (for heavy atoms): search only local neighbors for heavy atoms | |
--fclus=: file name for cluster information | |
-v: be verbose | |
-vv: be more verbose | |
-h, --help: print this message | |
Examples: | |
pdbwrap.py input.pdb | |
pdbwrap.py -X143.748 -Y143.758 -Z143.759 -c10 -q 2000.pdb | |
''' | |
exit(1) | |
def doargs(): | |
''' handle command line options ''' | |
import getopt, glob | |
try: | |
opts, args = getopt.gnu_getopt(sys.argv[1:], "s:X:Y:Z:c:aqQv:", | |
["bx=", "by=", "bz=", "size=", "all", "fclus=", | |
"verbose=", "help"]) | |
except getopt.GetoptError, err: | |
print str(err) | |
usage() | |
global fnin, fnout, box, cutoff, allatom, quicknb, fnclus | |
for o, a in opts: | |
if o in ("-s", "--size"): | |
box = [float(a),]*3 | |
elif o in ("-X", "--bx",): | |
box[0] = float(a) | |
elif o in ("-Y", "--by",): | |
box[1] = float(a) | |
elif o in ("-Z", "--bz",): | |
box[2] = float(a) | |
elif o in ("-c",): | |
cutoff = float(a) | |
elif o in ("-a", "--all"): | |
allatom = True | |
elif o in ("-q",): | |
quicknb = "CA" | |
allatom = True | |
elif o in ("-Q",): | |
quicknb = "HEAVY" | |
allatom = True | |
elif o in ("--fclus",): | |
fnclus = a | |
elif o in ("-v",): | |
verbose += 1 | |
elif o in ("-h", "--help",): | |
usage() | |
ls = args | |
if len(ls) > 0: | |
fnin = ls[0] | |
if len(ls) > 1: | |
fnout = ls[1] | |
else: | |
ls = glob.glob("*.pdb") | |
def add(a, b): | |
return [a[0] + b[0], a[1] + b[1], a[2] + b[2]] | |
def diff(a, b): | |
return [a[0] - b[0], a[1] - b[1], a[2] - b[2]] | |
def norm(a): | |
return sqrt(a[0]*a[0] + a[1]*a[1] + a[2]*a[2]) | |
def dist(a, b): | |
return norm(diff(a, b)) | |
def pbc(a, b, box): | |
dx = a - b | |
while dx > box/2: dx -= box | |
while dx < -box/2: dx += box | |
return dx | |
def pbcdist(a, b, dr, box): | |
for i in range(3): | |
dr[i] = pbc(a[i], b[i], box[i]) | |
return norm(dr) | |
class PDBAtom: | |
''' a record for an ATOM line in a PDB file ''' | |
def __init__(self, s): | |
self.raw = s | |
x = float( s[30:38] ) | |
y = float( s[38:46] ) | |
z = float( s[46:54] ) | |
self.r = [x, y, z] | |
self.nb = [] # neighbor list | |
self.atom = s[12:17].strip() | |
self.res = s[17:20].strip() | |
self.elem = s[77] | |
self.chain = int( s[72:77].strip()[:-1] ) | |
def write(self): | |
s = self.raw | |
return s[:30] + "%8.3f%8.3f%8.3f" % (self.r[0], self.r[1], self.r[2]) + s[54:] | |
def findbox(atomls, iscubic = False): | |
''' guess the box dimension ''' | |
rmin, rmax, box = [0,0,0], [0,0,0], [0,0,0] | |
for d in range(3): | |
rmin[d] = min(a.r[d] for a in atomls) | |
rmax[d] = max(a.r[d] for a in atomls) | |
box[d] = rmax[d] - rmin[d] | |
# if the box is cubic | |
# make sure the three sides are the same | |
if iscubic: | |
span = max(box) | |
box = [span,]*3 | |
rmax = [rmin[0] + span, rmin[1] + span, rmin[2] + span] | |
return rmin, rmax, box | |
def makenblist(atomls, box, cutoff): | |
''' build a neighbor list ''' | |
n = len(atomls) | |
print "building the neigbhor list of %d atoms..." % n | |
dr = [0]*3 | |
for i in range(n): | |
j0 = 0 | |
j1 = i | |
quit = False | |
# use local search for nonessential atoms | |
if ( ( quicknb == "CA" and atomls[i].atom != "CA" ) | |
or ( quicknb == "HEAVY" and atomls[i].elem == "H" ) ): | |
j0 = max(i - 50, 0) | |
j1 = min(i + 50, n) | |
quit = True | |
ai = atomls[i] | |
for j in range(j0, j1): | |
aj = atomls[j] | |
if ( (quicknb == "CA" and aj.atom != "CA") | |
or (quicknb == "HEAVY" and aj.elem == "H") ): continue | |
dis = pbcdist(ai.r, aj.r, dr, box) | |
if dis < cutoff: | |
ai.nb += [j,] | |
aj.nb += [i,] | |
if quit: break | |
print "building nblist %5d/%5d quick: %s \r" % (i+1, n, quicknb), | |
print "the neigbhor list is completed. " | |
def dock(atomls, j, i, box): | |
''' dock j on to i ''' | |
a = atomls[j].r | |
b = atomls[i].r | |
for d in range(3): | |
dr = pbc(a[d], b[d], box[d]) | |
a[d] = b[d] + dr | |
def wrap(atomls): | |
''' find clusters and wrap their coordinates ''' | |
global box | |
# 1. compute the box | |
rmin, rmax, mybox = findbox(atomls) | |
if box[0] == 0 or box[1] == 0 or box[2] == 0: | |
box = mybox # override the input box | |
print "box: %s, rmin %s, rmax %s" % (box, rmin, rmax) | |
# 2. build a neighbor list of the graph | |
makenblist(atomls, box, cutoff) | |
# 3. find connected components | |
n = len(atomls) | |
visited = [False]*n | |
que = [0] | |
visited[0] = True | |
ic = 0 | |
clusls = [] # list of clusters | |
while 1: | |
ls = [] # atom list of this cluster | |
while len(que): | |
i = que.pop(0) | |
ls += [i] | |
# add neighbors of i in the queue | |
for j in atomls[i].nb: | |
if visited[j]: continue | |
que += [j,] | |
visited[j] = True | |
rj0 = atomls[j].r[:] | |
dock(atomls, j, i, box) # dock j on to i | |
if verbose >= 2: | |
print "dock %d on to %d, %s --> %s" % (j, i, rj0, atomls[j].r) | |
ic += 1 | |
clusls += [ls,] | |
if verbose: | |
print "Cluter %d has %d atoms" % (ic, len(ls)) | |
# add the first unique atom into the queue | |
for i in range(n): | |
if not visited[i]: | |
que = [i,] | |
visited[i] = True | |
break | |
else: | |
break | |
# dump cluster information | |
# and shift the clusters | |
clusls = sorted(clusls, key=lambda x: -len(x)) # sort clusters by size | |
nclus = len(clusls) | |
print "%d clusters" % (nclus) | |
strclus = "# %d\n" % nclus | |
for i in range(nclus): | |
ls = clusls[i] | |
chains = set(atomls[j].chain for j in ls) | |
arr2str = lambda arr: " ".join([str(x) for x in arr]) | |
strclus += "%3d %4d %6d | %s\n" % ( | |
i+1, len(chains), len(ls), arr2str(sorted(list(chains)))) | |
# uncomment this line (and comment the above line) | |
# if you want atom information as well | |
#strclus += "%3d %4d %6d | %s | %s\n" % ( | |
# i+1, len(chains), len(ls), arr2str(sorted(list(chains))), arr2str(sorted(ls))) | |
# compute the center of mass | |
# and shift the cluster as a whole | |
# such that the center of mass lies in the box | |
com = [0,0,0] | |
shift = [0,0,0] | |
for d in range(3): | |
com[d] = sum(atomls[j].r[d] for j in ls)/len(ls) | |
# compute the shift | |
shift[d] = 0 | |
while com[d] + shift[d] > box[d]: shift[d] -= box[d] | |
while com[d] + shift[d] < 0: shift[d] += box[d] | |
# apply the shift to all atoms in this cluster | |
for j in ls: atomls[j].r[d] += shift[d] | |
v2s = lambda v: "(" + ", ".join("%8.3f" % x for x in v) + ")" # print a vector nicely | |
print " cluster %3d/%3d, %6d atoms, %4d chains, com %s -> %s" % ( | |
i+1, nclus, len(ls), len(chains), v2s(com), v2s(add(com, shift)) ) | |
if fnclus: | |
open(fnclus, "w").write(strclus) | |
def pdbwrap(fnin, fnout = None): | |
''' wrap the coordinates in fnin ''' | |
# 1. read in coordinates | |
atomls = [] | |
for s in open(fnin).readlines(): | |
if s.strip() == "": continue # skip empty lines | |
a = PDBAtom(s) | |
if allatom or a.atom == "CA": | |
atomls += [ a ] | |
# 2. wrap coordinates | |
wrap(atomls) | |
# 3. output | |
out = [a.write() for a in atomls] | |
if not fnout: fnout = "out." + fnin | |
open(fnout, "w").writelines(out) | |
print "%s --> %s" % (fnin, fnout) | |
if __name__ == "__main__": | |
doargs() | |
pdbwrap(fnin, fnout) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment