Extract a zone around a model from a MRC density file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: UTF8 -*- | |
# Author: Guillaume Bouvier -- guillaume.bouvier@pasteur.fr | |
# https://research.pasteur.fr/en/member/guillaume-bouvier/ | |
# 2019-04-08 16:15:11 (UTC+0200) | |
import sys | |
import __main__ | |
__main__.pymol_argv = ['pymol', '-cq'] # Optionnaly pass options to pymol | |
import pymol | |
stdout = sys.stdout # To get the stdout in the notebook instead of in the pymol console | |
import pymol.cmd as cmd | |
pymol.finish_launching() | |
sys.stdout = stdout | |
import itertools | |
import argparse | |
import numpy | |
import mrcfile | |
from scipy.spatial import cKDTree | |
def read_mrc(mrcfilename): | |
""" | |
Read a mrc file and return the xyz and density values at the given level | |
if given | |
""" | |
print "Reading mrc file ..." | |
xyz = [] | |
with mrcfile.open(mrcfilename) as emd: | |
nx, ny, nz = emd.header['nx'], emd.header['ny'], emd.header['nz'] | |
x0, y0, z0 = emd.header['origin']['x'], emd.header['origin']['y'],\ | |
emd.header['origin']['z'] | |
x, y, z = numpy.float(x0), numpy.float(y0), numpy.float(z0) | |
dx, dy, dz = emd.voxel_size['x'], emd.voxel_size['y'],\ | |
emd.voxel_size['z'] | |
xyz = numpy.meshgrid(numpy.arange(x0, x0+nx*dx, dx), | |
numpy.arange(y0, y0+ny*dy, dy), | |
numpy.arange(z0, z0+nz*dz, dz), | |
indexing='ij') | |
xyz = numpy.asarray(xyz) | |
xyz = xyz.reshape(3, nx*ny*nz) | |
xyz = xyz.T | |
print "done" | |
return xyz, emd.data.flatten(order='F').reshape(nx, ny, nz) | |
def save_density(data, grid_spacing, outfilename, origin=None): | |
""" | |
Save the density to an mrc file. The origin of the grid will be (0,0,0) | |
• outfilename: the mrc file name for the output | |
""" | |
print "Saving mrc file ..." | |
data = data.astype('float32') | |
with mrcfile.new(outfilename, overwrite=True) as mrc: | |
mrc.set_data(data.T) | |
mrc.voxel_size = grid_spacing | |
if origin is not None: | |
mrc.header['origin']['x'] = origin[0] | |
mrc.header['origin']['y'] = origin[1] | |
mrc.header['origin']['z'] = origin[2] | |
mrc.update_header_from_data() | |
mrc.update_header_stats() | |
print "done" | |
def read_pdb(pdbfilename): | |
""" | |
Read the given pdb file | |
""" | |
print "Reading pdb file ..." | |
cmd.load(pdbfilename, 'struct') | |
coords = cmd.get_coords('struct') | |
print "done" | |
return coords | |
def box_cut(emd, grid, coords, radius, padding=7): | |
""" | |
Cut a box around coords | |
""" | |
print "Cropping density ..." | |
nx, ny, nz = emd.shape | |
spacing = numpy.linalg.norm(numpy.diff(grid, axis=0)[0]) | |
xm, ym, zm = coords.min(axis=0) - radius - spacing*padding | |
xM, yM, zM = coords.max(axis=0) + radius + spacing*padding | |
x0, y0, z0 = grid[0] | |
x1, y1, z1 = grid[-1] + spacing | |
X = numpy.arange(x0, x1, spacing) | |
Y = numpy.arange(y0, y1, spacing) | |
Z = numpy.arange(z0, z1, spacing) | |
selx = numpy.logical_and(X>=xm, X<=xM) | |
sely = numpy.logical_and(Y>=ym, Y<=yM) | |
selz = numpy.logical_and(Z>=zm, Z<=zM) | |
indx = numpy.where(selx)[0] | |
indy = numpy.where(sely)[0] | |
indz = numpy.where(selz)[0] | |
i0, i1 = indx[0], indx[-1] | |
j0, j1 = indy[0], indy[-1] | |
k0, k1 = indz[0], indz[-1] | |
emd = emd[i0:i1, j0:j1, k0:k1] | |
grid = grid.reshape((nx, ny, nz, 3)) | |
grid = grid[i0:i1, j0:j1, k0:k1, :].reshape((-1, 3)) | |
print "done" | |
return grid, emd | |
def zone(emd, grid, coords, radius, padding=7): | |
""" | |
• emd: EM density data | |
• grid: em grid | |
• coords: the atomic coordinates of the model | |
• radius: distance threshold in Å | |
• padding: padding (margin) to add to the resulting density | |
""" | |
print "Zone selection ..." | |
ni, nj, nk = emd.shape | |
ravel = lambda x: numpy.ravel_multi_index(x, (ni, nj, nk)) | |
kdtree = cKDTree(grid) | |
invoxels_ = kdtree.query_ball_point(coords, radius) | |
invoxels = [] | |
for l in invoxels_: | |
invoxels.extend(l) | |
invoxels = numpy.unique(invoxels) | |
sel = numpy.zeros(grid.shape[0], dtype=bool) | |
sel[invoxels] = True | |
sel = sel.reshape(emd.shape) | |
emd[~sel] = emd.min() | |
selindex = numpy.asarray(numpy.where(emd > emd.min())) | |
ind_min = selindex.min(axis=1) | |
ind_max = selindex.max(axis=1) | |
ind_min = numpy.maximum(ind_min-padding, 0) | |
ind_max = numpy.minimum(ind_max+padding, emd.shape) | |
i0, j0, k0 = ind_min | |
i1, j1, k1 = ind_max | |
emd = emd[i0:i1, j0:j1, k0:k1] | |
origin = grid[ravel((i0, j0, k0))] | |
print "done" | |
return emd, origin | |
if __name__ == '__main__': | |
PARSER = argparse.ArgumentParser(description=\ | |
'Zone selection of EM density') | |
PARSER.add_argument('--mrc', | |
dest='MRCFILENAME', | |
type=str, | |
help='mrc file name') | |
PARSER.add_argument('--pdb', | |
dest='PDBFILENAME', | |
type=str, | |
help='pdb file name') | |
PARSER.add_argument('--threshold', | |
dest='THRESHOLD', | |
type=float, | |
help='distance threshold in Å') | |
PARSER.add_argument('--out', | |
dest='OUTFILENAME', | |
type=str, | |
help='OUTPUT MRC FILENAME') | |
ARGS = PARSER.parse_args() | |
GRID, EMD = read_mrc(ARGS.MRCFILENAME) | |
COORDS = read_pdb(ARGS.PDBFILENAME) | |
GRID, EMD = box_cut(EMD, GRID, COORDS, ARGS.THRESHOLD) | |
SPACING = numpy.linalg.norm(numpy.diff(GRID, axis=0)[0]) | |
EMD, ORIGIN = zone(EMD, GRID, COORDS, ARGS.THRESHOLD) | |
save_density(EMD, SPACING, ARGS.OUTFILENAME, origin=ORIGIN) |