There are several reasons why you might want to modify a gsd trajectory. A common one is for backup storage reasons, or for visualization reasons. Sometimes it can also be necessary to modify snapshots between hoomd-blue scripts, e.g. adding/removing particles, changing particle types, or changing box-volumes,

Get the a single snapshot from gsd

One common use case is if you would like to backup your data or clean up your project and you have written a lot equilibration snapshots into your trajectories and did not write a separate restart gsd file. In order to be able to delete the large equilibration gsd file, you'd need to extract a frame (most commonly the last frame) and save it in a new, separate gsd file.

#!/usr/bin/env python3
import numpy as np
import sys
import gsd
from gsd.fl import GSDFile
import gsd.hoomd
import argparse
 
 
parser = argparse.ArgumentParser(description='Get a snapshot from a gsd trajectory')
 
non_opt = parser.add_argument_group('mandatory arguments')
 
non_opt.add_argument('-i','--input', metavar="<gsd>", type=str,dest='input_file', required=True,
    help="gsd file to extract from")
 
non_opt.add_argument('-o','--out', metavar='<gsd>', dest='output_file', required=True,
                   help='Name for the output gsd for saving the extracted snapshot')
 
parser.add_argument('-t','--timestep', metavar='<int>', dest='timestep', type=int,required=False,
                   help='timestep to extract (if not specified, the last snapshot is written out)')
args = parser.parse_args()
 
output_file=args.output_file
input_file=args.input_file
if input_file==output_file:
    parser.error("Input and output file cannot be the SAME file! %s"%(output_file))
 
timestep =args.timestep
print("Creating restart snapshot from gsd trajectory.")
in_file=gsd.fl.GSDFile(name=input_file, mode='rb')
trajectory  = gsd.hoomd.HOOMDTrajectory(in_file)
gsd.fl.create(name=output_file, application='gsd.hoomd ' + gsd.__version__, schema='hoomd', schema_version=[1,1]);
 
all_timesteps = np.linspace(trajectory[0].configuration.step,\
                            trajectory[-1].configuration.step,\
                            len(trajectory),endpoint=True,dtype=int)
 
if timestep==None:
    timestep = trajectory[-1].configuration.step
 
if timestep not in all_timesteps:
    print("ERROR: timestep %d not found in trajectory %s"%(timestep,input_file))
    np.set_printoptions(threshold=6)
    print("timesteps in trajectory: ",all_timesteps)
    exit(1)
else:
    id = int(np.where(all_timesteps == timestep)[0])
 
with gsd.fl.GSDFile(output_file, 'wb') as f:
        traj = gsd.hoomd.HOOMDTrajectory(f)
        traj.append(trajectory[id])
        sys.stdout.write("Frame extracted from %s to %s, timestep : %3d "%(input_file,output_file,trajectory[id].configuration.step))
        sys.stdout.flush()
print("\nDONE")

Trim snapshots from gsd

Similarly to the section above, you might have run a simulation with too many snapshots, and you wish to only keep some of them. This can be useful for trimming down from the beginning or end of the trajectory, as well as keeping every n-th frame only. All of this can reduce the file size sbstantially.

#!/usr/bin/env python3
import numpy as np
import sys, re, os, gsd
import gsd.hoomd
import argparse
import math
 
def convert_size(size_bytes):
    ''' small helper function to convert bytes to human readable format '''
    if size_bytes == 0:
        return "0 B"
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    i = int(math.floor(math.log(size_bytes, 1024)))
    p = math.pow(1024, i)
    s = round(size_bytes / p, 2)
    return "%s %s" % (s, size_name[i])
 
############################### PARSE ##################################
parser = argparse.ArgumentParser(description='Selectively delete frames from a gsd-file')
 
non_opt = parser.add_argument_group('mandatory arguments')
 
non_opt.add_argument('-o','--out',\
                metavar='<gsd-file>', dest='output_file',type=str,\
                required=False,help='Name for the thinned out gsd file (not needed if --info is used)')
 
non_opt.add_argument('-i','--in',\
                metavar='<gsd-file>', dest='input_file',type=str,\
                required=True,help='input trajectory *.gsd file')
 
 
parser.add_argument('--info', dest='info', action='store_true',\
                    help=" Only print file size information and potential period values and exit without writing a new file")
 
parser.add_argument('-p','--p', metavar='<int>', type=int, default=1, required=False,dest='period',
    help='Period of frames to take, -p 10 means take every 10th frame, DEFAULT=1/every')
 
parser.add_argument('-s','--skip',metavar='<int>', dest='skip',type=int,required=False,default=0,
    help='Number of frames to skip at the beginning (positive s) or take from the end (negative s) of the trajectory. DEFAULT=0/no skip')
 
parser.add_argument('-l','--list',\
                metavar='<ints>', dest='to_del', nargs='+', type=int,\
                required=False,help='frame numbers to delete, DEFAULT=none')
 
 
args = parser.parse_args()
input_file = args.input_file
to_del = args.to_del
print(to_del)
 
if not os.path.isfile(input_file):
    print("ERROR: file does not exist! %s"%(input_file))
    exit(1)
if args.info==True:
    # print some info about the file and exit without writing a new file
    print("Only printing information about gsd file: ", input_file)
    trajectory = gsd.hoomd.open(name=input_file, mode='rb')
    print("Total number of frames: ", len(trajectory))
    if len(trajectory)==0:
        print("ERROR: trajectory does not seem to contain any snapshots, check file")
        exit(1)
    # more info about file content
    bytes = os.path.getsize(input_file)
    print("Total file size:",convert_size(bytes))
    s_per_snap = (bytes/len(trajectory))
    print("Size per snapshot", convert_size(s_per_snap))
    print("potential values for p:")
    print("period  frames   size")
    print("--------------------------")
    for p in [2,3,5,10]:
        slice = trajectory[::p]
        new_s_per_snap = s_per_snap*len(slice)
        print(p,"    \t",len(slice),"\t",convert_size(new_s_per_snap))
    exit(0)
 
if args.info==False and (args.input_file is None or args.output_file is None):
    parser.error("Output file is required, specifiy -o/--out <file> or use --info")
 
output_file = args.output_file
if input_file==output_file:
    parser.error("Input and output file cannot be the SAME file! %s"%(output_file))
period = args.period
if period<1:
    parser.error("Period needs to be >= 1! Current value %s"%period)
skip = args.skip
 
trajectory = gsd.hoomd.open(name=input_file, mode='rb')
N_frames = len(trajectory)
# figure out which frames to analyze
if np.abs(skip)>=N_frames:
    print("ERROR: trajectory only has %d frames, skip=%d is too large!"%(N_frames,skip))
    exit(1)
if skip>=0:
    frames_to_write=np.arange(skip,N_frames,1).astype(int)
else:
    frames_to_write=np.arange(N_frames+skip,N_frames,1).astype(int)
 
frames_to_write=frames_to_write[::period]
 
# Return the unique values in frames_to_write that are not in to_del.
frames_to_write = np.setdiff1d(frames_to_write,to_del)
 
if len(frames_to_write)<=0:
    print("ERROR: frames to write seems to be <=0! %s "%(frames_to_write))
    exit(1)
print("total number of frames ",N_frames)
print("writing frames ",frames_to_write[0],"...",frames_to_write[-1])
print("writing every nth frame: ",period)
 
gsd.fl.create(name=output_file, application='gsd.hoomd ' + gsd.__version__, schema='hoomd', schema_version=[1,1])
 
t =  gsd.hoomd.open(name=output_file, mode='wb+')
 
# Iterate over all frames of the sequence.
for i in frames_to_write:
    sys.stdout.write("\rCurrent frame %3d/%3d"%(i,N_frames))
    sys.stdout.flush()
    t.append(trajectory[int(i)])
 
 
sys.stdout.write("\nDONE\n")
sys.stdout.flush()

Modify velocity components to contain some property

The configuration viewer vmd (as well as others) only reads in limited information from each frame in a trajectory, but it does read positions and velocities, It can be quite difficult (but possible) to compute properties of particles with the build in Tcl language, so you might want to use Python and python packages instead. A commonly used trick for this is to use Python to calculate a per-particle property of some kind and then write its value into a velocity component. This way, you can see/color the snapshots by this property using color gradients and build in options intended for velocities.

This code uses the Python package freud to calculate the Steinhardt order parameters q₆ and q₆ and writes them into the velocity components. This script can be easily adapted to color according to clusters, number of nearest neighbors, or any other single-particle property.

#!/usr/bin/env python3
import numpy as np
import sys, re, os, gsd
import gsd.hoomd
import argparse
import math
import freud
 
############################### PARSE ##################################
parser = argparse.ArgumentParser(description='Calculate av q_6,q_4 per particle from gsd-file')
 
non_opt = parser.add_argument_group('mandatory arguments')
 
non_opt.add_argument('-o','--out',\
                metavar='<gsd-file>', dest='output_file',type=str,\
                required=False,help='Name for the  out .gsd file with q_6 and q_4 in the v_x,v_y components')
 
non_opt.add_argument('-i','--in',\
                metavar='<gsd-file>', dest='input_file',type=str,\
                required=True,help='input trajectory *.gsd file')
 
args = parser.parse_args()
input_file = args.input_file
output_file = args.output_file
if input_file==output_file:
    parser.error("Input and output file cannot be the SAME file! %s"%(output_file))
 
# read gsd file
in_file = gsd.fl.GSDFile(name=input_file, mode='rb')
in_trajectory  = gsd.hoomd.HOOMDTrajectory(in_file)
N_frames = int(in_file.nframes)
 
# output gsd file
t = gsd.hoomd.open(name=output_file, mode='wb')
 
for i,frame in enumerate(in_trajectory):
    # print out current frame, calculation takes quite long, so good for checking
    # you can open files in ovito even while they are being written, and reload
    # from time to time
    sys.stdout.write("\rCurrent frame %3d/%3d"%(i,N_frames))
    sys.stdout.flush()
 
    # calculate averaged q_6, q_4
    pos = frame.particles.position
    box = freud.box.Box.from_box(frame.configuration.box[0:3])
    system = (box, pos)
 
    # cutoff of 1.5 - first minimum in the pair correlation function g(r), so
    # should get all nearest neighbours
    neigh = dict(r_max=1.5)
 
    ql_6 = freud.order.Steinhardt(6,average=True)
    ql_6_sc = ql_6.compute(system=system,neighbors=neigh).particle_order
 
    ql_4 = freud.order.Steinhardt(4,average=True)
    ql_4_sc = ql_4.compute(system=system,neighbors=neigh).particle_order
 
    # make a new snapshot
    snapsot_new = gsd.hoomd.Snapshot()
    # copy information from old gsd frame into the new one
    snapsot_new = frame
    # put (q6,q4,0) into velocity 
    vel_new = np.vstack([ql_6_sc,ql_4_sc,np.zeros(len(pos))]).T
 
    snapsot_new.particles.velocity = vel_new
 
    # append the new snapshot to the output gsd file
    t.append(snapsot_new)
 
sys.stdout.write("\nDONE\n")

Convert configuration files to different formats

lammps dump --> gsd

This only reads the last configuration and writes it to a gsd file. It's easily modified to convert the whole trajectory by writing gsd snapshots inside the while-loop instead of after it. Lammps dump files contain no bond information, so that is lost. One can implement a bond-generation algorithm (based on other information or distance) in this as well, but that will depend on the simulation/situation. Particle names for the types are invented starting from capital letters, e.g. A=0, B=1, C=2,…

import numpy as np
import gsd.hoomd
import argparse
import string
 
parser = argparse.ArgumentParser(description='Get last snapshot from a dump trajectory')
 
non_opt = parser.add_argument_group('mandatory arguments')
 
non_opt.add_argument('-i','--input', metavar="<dump>", type=str,dest='input_file', required=True,
    help="gsd file to extract from")
 
non_opt.add_argument('-o','--out', metavar='<gsd>', dest='output_file', required=True,
                   help='Name for the output gsd for saving the last snapshot')
 
args = parser.parse_args()
input_file = args.input_file
output_file = args.output_file
 
print("Creating snapshot from gsd trajectory:")
t =  gsd.hoomd.open(name=output_file, mode='wb')
print("Input:",input_file)
print("Output:",output_file)
 
 
with open(input_file) as f:
    while True:
        line=f.readline()
        if line.strip() == "ITEM: TIMESTEP":
            nexline=f.readline()
            timestep = int(nexline.strip())
            coords=[]
            read = False
        if "ITEM: NUMBER OF ATOMS" in line:
            nexline=f.readline()
            num =int(nexline.strip())
        if "ITEM: BOX" in line:
            lx=float(f.readline().strip().split()[1])*2
            ly=float(f.readline().strip().split()[1])*2
            lz=float(f.readline().strip().split()[1])*2
        if "ITEM: ATOM" in line:
            for i in range(num):
                nextline=f.readline()
                coords.append(nextline.strip().split())
            read = True
        if read == True:
            box=np.array([lx,ly,lz])
            coords=np.asarray(coords)
        if not line: break
 
 
 
 
type_ids=(coords.T[-4:-3].T).astype(np.int)
positions=(coords.T[-3:].T).astype(np.float)
 
# invent particle type names
u = np.unique(type_ids)
letter_list = [string.ascii_uppercase[a] for a in u]
 
# make a new gsd frame
s1 = gsd.hoomd.Snapshot()
s1.configuration.step=timestep
s1.configuration.box=np.array([box[0],box[1],box[2],0,0,0])
s1.configuration.dimensions=3
s1.particles.types=letter_list
 
# if bonds exists or can be generated: 
# s1.bonds.N=len(bonds)
# s1.bonds.types=['bond']
# s1.bonds.typeid=np.zeros(len(bonds))
# s1.bonds.group =bonds
 
s1.particles.N=len(positions)
s1.particles.typeid=type_ids
s1.particles.position=positions
#s1.particles.velocity = np.zeros_like(positions) #if velocity exists, it can be written here
#s1.particles.image = np.zeros_like(positions)
 
t.append(s1)
print("DONE")

gsd --> lammps dump

This converts the last configuration in a gsd file into a lammps dump file. It's easily modified to take any snapshot by changing the index for the frame or by looping over the gsd trajectory. There is no bond info in the lammps file, so that info is lost. Depending on the details the particle id might need to be shifted by +1 (hoomd starts counting from 0 , lammps starts from 1).

import numpy as np
import gsd.hoomd
import argparse
 
parser = argparse.ArgumentParser(description='Get last snapshot from a gsd trajectory')
 
non_opt = parser.add_argument_group('mandatory arguments')
 
non_opt.add_argument('-i','--input', metavar="<gsd>", type=str,dest='input_file', required=True,
    help="gsd file to extract from")
 
non_opt.add_argument('-o','--out', metavar='<dump>', dest='output_file', required=True,
                   help='Name for the output dump for saving the last snapshot')
 
args = parser.parse_args()
input_file = args.input_file
output_file = args.output_file
 
print("Creating snapshot from gsd trajectory:")
in_file = gsd.fl.GSDFile(name=input_file, mode='rb', application='gsd.hoomd ' + gsd.__version__, schema='hoomd', schema_version=[1,4])
print("Input:",input_file)
trajectory  = gsd.hoomd.HOOMDTrajectory(in_file)
print("Output:",output_file)
 
fout = open(output_file,'w')
frame = trajectory[-1] # take last snapshot
 
timestep = frame.configuration.step
Box = frame.configuration.box[0:3]
 
fout.write("ITEM: TIMESTEP\n")
fout.write("%d\n"%timestep)
fout.write("ITEM: NUMBER OF ATOMS\n")
fout.write("%d\n"%len(frame.particles.position))
fout.write("ITEM: BOX BOUNDS xy xz yz pp pp pp\n")
fout.write("%f %f 0.0\n"%(-Box[0]/2.0,Box[0]/2.0))
fout.write("%f %f 0.0\n"%(-Box[1]/2.0,Box[1]/2.0))
fout.write("%f %f 0.0\n"%(-Box[2]/2.0,Box[2]/2.0))
fout.write("ITEM: ATOMS id molid x y z\n")
 
for i,p in enumerate(frame.particles.position):
    fout.write("%d %d %f %f %f\n"%(i,frame.particles.typeid[i],p[0],p[1],p[2]))
 
print("DONE")

StattLabWiki

Table of Contents