Table of Contents
There are several reasons why you might want to modify a gsd trajectory. A common one is for backup storage reasons, or for visualization reasons. Sometimes it can also be necessary to modify snapshots between hoomd-blue scripts, e.g. adding/removing particles, changing particle types, or changing box-volumes,
Get the a single snapshot from gsd
One common use case is if you would like to backup your data or clean up your project and you have written a lot equilibration snapshots into your trajectories and did not write a separate restart gsd file. In order to be able to delete the large equilibration gsd file, you'd need to extract a frame (most commonly the last frame) and save it in a new, separate gsd file.
#!/usr/bin/env python3 import numpy as np import sys import gsd from gsd.fl import GSDFile import gsd.hoomd import argparse parser = argparse.ArgumentParser(description='Get a snapshot from a gsd trajectory') non_opt = parser.add_argument_group('mandatory arguments') non_opt.add_argument('-i','--input', metavar="<gsd>", type=str,dest='input_file', required=True, help="gsd file to extract from") non_opt.add_argument('-o','--out', metavar='<gsd>', dest='output_file', required=True, help='Name for the output gsd for saving the extracted snapshot') parser.add_argument('-t','--timestep', metavar='<int>', dest='timestep', type=int,required=False, help='timestep to extract (if not specified, the last snapshot is written out)') args = parser.parse_args() output_file=args.output_file input_file=args.input_file if input_file==output_file: parser.error("Input and output file cannot be the SAME file! %s"%(output_file)) timestep =args.timestep print("Creating restart snapshot from gsd trajectory.") in_file=gsd.fl.GSDFile(name=input_file, mode='rb') trajectory = gsd.hoomd.HOOMDTrajectory(in_file) gsd.fl.create(name=output_file, application='gsd.hoomd ' + gsd.__version__, schema='hoomd', schema_version=[1,1]); all_timesteps = np.linspace(trajectory[0].configuration.step,\ trajectory[-1].configuration.step,\ len(trajectory),endpoint=True,dtype=int) if timestep==None: timestep = trajectory[-1].configuration.step if timestep not in all_timesteps: print("ERROR: timestep %d not found in trajectory %s"%(timestep,input_file)) np.set_printoptions(threshold=6) print("timesteps in trajectory: ",all_timesteps) exit(1) else: id = int(np.where(all_timesteps == timestep)[0]) with gsd.fl.GSDFile(output_file, 'wb') as f: traj = gsd.hoomd.HOOMDTrajectory(f) traj.append(trajectory[id]) sys.stdout.write("Frame extracted from %s to %s, timestep : %3d "%(input_file,output_file,trajectory[id].configuration.step)) sys.stdout.flush() print("\nDONE")
Trim snapshots from gsd
Similarly to the section above, you might have run a simulation with too many snapshots, and you wish to only keep some of them. This can be useful for trimming down from the beginning or end of the trajectory, as well as keeping every n-th frame only. All of this can reduce the file size sbstantially.
#!/usr/bin/env python3 import numpy as np import sys, re, os, gsd import gsd.hoomd import argparse import math def convert_size(size_bytes): ''' small helper function to convert bytes to human readable format ''' if size_bytes == 0: return "0 B" size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") i = int(math.floor(math.log(size_bytes, 1024))) p = math.pow(1024, i) s = round(size_bytes / p, 2) return "%s %s" % (s, size_name[i]) ############################### PARSE ################################## parser = argparse.ArgumentParser(description='Selectively delete frames from a gsd-file') non_opt = parser.add_argument_group('mandatory arguments') non_opt.add_argument('-o','--out',\ metavar='<gsd-file>', dest='output_file',type=str,\ required=False,help='Name for the thinned out gsd file (not needed if --info is used)') non_opt.add_argument('-i','--in',\ metavar='<gsd-file>', dest='input_file',type=str,\ required=True,help='input trajectory *.gsd file') parser.add_argument('--info', dest='info', action='store_true',\ help=" Only print file size information and potential period values and exit without writing a new file") parser.add_argument('-p','--p', metavar='<int>', type=int, default=1, required=False,dest='period', help='Period of frames to take, -p 10 means take every 10th frame, DEFAULT=1/every') parser.add_argument('-s','--skip',metavar='<int>', dest='skip',type=int,required=False,default=0, help='Number of frames to skip at the beginning (positive s) or take from the end (negative s) of the trajectory. DEFAULT=0/no skip') parser.add_argument('-l','--list',\ metavar='<ints>', dest='to_del', nargs='+', type=int,\ required=False,help='frame numbers to delete, DEFAULT=none') args = parser.parse_args() input_file = args.input_file to_del = args.to_del print(to_del) if not os.path.isfile(input_file): print("ERROR: file does not exist! %s"%(input_file)) exit(1) if args.info==True: # print some info about the file and exit without writing a new file print("Only printing information about gsd file: ", input_file) trajectory = gsd.hoomd.open(name=input_file, mode='rb') print("Total number of frames: ", len(trajectory)) if len(trajectory)==0: print("ERROR: trajectory does not seem to contain any snapshots, check file") exit(1) # more info about file content bytes = os.path.getsize(input_file) print("Total file size:",convert_size(bytes)) s_per_snap = (bytes/len(trajectory)) print("Size per snapshot", convert_size(s_per_snap)) print("potential values for p:") print("period frames size") print("--------------------------") for p in [2,3,5,10]: slice = trajectory[::p] new_s_per_snap = s_per_snap*len(slice) print(p," \t",len(slice),"\t",convert_size(new_s_per_snap)) exit(0) if args.info==False and (args.input_file is None or args.output_file is None): parser.error("Output file is required, specifiy -o/--out <file> or use --info") output_file = args.output_file if input_file==output_file: parser.error("Input and output file cannot be the SAME file! %s"%(output_file)) period = args.period if period<1: parser.error("Period needs to be >= 1! Current value %s"%period) skip = args.skip trajectory = gsd.hoomd.open(name=input_file, mode='rb') N_frames = len(trajectory) # figure out which frames to analyze if np.abs(skip)>=N_frames: print("ERROR: trajectory only has %d frames, skip=%d is too large!"%(N_frames,skip)) exit(1) if skip>=0: frames_to_write=np.arange(skip,N_frames,1).astype(int) else: frames_to_write=np.arange(N_frames+skip,N_frames,1).astype(int) frames_to_write=frames_to_write[::period] # Return the unique values in frames_to_write that are not in to_del. frames_to_write = np.setdiff1d(frames_to_write,to_del) if len(frames_to_write)<=0: print("ERROR: frames to write seems to be <=0! %s "%(frames_to_write)) exit(1) print("total number of frames ",N_frames) print("writing frames ",frames_to_write[0],"...",frames_to_write[-1]) print("writing every nth frame: ",period) gsd.fl.create(name=output_file, application='gsd.hoomd ' + gsd.__version__, schema='hoomd', schema_version=[1,1]) t = gsd.hoomd.open(name=output_file, mode='wb+') # Iterate over all frames of the sequence. for i in frames_to_write: sys.stdout.write("\rCurrent frame %3d/%3d"%(i,N_frames)) sys.stdout.flush() t.append(trajectory[int(i)]) sys.stdout.write("\nDONE\n") sys.stdout.flush()
Modify velocity components to contain some property
The configuration viewer vmd (as well as others) only reads in limited information from each frame in a trajectory, but it does read positions and velocities, It can be quite difficult (but possible) to compute properties of particles with the build in Tcl language, so you might want to use Python and python packages instead. A commonly used trick for this is to use Python to calculate a per-particle property of some kind and then write its value into a velocity component. This way, you can see/color the snapshots by this property using color gradients and build in options intended for velocities.
This code uses the Python package freud to calculate the Steinhardt order parameters q6 and q6 and writes them into the velocity components. This script can be easily adapted to color according to clusters, number of nearest neighbors, or any other single-particle property.
#!/usr/bin/env python3 import numpy as np import sys, re, os, gsd import gsd.hoomd import argparse import math import freud ############################### PARSE ################################## parser = argparse.ArgumentParser(description='Calculate av q_6,q_4 per particle from gsd-file') non_opt = parser.add_argument_group('mandatory arguments') non_opt.add_argument('-o','--out',\ metavar='<gsd-file>', dest='output_file',type=str,\ required=False,help='Name for the out .gsd file with q_6 and q_4 in the v_x,v_y components') non_opt.add_argument('-i','--in',\ metavar='<gsd-file>', dest='input_file',type=str,\ required=True,help='input trajectory *.gsd file') args = parser.parse_args() input_file = args.input_file output_file = args.output_file if input_file==output_file: parser.error("Input and output file cannot be the SAME file! %s"%(output_file)) # read gsd file in_file = gsd.fl.GSDFile(name=input_file, mode='rb') in_trajectory = gsd.hoomd.HOOMDTrajectory(in_file) N_frames = int(in_file.nframes) # output gsd file t = gsd.hoomd.open(name=output_file, mode='wb') for i,frame in enumerate(in_trajectory): # print out current frame, calculation takes quite long, so good for checking # you can open files in ovito even while they are being written, and reload # from time to time sys.stdout.write("\rCurrent frame %3d/%3d"%(i,N_frames)) sys.stdout.flush() # calculate averaged q_6, q_4 pos = frame.particles.position box = freud.box.Box.from_box(frame.configuration.box[0:3]) system = (box, pos) # cutoff of 1.5 - first minimum in the pair correlation function g(r), so # should get all nearest neighbours neigh = dict(r_max=1.5) ql_6 = freud.order.Steinhardt(6,average=True) ql_6_sc = ql_6.compute(system=system,neighbors=neigh).particle_order ql_4 = freud.order.Steinhardt(4,average=True) ql_4_sc = ql_4.compute(system=system,neighbors=neigh).particle_order # make a new snapshot snapsot_new = gsd.hoomd.Snapshot() # copy information from old gsd frame into the new one snapsot_new = frame # put (q6,q4,0) into velocity vel_new = np.vstack([ql_6_sc,ql_4_sc,np.zeros(len(pos))]).T snapsot_new.particles.velocity = vel_new # append the new snapshot to the output gsd file t.append(snapsot_new) sys.stdout.write("\nDONE\n")
Convert configuration files to different formats
lammps dump --> gsd
This only reads the last configuration and writes it to a gsd file. It's easily modified to convert the whole trajectory by writing gsd snapshots inside the while-loop instead of after it. Lammps dump files contain no bond information, so that is lost. One can implement a bond-generation algorithm (based on other information or distance) in this as well, but that will depend on the simulation/situation. Particle names for the types are invented starting from capital letters, e.g. A=0, B=1, C=2,…
import numpy as np import gsd.hoomd import argparse import string parser = argparse.ArgumentParser(description='Get last snapshot from a dump trajectory') non_opt = parser.add_argument_group('mandatory arguments') non_opt.add_argument('-i','--input', metavar="<dump>", type=str,dest='input_file', required=True, help="gsd file to extract from") non_opt.add_argument('-o','--out', metavar='<gsd>', dest='output_file', required=True, help='Name for the output gsd for saving the last snapshot') args = parser.parse_args() input_file = args.input_file output_file = args.output_file print("Creating snapshot from gsd trajectory:") t = gsd.hoomd.open(name=output_file, mode='wb') print("Input:",input_file) print("Output:",output_file) with open(input_file) as f: while True: line=f.readline() if line.strip() == "ITEM: TIMESTEP": nexline=f.readline() timestep = int(nexline.strip()) coords=[] read = False if "ITEM: NUMBER OF ATOMS" in line: nexline=f.readline() num =int(nexline.strip()) if "ITEM: BOX" in line: lx=float(f.readline().strip().split()[1])*2 ly=float(f.readline().strip().split()[1])*2 lz=float(f.readline().strip().split()[1])*2 if "ITEM: ATOM" in line: for i in range(num): nextline=f.readline() coords.append(nextline.strip().split()) read = True if read == True: box=np.array([lx,ly,lz]) coords=np.asarray(coords) if not line: break type_ids=(coords.T[-4:-3].T).astype(np.int) positions=(coords.T[-3:].T).astype(np.float) # invent particle type names u = np.unique(type_ids) letter_list = [string.ascii_uppercase[a] for a in u] # make a new gsd frame s1 = gsd.hoomd.Snapshot() s1.configuration.step=timestep s1.configuration.box=np.array([box[0],box[1],box[2],0,0,0]) s1.configuration.dimensions=3 s1.particles.types=letter_list # if bonds exists or can be generated: # s1.bonds.N=len(bonds) # s1.bonds.types=['bond'] # s1.bonds.typeid=np.zeros(len(bonds)) # s1.bonds.group =bonds s1.particles.N=len(positions) s1.particles.typeid=type_ids s1.particles.position=positions #s1.particles.velocity = np.zeros_like(positions) #if velocity exists, it can be written here #s1.particles.image = np.zeros_like(positions) t.append(s1) print("DONE")
gsd --> lammps dump
This converts the last configuration in a gsd file into a lammps dump file. It's easily modified to take any snapshot by changing the index for the frame or by looping over the gsd trajectory. There is no bond info in the lammps file, so that info is lost. Depending on the details the particle id might need to be shifted by +1 (hoomd starts counting from 0 , lammps starts from 1).
import numpy as np import gsd.hoomd import argparse parser = argparse.ArgumentParser(description='Get last snapshot from a gsd trajectory') non_opt = parser.add_argument_group('mandatory arguments') non_opt.add_argument('-i','--input', metavar="<gsd>", type=str,dest='input_file', required=True, help="gsd file to extract from") non_opt.add_argument('-o','--out', metavar='<dump>', dest='output_file', required=True, help='Name for the output dump for saving the last snapshot') args = parser.parse_args() input_file = args.input_file output_file = args.output_file print("Creating snapshot from gsd trajectory:") in_file = gsd.fl.GSDFile(name=input_file, mode='rb', application='gsd.hoomd ' + gsd.__version__, schema='hoomd', schema_version=[1,4]) print("Input:",input_file) trajectory = gsd.hoomd.HOOMDTrajectory(in_file) print("Output:",output_file) fout = open(output_file,'w') frame = trajectory[-1] # take last snapshot timestep = frame.configuration.step Box = frame.configuration.box[0:3] fout.write("ITEM: TIMESTEP\n") fout.write("%d\n"%timestep) fout.write("ITEM: NUMBER OF ATOMS\n") fout.write("%d\n"%len(frame.particles.position)) fout.write("ITEM: BOX BOUNDS xy xz yz pp pp pp\n") fout.write("%f %f 0.0\n"%(-Box[0]/2.0,Box[0]/2.0)) fout.write("%f %f 0.0\n"%(-Box[1]/2.0,Box[1]/2.0)) fout.write("%f %f 0.0\n"%(-Box[2]/2.0,Box[2]/2.0)) fout.write("ITEM: ATOMS id molid x y z\n") for i,p in enumerate(frame.particles.position): fout.write("%d %d %f %f %f\n"%(i,frame.particles.typeid[i],p[0],p[1],p[2])) print("DONE")