Source code for waterEntropy.recipes.interfacial_solvent

"""
These functions calculate orientational entropy from labelled
coordination shells
"""

import logging
import time

from dask.distributed import Client

import waterEntropy.analysis.HB as HBond
import waterEntropy.analysis.HB_labels as HBLabels
import waterEntropy.analysis.RAD as RADShell
import waterEntropy.analysis.shell_labels as RADLabels
from waterEntropy.analysis.shells import ShellCollection
from waterEntropy.entropy.convariances import CovarianceCollection
from waterEntropy.entropy.orientations import Orientations
from waterEntropy.entropy.vibrations import Vibrations
import waterEntropy.maths.trig as Trig
from waterEntropy.recipes.forces_torques import get_forces_torques
from waterEntropy.utils.helpers import nested_dict
import waterEntropy.utils.selections as Selections


[docs] def find_interfacial_solvent(solutes, system, shells: ShellCollection): # pylint: disable=too-many-locals """ For a given set of solute molecules, find the RAD shells for each UA in the molecules, if a solvent molecule is in the RAD shell, then save the solvent atom index to a list. A solvent is defined as molecule that constitutes a single UA. These solvent molecule are defined as interfacial molecules. :param solutes: mdanalysis instance of a selection of atoms in solute molecules that are greater than one UA :param system: mdanalysis instance of atoms in a frame :param shells: ShellCollection instance """ solvent_indices = [] molecules = solutes.fragments # fragments is mdanalysis equiv to molecules for molecule in molecules: # 1. find heavy atoms in the molecule UAs = Selections.find_molecule_UAs(molecule) for atom in UAs: # 2. DON'T automatically look for RAD shell as this is slow, # instead, check if waters are in the distance array before using # RAD. Instead, find distances of neighbours first. sorted_indices, sorted_distances = Trig.get_sorted_neighbours( atom.index, system ) sorted_atoms = Selections.get_selection( system, "index", sorted_indices[:20] ) sorted_waters = sorted_atoms.select_atoms("water") # 2b. Only get RAD shells if there is a water in the closest X # neighbours if len(sorted_waters) > 0: # 3. find the shell of each UA atom in a molecule shell = RADShell.get_RAD_shell( atom, system, shells, sorted_indices, sorted_distances ) # get the molecule UA shell shell_indices = shell.UA_shell # 4. for each neighbour in the RAD shell, find single UA molecules shell_atoms = Selections.get_selection(system, "index", shell_indices) waters = shell_atoms.select_atoms("water") solvent_indices.extend(waters.indices) return list(set(solvent_indices))
[docs] def save_solvent_indices( frame: int, atom_idx: int, nearest_resid: int, nearest_resname: str, frame_solvent_indices: dict, ): """ Save the solvent indices at interfaces per frame into a dictionary :param frame: frame number of analysed frame :param atom_idx: solvent atom index :param nearest_resid: residue of number of nearest solute molecule :param nearest_resname: residue name of nearest solute molecule :param frame_solvent_indices: the dictionary to populate """ if nearest_resid not in frame_solvent_indices[frame][nearest_resname]: frame_solvent_indices[frame][nearest_resname][nearest_resid] = [] frame_solvent_indices[frame][nearest_resname][nearest_resid].append(atom_idx) return frame_solvent_indices
[docs] def get_interfacial_shells(system, start: int, end: int, step: int): # pylint: disable=too-many-locals """ For a given system, containing the topology and coordinates of molecules, find the interfacial water molecules around solutes and calculate their orientational entropy if there is a solute atom in the solvent coordination shell. :param system: mdanalysis instance of atoms in a frame :param start: starting frame number :param end: end frame number :param step: steps between frames """ # don't need to include the frame_solvent_indices dictionary frame_solvent_shells = nested_dict() # pylint: disable=unused-variable for ts in system.trajectory[start:end:step]: # initialise the RAD and HB class instances to store shell information shells = ShellCollection() # 1. find > 1 UA molecules in system, these are the solutes resid_list = Selections.find_solute_molecules(system) solutes = Selections.get_selection(system, "resid", resid_list) # 2. find the interfacial solvent molecules that are 1 UA in size # and are in the RAD shell of any solute solvent_indices = find_interfacial_solvent(solutes, system, shells) first_shell_solvent = Selections.get_selection(system, "index", solvent_indices) # 3. iterate through first shell solvent and find their RAD shells, # HBing in the shells and shell labels for solvent in first_shell_solvent: # 3a. find RAD shell of interfacial solvent shell = RADShell.get_RAD_shell(solvent, system, shells) shell.nearest_nonlike_idx = RADLabels.get_nearest_nonlike(shell, system) if shell.nearest_nonlike_idx is not None: # 3b. populate the shells into a dictionary for stats # only if a different atom is in the RAD shell nearest = system.atoms[shell.nearest_nonlike_idx] frame_solvent_shells = save_solvent_shells( ts.frame, shell.atom_idx, shell.UA_shell, frame_solvent_shells, ) return frame_solvent_shells
[docs] def save_solvent_shells( frame: int, atom_idx: int, shell_indices: list, frame_solvent_shells: dict, ): """ Save the solvent indices at interfaces per frame into a dictionary :param frame: frame number of analysed frame :param atom_idx: solvent atom index :param nearest_resid: residue of number of nearest solute molecule :param nearest_resname: residue name of nearest solute molecule :param frame_solvent_indices: the dictionary to populate """ if atom_idx not in frame_solvent_shells[frame]: frame_solvent_shells[frame][atom_idx] = shell_indices return frame_solvent_shells
[docs] def get_interfacial_water_orient_entropy( system, start: int, end: int, step: int, temperature=298, parallel=False, client=None, ): # pylint: disable=E1121 # pylint: disable=R0913 # pylint: disable=too-many-locals """ For a given system, containing the topology and coordinates of molecules, find the interfacial water molecules around solutes and calculate their orientational entropy if there is a solute atom in the solvent coordination shell. This method is a common calling function for the serial and parallel implementation and should be the calling point into those functions. Defaults to serial. :param system: mdanalysis instance of atoms in a frame :param start: starting frame number :param end: end frame number :param step: steps between frames :param parallel: set to True to run the parallel calculation :param client: dask client instance if custom cluster layout required """ # Initialise data structures to hold processed data. frame_solvent_indices = nested_dict() covariances = CovarianceCollection() hb_labels = HBLabels.HBLabelCollection() n_frames = 0 results = [] # Steps 1,2 and 3 in function calls. indices = list(range(start, end, step)) if parallel is True: # If no Dask client cluster has been setup externally then default to single host. if client is None: client = Client( processes=True, threads_per_worker=1, silence_logs=logging.CRITICAL ) # parallelise over frames by packing them and vars into batches and mapping # them across dask workers. batch_size = client.scheduler_info()["n_workers"] batches = [ indices[i : i + batch_size] for i in range(0, len(indices), batch_size) ] for i, batch in enumerate(batches): args = [(index, system) for index in batch] futures = client.map(_entropy_per_step, args) results.extend(client.gather(futures)) if i < len(batches) - 1: client.restart() time.sleep(2) # Otherwise we are on HPC and we can run without the memory fixes. else: args = [(index, system) for index in indices] futures = client.map(_entropy_per_step, args) results.extend(client.gather(futures)) client.close() else: for index in indices: results.append(_entropy_per_step((index, system))) # merge the solvent indices dicts for res in results: frame_solvent_indices.update(res[0]) covariances.merge(res[1]) hb_labels.merge(res[2]) n_frames += res[3] # 4. get the orientational entropy of interfacial waters and save # them to a dictionary # TO-DO: add average Nc in Sorient dict # Sorient_dict = Orient.get_resid_orientational_entropy_from_dict( # hb_labels.resid_labelled_shell_counts # ) # # 5. Get the vibrational entropy of interfacial waters # initialise the Vibrations class instance to store vibrational entropies Sorients = Orientations() Sorients.add_data(hb_labels) Sorient_dict = Sorients.resid_labelled_Sorient vibrations = Vibrations(temperature) vibrations.add_data(covariances, diagonalise=True) return ( Sorient_dict, covariances, vibrations, frame_solvent_indices, n_frames, )
[docs] def _entropy_per_step(args): # pylint: disable=too-many-locals """ For a given system, containing the topology and coordinates of molecules, find the interfacial water molecules around solutes and calculate their orientational entropy if there is a solute atom in the solvent coordination shell. :param args: tuple of variables containing frame index and MDA system frame. """ # 1. unpack vars index, system = args ts = system.trajectory[index] # 2. redeclare these on the frame by frame basis, and assemble them into the # main data structures upon return. frame_solvent_indices = nested_dict() # 3. initialise the Covariance class instance to store covariance matrices covariances = CovarianceCollection() hb_labels = HBLabels.HBLabelCollection() # and store number of frames analysed n_frames = 1 # 4. initialise the RAD and HB class instances to store shell information shells = ShellCollection() HBs = HBond.HBCollection() # 5. find > 1 UA molecules in system, these are the solutes resid_list = Selections.find_solute_molecules(system) solutes = Selections.get_selection(system, "resid", resid_list) # 6. find the interfacial solvent molecules that are 1 UA in size # and are in the RAD shell of any solute solvent_indices = find_interfacial_solvent(solutes, system, shells) first_shell_solvent = Selections.get_selection(system, "index", solvent_indices) # 7. iterate through first shell solvent and find their RAD shells, # HBing in the shells and shell labels for solvent in first_shell_solvent: # 8a. find RAD shell of interfacial solvent shell = RADShell.get_RAD_shell(solvent, system, shells) # 8b. find HBing in the shell HBond.get_shell_HBs(shell, system, HBs, shells) # 8c. find RAD shell labels shell = RADLabels.get_shell_labels(solvent.index, system, shell, shells) # 8d. find HB labels HBLabels.get_HB_labels(solvent.index, system, HBs, shells) if shell.nearest_nonlike_idx is not None: # 8e. populate the labels into a dictionary for stats # only if a different atom is in the RAD shell nearest = system.atoms[shell.nearest_nonlike_idx] nearest_resid = nearest.resid nearest_resname = nearest.resname hb_labels.add_data( nearest_resid, nearest_resname, shell.labels, shell.donates_to_labels, shell.accepts_from_labels, shell.N_w, ) frame_solvent_indices = save_solvent_indices( ts.frame, shell.atom_idx, nearest_resid, nearest_resname, frame_solvent_indices, ) # 3f. calculate the running average of force and torque # covariance matrices solvent_molecule = system.atoms[solvent.index].fragment # get molecule get_forces_torques( covariances, solvent_molecule, f"{nearest_resname}_{nearest_resid}", system, ) return frame_solvent_indices, covariances, hb_labels, n_frames