Source code for waterEntropy.recipes.interfacial_solvent

"""
These functions calculate orientational entropy from labelled
coordination shells
"""

import logging
import time

from dask.distributed import Client

import waterEntropy.analysis.HB as HBond
import waterEntropy.analysis.HB_labels as HBLabels
import waterEntropy.analysis.RAD as RADShell
import waterEntropy.analysis.shell_labels as RADLabels
from waterEntropy.analysis.shells import ShellCollection
from waterEntropy.entropy.convariances import CovarianceCollection
from waterEntropy.entropy.orientations import Orientations
from waterEntropy.entropy.vibrations import Vibrations
import waterEntropy.maths.trig as Trig
from waterEntropy.recipes.forces_torques import get_forces_torques
from waterEntropy.utils.helpers import nested_dict
import waterEntropy.utils.selections as Selections



[docs]
def find_interfacial_solvent(solutes, system, shells: ShellCollection):
    # pylint: disable=too-many-locals
    """
    For a given set of solute molecules, find the RAD shells for each UA in the
    molecules, if a solvent molecule is in the RAD shell, then save the solvent
    atom index to a list. A solvent is defined as molecule that constitutes a
    single UA. These solvent molecule are defined as interfacial molecules.

    :param solutes: mdanalysis instance of a selection of atoms in solute
        molecules that are greater than one UA
    :param system: mdanalysis instance of atoms in a frame
    :param shells: ShellCollection instance
    """
    solvent_indices = []
    molecules = solutes.fragments  # fragments is mdanalysis equiv to molecules
    for molecule in molecules:
        # 1. find heavy atoms in the molecule
        UAs = Selections.find_molecule_UAs(molecule)
        for atom in UAs:
            # 2. DON'T automatically look for RAD shell as this is slow,
            # instead, check if waters are in the distance array before using
            # RAD. Instead, find distances of neighbours first.
            sorted_indices, sorted_distances = Trig.get_sorted_neighbours(
                atom.index, system
            )
            sorted_atoms = Selections.get_selection(
                system, "index", sorted_indices[:20]
            )
            sorted_waters = sorted_atoms.select_atoms("water")
            # 2b. Only get RAD shells if there is a water in the closest X
            # neighbours
            if len(sorted_waters) > 0:
                # 3. find the shell of each UA atom in a molecule
                shell = RADShell.get_RAD_shell(
                    atom, system, shells, sorted_indices, sorted_distances
                )  # get the molecule UA shell
                shell_indices = shell.UA_shell
                # 4. for each neighbour in the RAD shell, find single UA molecules
                shell_atoms = Selections.get_selection(system, "index", shell_indices)
                waters = shell_atoms.select_atoms("water")
                solvent_indices.extend(waters.indices)
    return list(set(solvent_indices))




[docs]
def save_solvent_indices(
    frame: int,
    atom_idx: int,
    nearest_resid: int,
    nearest_resname: str,
    frame_solvent_indices: dict,
):
    """
    Save the solvent indices at interfaces per frame into a dictionary

    :param frame: frame number of analysed frame
    :param atom_idx: solvent atom index
    :param nearest_resid: residue of number of nearest solute molecule
    :param nearest_resname: residue name of nearest solute molecule
    :param frame_solvent_indices: the dictionary to populate
    """
    if nearest_resid not in frame_solvent_indices[frame][nearest_resname]:
        frame_solvent_indices[frame][nearest_resname][nearest_resid] = []
    frame_solvent_indices[frame][nearest_resname][nearest_resid].append(atom_idx)
    return frame_solvent_indices




[docs]
def print_frame_solvent_dicts(frame_solvent_indices: dict):
    """
    Print the interfacial solvent for each analysed frame

    :param frame_solvent_indices: dictionary containing solvent indices in the
        first shell of solute atoms over each frame analysed
    """
    for frame, resname_key in sorted(list(frame_solvent_indices.items())):
        for resname, resid_key in sorted(list(resname_key.items())):
            for resid, solvents in sorted(list(resid_key.items())):
                print(frame, resname, resid, len(solvents), solvents)




[docs]
def get_interfacial_shells(system, start: int, end: int, step: int):
    # pylint: disable=too-many-locals
    """
    For a given system, containing the topology and coordinates of molecules,
    find the interfacial water molecules around solutes and calculate their
    orientational entropy if there is a solute atom in the solvent coordination
    shell.

    :param system: mdanalysis instance of atoms in a frame
    :param start: starting frame number
    :param end: end frame number
    :param step: steps between frames
    """
    # don't need to include the frame_solvent_indices dictionary
    frame_solvent_shells = nested_dict()
    # pylint: disable=unused-variable
    for ts in system.trajectory[start:end:step]:
        # initialise the RAD and HB class instances to store shell information
        shells = ShellCollection()
        # 1. find > 1 UA molecules in system, these are the solutes
        resid_list = Selections.find_solute_molecules(system)
        solutes = Selections.get_selection(system, "resid", resid_list)
        # 2. find the interfacial solvent molecules that are 1 UA in size
        #   and are in the RAD shell of any solute
        solvent_indices = find_interfacial_solvent(solutes, system, shells)
        first_shell_solvent = Selections.get_selection(system, "index", solvent_indices)
        # 3. iterate through first shell solvent and find their RAD shells,
        #   HBing in the shells and shell labels
        for solvent in first_shell_solvent:
            # 3a. find RAD shell of interfacial solvent
            shell = RADShell.get_RAD_shell(solvent, system, shells)
            shell.nearest_nonlike_idx = RADLabels.get_nearest_nonlike(shell, system)
            if shell.nearest_nonlike_idx is not None:
                # 3b. populate the shells into a dictionary for stats
                # only if a different atom is in the RAD shell
                nearest = system.atoms[shell.nearest_nonlike_idx]
                frame_solvent_shells = save_solvent_shells(
                    ts.frame,
                    shell.atom_idx,
                    shell.UA_shell,
                    frame_solvent_shells,
                )
    return frame_solvent_shells




[docs]
def save_solvent_shells(
    frame: int,
    atom_idx: int,
    shell_indices: list,
    frame_solvent_shells: dict,
):
    """
    Save the solvent indices at interfaces per frame into a dictionary

    :param frame: frame number of analysed frame
    :param atom_idx: solvent atom index
    :param nearest_resid: residue of number of nearest solute molecule
    :param nearest_resname: residue name of nearest solute molecule
    :param frame_solvent_indices: the dictionary to populate
    """
    if atom_idx not in frame_solvent_shells[frame]:
        frame_solvent_shells[frame][atom_idx] = shell_indices
    return frame_solvent_shells




[docs]
def print_frame_solvent_shells(frame_solvent_shells: dict):
    """
    Print the interfacial solvent for each analysed frame

    :param frame_solvent_indices: dictionary containing solvent indices in the
        first shell of solute atoms over each frame analysed
    """
    for frame, atom_idx_key in sorted(list(frame_solvent_shells.items())):
        for atom_idx, shell_indices in sorted(list(atom_idx_key.items())):
            print(frame, atom_idx, shell_indices, len(shell_indices))




[docs]
def get_interfacial_water_orient_entropy(
    system,
    start: int,
    end: int,
    step: int,
    temperature=298,
    parallel=False,
    client=None,
):
    # pylint: disable=E1121
    # pylint: disable=R0913
    # pylint: disable=too-many-locals
    """
    For a given system, containing the topology and coordinates of molecules,
    find the interfacial water molecules around solutes and calculate their
    orientational entropy if there is a solute atom in the solvent coordination
    shell. This method is a common calling function for the serial and parallel
    implementation and should be the calling point into those functions. Defaults
    to serial.

    :param system: mdanalysis instance of atoms in a frame
    :param start: starting frame number
    :param end: end frame number
    :param step: steps between frames
    :param parallel: set to True to run the parallel calculation
    :param client: dask client instance if custom cluster layout required
    """
    # Initialise data structures to hold processed data.
    frame_solvent_indices = nested_dict()
    covariances = CovarianceCollection()
    hb_labels = HBLabels.HBLabelCollection()
    n_frames = 0
    results = []
    # Steps 1,2 and 3 in function calls.
    indices = list(range(start, end, step))
    if parallel is True:
        # If no Dask client cluster has been setup externally then default to single host.
        if client is None:
            client = Client(
                processes=True, threads_per_worker=1, silence_logs=logging.CRITICAL
            )
            # parallelise over frames by packing them and vars into batches and mapping
            # them across dask workers.
            batch_size = client.scheduler_info()["n_workers"]
            batches = [
                indices[i : i + batch_size] for i in range(0, len(indices), batch_size)
            ]
            for i, batch in enumerate(batches):
                args = [(index, system) for index in batch]
                futures = client.map(_entropy_per_step, args)
                results.extend(client.gather(futures))
                if i < len(batches) - 1:
                    client.restart()
                    time.sleep(2)
        # Otherwise we are on HPC and we can run without the memory fixes.
        else:
            args = [(index, system) for index in indices]
            futures = client.map(_entropy_per_step, args)
            results.extend(client.gather(futures))
        client.close()
    else:
        for index in indices:
            results.append(_entropy_per_step((index, system)))
    # merge the solvent indices dicts
    for res in results:
        frame_solvent_indices.update(res[0])
        covariances.merge(res[1])
        hb_labels.merge(res[2])
        n_frames += res[3]
    # 4. get the orientational entropy of interfacial waters and save
    #   them to a dictionary
    # TO-DO: add average Nc in Sorient dict
    # Sorient_dict = Orient.get_resid_orientational_entropy_from_dict(
    #     hb_labels.resid_labelled_shell_counts
    # )
    # # 5. Get the vibrational entropy of interfacial waters
    # initialise the Vibrations class instance to store vibrational entropies
    Sorients = Orientations()
    Sorients.add_data(hb_labels)
    Sorient_dict = Sorients.resid_labelled_Sorient
    vibrations = Vibrations(temperature)
    vibrations.add_data(covariances, diagonalise=True)

    return (
        Sorient_dict,
        covariances,
        vibrations,
        frame_solvent_indices,
        n_frames,
    )




[docs]
def _entropy_per_step(args):
    # pylint: disable=too-many-locals
    """
    For a given system, containing the topology and coordinates of molecules,
    find the interfacial water molecules around solutes and calculate their
    orientational entropy if there is a solute atom in the solvent coordination
    shell.

    :param args: tuple of variables containing frame index and MDA system frame.
    """
    # 1. unpack vars
    index, system = args
    ts = system.trajectory[index]
    # 2. redeclare these on the frame by frame basis, and assemble them into the
    # main data structures upon return.
    frame_solvent_indices = nested_dict()
    # 3. initialise the Covariance class instance to store covariance matrices
    covariances = CovarianceCollection()
    hb_labels = HBLabels.HBLabelCollection()
    # and store number of frames analysed
    n_frames = 1
    # 4. initialise the RAD and HB class instances to store shell information
    shells = ShellCollection()
    HBs = HBond.HBCollection()
    # 5. find > 1 UA molecules in system, these are the solutes
    resid_list = Selections.find_solute_molecules(system)
    solutes = Selections.get_selection(system, "resid", resid_list)
    # 6. find the interfacial solvent molecules that are 1 UA in size
    #   and are in the RAD shell of any solute
    solvent_indices = find_interfacial_solvent(solutes, system, shells)
    first_shell_solvent = Selections.get_selection(system, "index", solvent_indices)
    # 7. iterate through first shell solvent and find their RAD shells,
    #   HBing in the shells and shell labels
    for solvent in first_shell_solvent:
        # 8a. find RAD shell of interfacial solvent
        shell = RADShell.get_RAD_shell(solvent, system, shells)
        # 8b. find HBing in the shell
        HBond.get_shell_HBs(shell, system, HBs, shells)
        # 8c. find RAD shell labels
        shell = RADLabels.get_shell_labels(solvent.index, system, shell, shells)
        # 8d. find HB labels
        HBLabels.get_HB_labels(solvent.index, system, HBs, shells)
        if shell.nearest_nonlike_idx is not None:
            # 8e. populate the labels into a dictionary for stats
            # only if a different atom is in the RAD shell
            nearest = system.atoms[shell.nearest_nonlike_idx]
            nearest_resid = nearest.resid
            nearest_resname = nearest.resname
            hb_labels.add_data(
                nearest_resid,
                nearest_resname,
                shell.labels,
                shell.donates_to_labels,
                shell.accepts_from_labels,
                shell.N_w,
            )
            frame_solvent_indices = save_solvent_indices(
                ts.frame,
                shell.atom_idx,
                nearest_resid,
                nearest_resname,
                frame_solvent_indices,
            )
            # 3f. calculate the running average of force and torque
            # covariance matrices
            solvent_molecule = system.atoms[solvent.index].fragment  # get molecule
            get_forces_torques(
                covariances,
                solvent_molecule,
                f"{nearest_resname}_{nearest_resid}",
                system,
            )
    return frame_solvent_indices, covariances, hb_labels, n_frames