Source code for alchemlyb.estimators.mbar_

from __future__ import annotations
from typing import Literal

import numpy as np
import pandas as pd
import pymbar
import numpy.typing as npt
from sklearn.base import BaseEstimator

from . import BAR
from .base import _EstimatorMixOut


[docs] class MBAR(BaseEstimator, _EstimatorMixOut): r"""Multi-state Bennett acceptance ratio (MBAR). Parameters ---------- maximum_iterations : int, optional Set to limit the maximum number of iterations performed. relative_tolerance : float, optional Set to determine the relative tolerance convergence criteria. initial_f_k : np.ndarray, float, shape=(K), optional or String `BAR` When `isinstance(initial_f_k, np.ndarray)`, `initial_f_k` will be used as initial guess for MBAR estimator. initial_f_k should be dimensionless free energies. When `initial_f_k` is ``None``, ``initial_f_k`` will be set to 0. When `initial_f_k` is set to "BAR", a BAR calculation will be done and the result is used as the initial guess (default). .. versionchanged:: 2.3.0 The new default is now "BAR" as it provides a substantial speedup over the previous default `None`. method : str, optional, default="robust" The optimization routine to use. This can be any of the methods available via :func:`scipy.optimize.minimize` or :func:`scipy.optimize.root`. n_bootstraps : int, optional Whether to use bootstrap to estimate uncertainty. `0` means use analytic error estimation. 50~200 is a reasonable range to do bootstrap. verbose : bool, optional Set to ``True`` if verbose debug output from :mod:`pymbar` is desired. Attributes ---------- delta_f_ : DataFrame The estimated dimensionless free energy difference between each state. d_delta_f_ : DataFrame The estimated statistical uncertainty (one standard deviation) in dimensionless free energy differences. delta_h_ : DataFrame The estimated dimensionless enthalpy difference between each state. d_delta_h_ : DataFrame The estimated statistical uncertainty (one standard deviation) in dimensionless enthalpy differences. delta_sT_ : DataFrame, optional The estimated dimensionless entropy difference between each state. d_delta_sT_ : DataFrame The estimated statistical uncertainty (one standard deviation) in dimensionless entropy differences. theta_ : DataFrame The theta matrix. states_ : list Lambda states for which free energy differences were obtained. Notes ----- See [Shirts2008]_ for details of the derivation and cite the paper when using MBAR in published work. See Also -------- pymbar.mbar.MBAR .. versionchanged:: 1.0.0 `delta_f_`, `d_delta_f_`, `states_` are view of the original object. .. versionchanged:: 2.0.0 default value for `method` was changed from "hybr" to "robust" .. versionchanged:: 2.1.0 `n_bootstraps` option added. .. versionchanged:: 2.4.0 Handle initial estimate, initial_f_k, from bar in the instance that not all lambda states represented as column headers are represented in the indices of u_nk. .. versionchanged:: 2.5.0 Added computation of enthalpy and entropy """
[docs] def __init__( self, maximum_iterations: int = 10000, relative_tolerance: float = 1.0e-7, initial_f_k: np.ndarray | Literal["BAR"] | None = "BAR", method: str = "robust", n_bootstraps: int = 0, verbose: bool = False, ) -> None: self.maximum_iterations = maximum_iterations self.relative_tolerance = relative_tolerance if isinstance(initial_f_k, str) and initial_f_k != "BAR": raise ValueError( f"Only `BAR` is supported as string input to `initial_f_k`. Got ({initial_f_k})." ) else: self.initial_f_k = initial_f_k self.method = method self.verbose = verbose self.n_bootstraps = n_bootstraps # handle for pymbar.MBAR object self._mbar = None
[docs] def fit(self, u_nk: pd.DataFrame, compute_entropy_enthalpy: bool = False) -> "MBAR": """ Compute overlap matrix of reduced potentials using multi-state Bennett acceptance ratio. Parameters ---------- u_nk : DataFrame ``u_nk[n, k]`` is the reduced potential energy of uncorrelated configuration ``n`` evaluated at state ``k``. compute_entropy_enthalpy : bool, optional, default=False Compute entropy and enthalpy. """ # sort by state so that rows from same state are in contiguous blocks u_nk = u_nk.sort_index(level=u_nk.index.names[1:]) # type: ignore[arg-type] groups = u_nk.groupby(level=u_nk.index.names[1:]) N_k = [ ( len(groups.get_group(i if isinstance(i, tuple) else (i,))) # type: ignore[unreachable] if i in groups.groups else 0 ) for i in u_nk.columns ] self._states_ = u_nk.columns.values.tolist() if isinstance(self.initial_f_k, str) and self.initial_f_k == "BAR": bar = BAR( maximum_iterations=self.maximum_iterations, relative_tolerance=self.relative_tolerance, verbose=self.verbose, ) bar.fit(u_nk) initial_f_k = bar.delta_f_.iloc[0, :] # type: ignore[union-attr] if len(bar.delta_f_.iloc[0, :]) != len(self._states_): # type: ignore[union-attr] states = [ x for i, x in enumerate(self._states_[:-1]) if N_k[i] > 0 and N_k[i + 1] > 0 ] initial_f_k = pd.Series( [ initial_f_k.loc[x] if x in states else np.nan for x in self._states_ ], index=self._states_, dtype=float, ) else: initial_f_k = self.initial_f_k self._mbar = pymbar.MBAR( u_nk.T, N_k, maximum_iterations=self.maximum_iterations, relative_tolerance=self.relative_tolerance, verbose=self.verbose, initial_f_k=initial_f_k, solver_protocol=self.method, n_bootstraps=self.n_bootstraps, ) uncertainty_method = None if self.n_bootstraps == 0 else "bootstrap" out = self._mbar.compute_free_energy_differences( # type: ignore[attr-defined] return_theta=True, uncertainty_method=uncertainty_method ) if compute_entropy_enthalpy: out.update( self._mbar.compute_entropy_and_enthalpy( # type: ignore[attr-defined] uncertainty_method=uncertainty_method ) ) self._delta_f_ = pd.DataFrame( out["Delta_f"], columns=self._states_, index=self._states_, ) self._d_delta_f_ = pd.DataFrame( out["dDelta_f"], columns=self._states_, index=self._states_, ) self.theta_ = pd.DataFrame( out["Theta"], columns=self._states_, index=self._states_, ) if compute_entropy_enthalpy: self._delta_h_ = pd.DataFrame( out["Delta_u"], columns=self._states_, index=self._states_, ) self._d_delta_h_ = pd.DataFrame( out["dDelta_u"], columns=self._states_, index=self._states_, ) self._delta_sT_ = pd.DataFrame( out["Delta_s"], columns=self._states_, index=self._states_, ) self._d_delta_sT_ = pd.DataFrame( out["dDelta_s"], columns=self._states_, index=self._states_, ) self._delta_f_.attrs = u_nk.attrs self._d_delta_f_.attrs = u_nk.attrs if compute_entropy_enthalpy: self._delta_h_.attrs = u_nk.attrs # type: ignore[union-attr] self._d_delta_h_.attrs = u_nk.attrs # type: ignore[union-attr] self._delta_sT_.attrs = u_nk.attrs # type: ignore[union-attr] self._d_delta_sT_.attrs = u_nk.attrs # type: ignore[union-attr] return self
@property def overlap_matrix(self) -> npt.NDArray: r"""MBAR overlap matrix. The estimated state overlap matrix :math:`O_{ij}` is an estimate of the probability of observing a sample from state :math:`i` in state :math:`j`. The :attr:`overlap_matrix` is computed on-the-fly. Assign it to a variable if you plan to re-use it. See Also --------- pymbar.MBAR.compute_overlap """ return self._mbar.compute_overlap()["matrix"] # type: ignore[attr-defined, no-any-return]