Source code for alchemlyb.estimators.bar_

import numpy as np
import pandas as pd
from pymbar.other_estimators import bar as BAR_
from sklearn.base import BaseEstimator

from .base import _EstimatorMixOut


[docs] class BAR(BaseEstimator, _EstimatorMixOut): """Bennett acceptance ratio (BAR). Parameters ---------- maximum_iterations : int, optional Set to limit the maximum number of iterations performed. relative_tolerance : float, optional Set to determine the relative tolerance convergence criteria. method : str, optional, default='false-position' choice of method to solve BAR nonlinear equations, one of 'self-consistent-iteration' or 'false-position' (default: 'false-position') verbose : bool, optional Set to True if verbose debug output is desired. Attributes ---------- delta_f_ : DataFrame The estimated dimensionless free energy difference between each state. d_delta_f_ : DataFrame The estimated statistical uncertainty (one standard deviation) in dimensionless free energy differences. states_ : list Lambda states for which free energy differences were obtained. Notes ----- See [Bennett1976]_ for details of the derivation and cite the paper (together with [Shirts2008]_ for the Python implementation in :mod:`pymbar`) when using BAR in published work. When possible, use MBAR instead of BAR as it makes better use of the available data. See Also -------- MBAR pymbar.other_estimators.bar .. versionchanged:: 1.0.0 `delta_f_`, `d_delta_f_`, `states_` are view of the original object. .. versionchanged:: 2.4.0 Added assessment of lambda states represented in the indices of u_nk to provide meaningful errors to ensure proper use. """
[docs] def __init__( self, maximum_iterations: int = 10000, relative_tolerance: float = 1.0e-7, method: str = "false-position", verbose: bool = False, ) -> None: self.maximum_iterations = maximum_iterations self.relative_tolerance = relative_tolerance self.method = method self.verbose = verbose # handle for pymbar.BAR object self._bar = None
[docs] def fit(self, u_nk: pd.DataFrame) -> "BAR": """ Compute overlap matrix of reduced potentials using Bennett acceptance ratio. Parameters ---------- u_nk : DataFrame u_nk[n,k] is the reduced potential energy of uncorrelated configuration n evaluated at state k. """ # sort by state so that rows from same state are in contiguous blocks u_nk = u_nk.sort_index(level=u_nk.index.names[1:]) # type: ignore[arg-type] # get a list of the lambda states that are sampled self._states_ = u_nk.columns.values.tolist() # group u_nk by lambda states groups = u_nk.groupby(level=u_nk.index.names[1:]) N_k = [ ( len(groups.get_group(i if isinstance(i, tuple) else (i,))) # type: ignore[unreachable] if i in groups.groups else 0 ) for i in u_nk.columns ] # Pull lambda states from indices states = list(set(x[1:] if len(x[1:]) > 1 else x[1] for x in u_nk.index)) for state in states: if state not in self._states_: raise ValueError( f"Indexed lambda state, {state}, is not represented in u_nk columns:" f" {self._states_}" ) states.sort(key=lambda x: self._states_.index(x)) # type: ignore[union-attr] # Now get free energy differences and their uncertainties for each step deltas = np.array([]) d_deltas = np.array([]) for k in range(len(N_k) - 1): if N_k[k] == 0 or N_k[k + 1] == 0: continue # get us from lambda step k uk = groups.get_group( self._states_[k] if isinstance(self._states_[k], tuple) else (self._states_[k],) ) # get w_F w_f = uk.iloc[:, k + 1] - uk.iloc[:, k] # get us from lambda step k+1 uk1 = groups.get_group( self._states_[k + 1] if isinstance(self._states_[k + 1], tuple) else (self._states_[k + 1],) ) # get w_R w_r = uk1.iloc[:, k] - uk1.iloc[:, k + 1] # now determine df and ddf using pymbar.BAR out = BAR_( w_f, w_r, method=self.method, maximum_iterations=self.maximum_iterations, relative_tolerance=self.relative_tolerance, verbose=self.verbose, ) df, ddf = out["Delta_f"], out["dDelta_f"] deltas = np.append(deltas, df) d_deltas = np.append(d_deltas, ddf**2) if len(deltas) == 0 and len(states) > 1: raise ValueError( "u_nk does not contain energies computed between any adjacent states.\n" "To compute the free energy with BAR, ensure that values in u_nk exist" f" for the columns:\n{states}." ) # build matrix of deltas between each state adelta = np.zeros((len(deltas) + 1, len(deltas) + 1)) ad_delta = np.zeros_like(adelta) for j in range(len(deltas)): out = [] dout = [] for i in range(len(deltas) - j): out.append(deltas[i : i + j + 1].sum()) # See https://github.com/alchemistry/alchemlyb/pull/60#issuecomment-430720742 # Error estimate generated by BAR ARE correlated # Use the BAR uncertainties between two neighbour states if j == 0: dout.append(d_deltas[i : i + j + 1].sum()) # Other uncertainties are unknown at this point else: dout.append(np.nan) adelta += np.diagflat(np.array(out), k=j + 1) ad_delta += np.diagflat(np.array(dout), k=j + 1) # yield standard delta_f_ free energies between each state self._delta_f_ = pd.DataFrame(adelta - adelta.T, columns=states, index=states) # yield standard deviation d_delta_f_ between each state self._d_delta_f_ = pd.DataFrame( np.sqrt(ad_delta + ad_delta.T), columns=states, index=states ) self._delta_f_.attrs = u_nk.attrs self._d_delta_f_.attrs = u_nk.attrs return self