from string import Template
from typing import Tuple, Union
import numpy as np
import numpy.typing as npt
[docs]def read_cv(colvar_file: str,
usecols: int,
dim:int,
):
if dim == 1 and len(usecols) == 1:
cv = np.loadtxt(colvar_file, usecols=usecols)
return cv
elif dim == 2 and len(usecols) == 2:
cv = np.loadtxt(colvar_file, usecols=usecols)
cv_x = cv[:, 0]
cv_y = cv[:, 1]
return cv_x, cv_y
[docs]def read_bias(colvar_file: str,
usecols: int=3,
) -> npt.NDArray:
bias = np.loadtxt(colvar_file, usecols=usecols)
return bias
[docs]def compute_bw_silverman(cv_array: npt.NDArray,
dim: int,
**kwargs,
) -> float:
"""
Compute the bandwidth using Silverman's rule of thumb.
Parameters:
cv_array (npt.NDArray): The input array for which the bandwidth is to be computed.
dim (int): The dimensionality of the data.
**kwargs: Additional keyword arguments.
- weights (optional, array-like): Weights for the data points.
- bias_factor (optional, float): Bias factor to adjust the bandwidth. Default is 1.0.
Returns:
float: The computed bandwidth.
Notes:
- If weights are provided, the effective sample size (neff) is computed.
- The bandwidth is computed using the formula:
bw = sigma_0 / (sqrt(bias_factor)) * ((neff * (dim + 2) / 4.0) ** (-1.0 / (dim + 4.0)))
- The function prints intermediate values for sigma_0, number of configurations (nconf),
effective sample size (neff), and the computed bandwidth (bw).
"""
weights = kwargs.get('weights', None)
bias_factor = kwargs.get('bias_factor', 1.0)
if weights is not None:
neff = np.power(np.sum(weights), 2) / np.sum(np.power(weights, 2))
else:
neff = cv_array.shape[0]
sigma_0 = cv_array.std()
#sigma_0 = sigma_0/
bw = sigma_0/(np.sqrt(bias_factor))*np.power((neff *(dim+2)/4.0), -1.0/(dim+4.0))
print(f"sigma_0 {sigma_0:.3f} nconf: {cv_array.shape[0]} neff: {neff:.3f} bw: {bw:.3f}")
return bw
[docs]def read_fes(file_fes: str,
dim: int,
**kwargs,
) -> Union[Tuple[npt.NDArray, npt.NDArray],
Tuple[npt.NDArray, npt.NDArray, npt.NDArray]
]:
"""
Read the free energy surface (FES) data from PLUMED fes files.
"""
if dim == 2:
if 'num_grid_points_x' not in kwargs or 'num_grid_points_y' not in kwargs:
raise ValueError("For 2D FES, 'num_grid_points_x' and 'num_grid_points_y' must be provided in kwargs.")
num_grid_points_x = kwargs.get('num_grid_points_x', None)
num_grid_points_y = kwargs.get('num_grid_points_y', None)
if dim == 2:
print("Reading 2D FES")
X = np.loadtxt(file_fes, usecols=0)
Y = np.loadtxt(file_fes, usecols=1)
Z = np.loadtxt(file_fes, usecols=2)
X = X.reshape(num_grid_points_x, num_grid_points_y)
Y = Y.reshape(num_grid_points_x, num_grid_points_y)
Z = Z.reshape(num_grid_points_x, num_grid_points_y)
Z = Z*0.0103636 # to eV
Z_min = np.min(Z)
Z = Z - Z_min
return X,Y,Z
elif dim == 1:
print("Reading 1D FES")
x = np.loadtxt(file_fes, usecols=0)
z = np.loadtxt(file_fes, usecols=1)
z = z*0.0103636 # to eV
z_min = np.min(z)
z = z - z_min
return x, z
else:
raise ValueError("dim should be either 1 or 2")
[docs]def make_grid(grid_bin_cv_x: int,
cv_x_min: float,
cv_x_max: float,
dim: int,
**kwargs
) -> Union[npt.NDArray, Tuple[npt.NDArray, npt.NDArray]]:
"""
Generates a grid of coordinates based on the specified parameters.
Parameters:
grid_bin_cv_x (int): Number of bins for the x-axis.
grid_bin_cv_y (int): Number of bins for the y-axis.
cv_x_min (float): Minimum value for the x-axis.
cv_x_max (float): Maximum value for the x-axis.
cv_y_min (float): Minimum value for the y-axis.
cv_y_max (float): Maximum value for the y-axis.
Returns:
tuple: one 1D array representing the x coordinates of the grid.
tuple: Two 2D arrays representing the x and y coordinates of the grid.
"""
grid_bin_cv_x += 1
gird_cv_x_min = cv_x_min
gird_cv_x_max = cv_x_max
grid_cv_x = np.linspace(gird_cv_x_min, gird_cv_x_max, grid_bin_cv_x)
if dim == 1:
return grid_cv_x
elif dim == 2:
gird_cv_y_min = kwargs.get("cv_y_min", None)
gird_cv_y_max = kwargs.get("cv_y_max", None)
grid_bin_cv_y = kwargs.get("grid_bin_cv_y", None)
grid_bin_cv_y += 1
grid_cv_y = np.linspace(gird_cv_y_min, gird_cv_y_max, grid_bin_cv_y)
X,Y=np.meshgrid(grid_cv_x,grid_cv_y)
return X, Y
[docs]def calc_one_FES_point(cv_x: npt.NDArray,
bias: npt.NDArray,
point_x: float,
sigma_x: float,
kbT: float,
dim: int,
**kwargs
) -> float:
"""
Calculate the free energy surface (FES) value for a given point.
Parameters:
- cv_x: float, x-coordinate of the collective variable (CV)
- cv_y: float, y-coordinate of the collective variable (CV)
- bias: float, bias potential value
- point_x: float, x-coordinate of the point
- point_y: float, y-coordinate of the point
- sigma_x: float, standard deviation of the x-coordinate
- sigma_y: float, standard deviation of the y-coordinate
- kbT: float, product of Boltzmann constant and temperature
Returns:
- fes: float, free energy surface value for the given point
"""
cv_y = kwargs.get("cv_y", None)
point_y = kwargs.get("point_y", None)
sigma_y = kwargs.get("sigma_y", None)
bias = bias / kbT
dist_x = (point_x - cv_x) / sigma_x
if dim == 1:
arg = bias - 0.5*dist_x*dist_x
elif dim == 2:
dist_y = (point_y - cv_y) / sigma_y
arg = bias - 0.5*dist_x*dist_x - 0.5*dist_y*dist_y
else:
raise ValueError("dim should be either 1 or 2")
fes = -kbT*np.logaddexp.reduce(arg)
return fes
#TODO: add documentation in jupyter book
[docs]def calc_FES(cv_x: npt.NDArray,
bandwidth_x: float,
nbins_x: int,
bias: npt.NDArray,
kbT: float,
dim: int=2,
**kwargs,
) -> npt.NDArray:
"""
Calculate the Free Energy Surface (FES) using the provided collective variables (CVs) and bias.
Parameters:
cv_x (npt.NDArray): Collective variable values along the x-axis.
bandwidth_x (float): Bandwidth for the Gaussian kernel along the x-axis.
nbins_x (int): Number of bins along the x-axis for the grid.
bias (npt.NDArray): Bias values corresponding to the CVs. must be divied by kbT to make it dimensionless before passing to this function.
kbT (float): Thermal energy (Boltzmann constant times temperature).
dim (int, optional): Dimensionality of the FES calculation (1 or 2). Default is 2.
**kwargs: Additional keyword arguments:
- cv_y (npt.NDArray, optional): Collective variable values along the y-axis (for 2D FES).
- bandwidth_y (float, optional): Bandwidth for the Gaussian kernel along the y-axis (for 2D FES).
- nbins_y (int, optional): Number of bins along the y-axis for the grid (for 2D FES).
- min_cv_x (float, optional): Minimum value of the CV along the x-axis.
- max_cv_x (float, optional): Maximum value of the CV along the x-axis.
- min_cv_y (float, optional): Minimum value of the CV along the y-axis (for 2D FES).
- max_cv_y (float, optional): Maximum value of the CV along the y-axis (for 2D FES).
- save_file (bool, optional): Whether to save the FES data to a file. Default is True.
- name_file (str, optional): Filename to save the FES data. Default is "fes.dat".
- name_cv_x (str, optional): Name of the CV along the x-axis. Default is "cv_x".
- name_cv_y (str, optional): Name of the CV along the y-axis. Default is "cv_y".
Returns:
np.ndarray: Calculated FES values on the grid.
"""
# Extract the keyword arguments
save_file = kwargs.get("save_file", True)
name_file = kwargs.get("name_file", "fes.dat")
name_cv_x = kwargs.get("name_cv_x", "cv_x")
min_cv_x = kwargs.get("min_cv_x", cv_x.min())
max_cv_x = kwargs.get("max_cv_x", cv_x.max())
# the syntax is ugly.. any better way to do this?
template_header_1d = \
"""Free energy surface generated by ECToolkits
FIELDS ${name_cv_x} () free_energy (kJ/mol)
SET min_${name_cv_x} ${min_cv_x}
SET max_${name_cv_x} ${max_cv_x}
SET num_grid_points_${name_cv_x} ${num_grid_points_x}"""
template_header_2d = \
"""Free energy surface generated by ECToolkits
FIELDS ${name_cv_x} () ${name_cv_y} () free_energy (kJ/mol)
SET min_${name_cv_x} ${min_cv_x}
SET max_${name_cv_x} ${max_cv_x}
SET num_grid_points_${name_cv_x} ${num_grid_points_x}
SET min_${name_cv_y} ${min_cv_y}
SET max_${name_cv_y} ${max_cv_y}
SET num_grid_points_${name_cv_y} ${num_grid_points_y}"""
if dim == 1:
print("Calculating 1D FES")
fes_x = make_grid(grid_bin_cv_x=nbins_x,
cv_x_min=min_cv_x,
cv_x_max=max_cv_x,
dim=dim
)
# Note the num_grid_points_x are nbins_x+1
num_grid_points_x = len(fes_x)
fes = np.zeros(num_grid_points_x)
for i in range(num_grid_points_x):
print(f' working... {(i/num_grid_points_x):.0%}', end='\r')
fes[i] = calc_one_FES_point(cv_x=cv_x,
bias=bias,
point_x=fes_x[i],
sigma_x=bandwidth_x,
kbT=kbT,
dim=dim)
if save_file:
header = Template(template_header_1d).substitute(name_cv_x=name_cv_x,
min_cv_x=f"{min_cv_x:10.3f}",
max_cv_x=f"{max_cv_x:10.3f}",
num_grid_points_x=num_grid_points_x,
)
np.savetxt(name_file,
np.array([fes_x.flatten(), fes.flatten()]).T,
fmt="%.6f",
header=header,
)
return fes
elif dim == 2:
print("Calculating 2D FES")
name_cv_y = kwargs.get("name_cv_y", "cv_y")
cv_y = kwargs.get("cv_y", None)
bandwidth_y = kwargs.get("bandwidth_y", None)
nbins_y = kwargs.get("nbins_y", None)
min_cv_y = kwargs.get("min_cv_y", cv_y.min())
max_cv_y = kwargs.get("max_cv_y", cv_y.max())
fes_x, fes_y = make_grid(grid_bin_cv_x=nbins_x,
grid_bin_cv_y=nbins_y,
cv_x_min=min_cv_x,
cv_x_max=max_cv_x,
cv_y_min=min_cv_y,
cv_y_max=max_cv_y,
dim=dim,
)
# Note the num_grid_points_x and num_grid_points_y are nbins_x+1 and nbins_y+1
num_grid_points_x = len(fes_x)
num_grid_points_y = len(fes_y)
fes = np.zeros((num_grid_points_x, num_grid_points_y))
for i in range(num_grid_points_x):
print(f' working... {(i/num_grid_points_x):.0%}', end='\r')
for j in range(num_grid_points_y):
fes[i, j] = calc_one_FES_point(cv_x=cv_x,
cv_y=cv_y,
bias=bias,
point_x=fes_x[i, j],
point_y=fes_y[i, j],
sigma_x=bandwidth_x,
sigma_y=bandwidth_y,
kbT=kbT,
dim=dim,
)
if save_file:
header = Template(template_header_2d).substitute(name_cv_x=name_cv_x,
name_cv_y=name_cv_y,
min_cv_x=f"{min_cv_x:10.3f}",
max_cv_x=f"{max_cv_x:10.3f}",
num_grid_points_x=num_grid_points_x,
min_cv_y=f"{min_cv_y:10.3f}",
max_cv_y=f"{max_cv_y:10.3f}",
num_grid_points_y=num_grid_points_y,
)
np.savetxt(name_file,
np.array([fes_x.flatten(), fes_y.flatten(), fes.flatten()]).T,
fmt="%.6f",
header=header,
)
return fes
else:
raise ValueError("dim should be either 1 or 2")