fabian-paul/pcca.py

## pcca.py
import typing
import numpy as np
import scipy.optimize


def _pcca_plus_score(partial_coarse_graining_matrix_flat: np.ndarray, eigenvectors: np.ndarray, n: int) -> float:
    """Implementation of the PCCA+ score.

    Parameters
    ----------
    partial_coarse_graining_matrix_flat : np.ndarray of shape n*n
        Flattened version of the (current guess of the) coarse-graining matrix with the zeroth row and column removed.
    eigenvectors : np.ndarray of shape (N, n + 1)
        the n + 1 dominant eigenvectors of the transition matrix or generalized singular vectors of the count matrix
        describing the dynamics
    n: int
        the dimension n of partial_coarse_graining_matrix_flat before flattening

    Returns
    -------
    float
        the PCCA+ score for the input coarse-graining matrix

    Notes
    -----
    Refer to section 4.3.1 (trace(S)) in the publication by S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
    application to Markov state models and data classification. Adv Data Anal Classif 7, 147-179 (2013).

    The implementation of the score assumes that the constant eigenvector (or singular vector) is in position zero and is represented as a
    column of ones.

    The implementation is agnostic to the value of the stationary distribution. In fact the stationary distribution can be replaced by
    any vector of weights (as long as the eigenvectors/ singular vectors are properly orthonormalized). This function can be used for VAMP,
    for G-PCCA or for kinetic coarse-graining of a reversible transition matrix as described in the original PCCA+ publication.
    """
    partial_coarse_graining_matrix = np.reshape(partial_coarse_graining_matrix_flat, (n, n))
    coarse_graining_matrix = _feasible_coarse_graining_matrix(partial_coarse_graining_matrix=partial_coarse_graining_matrix,
                                                              eigenvectors=eigenvectors)
    return -np.sum(coarse_graining_matrix ** 2 / coarse_graining_matrix[0, np.newaxis, :])


def _pcca_plus_matrix(eigenvectors: np.ndarray, coarse_graining_matrix: np.ndarray, n_clusters: int, opt_method: str = "Nelder-Mead",
                      opt_options: typing.Optional[typing.Mapping[str, typing.Any]] = None) -> np.ndarray:
    """Find the PCCA+ coarse-graining matrix by optimizing the PCCA+ score.

    Parameters
    ----------
    eigenvectors : np.ndarray of shape (N, n)
         the dominant eigenvectors or (possibly generalized) singular vectors of a transition matrix describing the dynamics
    coarse_graining_matrix : np.ndarray of shape (n_clusters, n_clusters)
        initial guess for the coarse_graining_matrix, e.g. as computed with :meth:`_inner_simplex`
    n_clusters: int
        the number of desired clusters, must be compatible with the dimensions of `coarse_graining_matrix`
    opt_method: str, default = "Nelder-Mead"
        Passed as the argument `method` in :meth:`scipy.optimize.minimize`.
    opt_options: Mapping[str, Any] or None, default = None
        Passed as the argument `options` in :meth:`scipy.optimize.minimize`.

    Returns
    -------
    np.ndarray of shape (n, n)
        the optimal coarse graining matrix according to the PCCA+ score
        Right-multiplying the dominant eigenvectors by this returned matrix yields an approximation of the cluster membership vectors.
        The membership vectors `membership[i, j]` describe how each grid point i relates to each kinetic cluster j.
        Memberships sum to one along the cluster dimension (j).

    Notes
    -----
    Refer to the publication by S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
    application to Markov state models and data classification. Adv Data Anal Classif 7, 147-179 (2013).
    """
    if opt_options is None:
        opt_options = {}
    if "disp" not in opt_options:
        opt_options["disp"] = False
    if not np.allclose(eigenvectors[:, 0], 1.):
        raise ValueError("Zeroth eigenvector must consist of ones.")
    eigenvectors = eigenvectors[:, 0:n_clusters]
    partial_coarse_graining_matrix = coarse_graining_matrix[1:, 1:]
    n = partial_coarse_graining_matrix.shape[0]
    if n != partial_coarse_graining_matrix.shape[1]:
        raise ValueError("partial_coarse_graining_matrix must be a square matrix")
    partial_coarse_graining_matrix_flat = np.reshape(partial_coarse_graining_matrix, n * n)
    partial_coarse_graining_matrix_flat_opt = scipy.optimize.minimize(fun=_pcca_plus_score, x0=partial_coarse_graining_matrix_flat,
                                                                      args=(eigenvectors, n), method=opt_method, options=opt_options).x
    partial_coarse_graining_matrix_opt = np.reshape(partial_coarse_graining_matrix_flat_opt, (n, n))
    return _feasible_coarse_graining_matrix(partial_coarse_graining_matrix=partial_coarse_graining_matrix_opt, eigenvectors=eigenvectors)


def _feasible_coarse_graining_matrix(partial_coarse_graining_matrix: np.ndarray, eigenvectors: np.ndarray) -> np.ndarray:
    r"""Convert an incomplete coarse-graining matrix into a full matrix by adding the missing parts such that constrains are fulfilled.

    Parameters
    ----------
    partial_coarse_graining_matrix : np.ndarray of shape (n-1, n-1)
        the coarse-graining matrix with the zeroth row and column removed
    eigenvectors : np.ndarray of shape (N, n)
        the n dominant eigenvectors of the transition matrix or generalized singular vectors of the count matrix describing the dynamics

    Returns
    -------
    np.ndarray of shape (n, n)
        the complete coarse-graining matrix with all constraints fulfilled

    Notes
    -----
    The first row of the coarse-graining matrix A is computed by fulfilling the inequality
    \sum_{i=0} V_{k,i} A_{i,j} = A_{0,j} + \sum_{i=1} V_{k,i} A_{i,j} \ge 0 for all k, j
    where V is the matrix of eigenvectors and A is the coarse-graining matrix. This inequality is required to have non-negative cluster
    memberships. The inequality is used to set A_{0,j} = \max_k \left( -\sum_{i=1} V_{k,i} A_{i,j} \right)
    The first column of the coarse-graining matrix A is computed from the identity \sum_j A_{i,j} = \delta_{i,0} which is
    required to obtain a normalized transition matrix (probability conservation) after coarse-graining. The identity is used to set
    A_{i,0} = \delta_{i,0} - \sum{j=1} A_{i,j}

    The implementation of the score assumes that the constant eigenvector (or singular vectors) is in position zero and is represented as a
    column of ones. Furthermore the implementation assumes that the eigenvectors (or singular vectors) are mutually orthogonal with
    respect to some implicit weighting vector.

    Refer to section 4.3 in the publication by S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
    application to Markov state models and data classification. Adv Data Anal Classif 7, 147-179 (2013).
    """
    n = partial_coarse_graining_matrix.shape[0]
    coarse_graining_matrix = np.zeros((n + 1, n + 1))
    coarse_graining_matrix[1:, 1:] = partial_coarse_graining_matrix
    coarse_graining_matrix[1:, 0] = -np.sum(partial_coarse_graining_matrix, axis=1)
    memberships_partial = np.dot(eigenvectors[:, 1:], coarse_graining_matrix[1:, :])
    memberships_partial_max = np.max(-memberships_partial, axis=0)
    coarse_graining_matrix[0, :] = memberships_partial_max
    return coarse_graining_matrix / np.sum(memberships_partial_max)


def _orthonormalize(vertices: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
    """Convert a set of points to the pair (origin, orthonormal vectors) that span the affine space of the original points.

    Parameters
    ----------
    vertices : np.ndarray of shape (n, n)
        some sets of points

    Returns
    -------
    np.ndarray of shape n
         the point at the origin
    np.ndarray of shape (n, n)
         the orthonormal unit vectors
         The point of origin and the the unit vectors span the same affine space as the set of input points.
    """
    # pick vertex that is closest to zero as the origin
    i0 = np.argmin(np.linalg.norm(vertices, axis=1))
    v0 = vertices[i0, :]
    # for the remaining vertices, subtract v0 and orthonormalize
    a = np.concatenate((vertices[0:i0, :], vertices[i0 + 1:, :])) - v0[np.newaxis, :]
    q, _ = np.linalg.qr(a.T)
    return v0, q.T


def _pcca_plus_matrix_inner_simplex(eigenvectors: np.ndarray) -> np.ndarray:
    """Compute a good initial guess of the PCCA+ coarse-graining matrix using the "inner simplex" algorithm.

    Parameters
    ----------
    eigenvectors: np.ndarray of shape (N, n)
        the n dominant eigenvectors of the transition matrix or generalized singular vectors of the count matrix describing the dynamics

    Returns
    -------
    np.ndarray of shape (n, n)
        The map from the eigenvector space to the simplex. Right-multiplying the dominant eigenvectors by this returned matrix yields an
        approximation of the cluster membership vectors. The membership vectors `membership[i, j]` describe how each grid point i relates
        to each kinetic cluster j. Memberships sum to one along the cluster dimension (j). Memberships computed with this method can be
        negative. Use :meth:`_pcca_plus_matrix` for non-negative memberships.

    Notes
    -----
    The inner simplex algorithm finds a large n-simplex contained in the point cloud formed by the n dominant eigenfunctions defined on the
    grid points (or k-means clusters centers). This algorithm might not find the larget possible n-simplex.
    The initial guess of the returned PCCA+ coarse-graining matrix maps the n vertices of the simplex to the n canonical Euclidean unit
    vectors.

    Refer to the publication by Weber and Galliat: Characterization of Transition States in Conformational Dynamics using Fuzzy Sets
    Technical Report 02-12, Zuse-Institut Takustr. 7, 14195 Berlin (2002)
    """
    dim, n_clusters = eigenvectors.shape
    vertices = np.empty((2, n_clusters))
    vertices[0, :] = eigenvectors[np.argmax(np.linalg.norm(eigenvectors, axis=1)), :]
    vertices[1, :] = eigenvectors[np.argmax(np.linalg.norm(eigenvectors - vertices[0, np.newaxis, :], axis=1)), :]
    # further passes, follow the algorithm form Weber & Galliat (similar to quick hull)
    for k in range(vertices.shape[0], n_clusters):
        v0, w = _orthonormalize(vertices)
        projector = np.einsum('ij,ik->jk', w, w) - np.eye(n_clusters)
        dists_candidate = np.linalg.norm(np.dot(eigenvectors - v0[np.newaxis, :], projector), axis=1)
        vertices = np.vstack((vertices, eigenvectors[np.argmax(dists_candidate), :]))
    return np.linalg.inv(vertices)


def pcca_plus_for_transition_matrix(transition_matrix: np.ndarray, n_clusters: int) -> typing.Tuple[
    np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Compute the membership vectors for fuzzy kinetic clusters of a transition matrix.

    Parameters
    ----------
    transition_matrix : np.ndarray of shape (N, N)
        The grid point-to-grid point (or k-means cluster to k-means cluster) transition matrix.
    n_clusters : int
        The number of states to cluster state space into. Must be 2 or larger.

    Returns
    -------
    np.ndarray of shape (N, n_clusters)
        the cluster memberships; `membership[i, j]` is the membership of grid point i (or k-mean cluster i) in kinetic cluster j.
        Memberships sum to one along the cluster dimension (j).
    np.ndarray of shape (N, n_clusters)
        the right dominant eigenvectors of the transition matrix, column-wise, ordered descendingly by eigenvalue; the constant
        eigenvector is in zeroth position and is encoded as a column of ones.
    np.ndarray of shape N
        the stationary distribution of the transition matrix
    np.ndarray of shape n_clusters
        the dominant eigenvalues of the transition matrix, ordered descendingly by magnitude

    Notes
    -----
    Refer to the publication by S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
    application to Markov state models and data classification. Adv Data Anal Classif 7, 147-179 (2013).
    """
    eigenvalues_left, eigenvectors_left = np.linalg.eig(transition_matrix.T)
    stationary_distribution = eigenvectors_left[:, np.argmax(eigenvalues_left)]
    stationary_distribution = stationary_distribution / np.sum(stationary_distribution)
    eigenvalues, eigenvectors = np.linalg.eig(T)
    order = np.argsort(np.abs(eigenvalues))[::-1]
    eigenvectors = eigenvectors[:, order]
    norm_0 = eigenvectors[:, 0].sum() / eigenvectors.shape[0]
    eigenvectors[:, 0] = eigenvectors[:, 0] / norm_0
    norm_rest = np.sum(eigenvectors[:, 1:] ** 2 * stationary_distribution[:, np.newaxis], axis=0) ** 0.5
    eigenvectors[:, 1:] = eigenvectors[:, 1:] / norm_rest[np.newaxis, :]
    coarse_graining_matrix = _pcca_plus_matrix_inner_simplex(eigenvectors=eigenvectors[:, 0:n_clusters])
    coarse_graining_matrix = _pcca_plus_matrix(eigenvectors=eigenvectors[:, 0:n_clusters], coarse_graining_matrix=coarse_graining_matrix,
                                               n_clusters=n_clusters)
    return eigenvectors[:, 0:n_clusters].dot(coarse_graining_matrix), eigenvectors, stationary_distribution, eigenvalues[order][
                                                                                                             0:n_clusters]


if __name__ == "__main__":
    import msmtools


    def same_cols(a, b, **kwargs) -> bool:
        found = set()
        for i, col_a in enumerate(a.T):
            for col_b in b.T:
                if np.allclose(col_a, col_b, **kwargs):
                    found.add(i)
        return found == {i for i in range(a.shape[1])}


    def proportional_columns(a, b, **kwargs) -> bool:
        result = True
        for col_a, col_b in zip(a.T, b.T):
            non_zero = col_b != 0
            factor = col_a[non_zero][0] / col_b[non_zero][0]
            result &= np.allclose(col_a, factor * col_b, **kwargs)
        return result


    C = np.random.rand(40, 40)
    C = C + C.T
    T = C / C.sum(axis=1)[:, np.newaxis]

    for n_clusters_ in [2, 3, 4, 5, ]:
        memberships, eigenvectors_, pi, eigenvalues_ = pcca_plus_for_transition_matrix(transition_matrix=T, n_clusters=n_clusters_)
        eps = 1E-7
        assert np.all(memberships <= 1 + eps)
        assert np.all(memberships > - eps)
        assert np.allclose(memberships.sum(axis=1), 1.0)

        A = _pcca_plus_matrix_inner_simplex(eigenvectors_[:, 0:n_clusters_])
        A2 = _pcca_plus_matrix(eigenvectors_, A, n_clusters_)

        A_feasible = _feasible_coarse_graining_matrix(partial_coarse_graining_matrix=A[1:, 1:],
                                                      eigenvectors=eigenvectors_[:, 0:n_clusters_])
        assert np.allclose(A_feasible.sum(axis=1), np.eye(1, A_feasible.shape[0], 0))
        assert np.all(eigenvectors_[:, 0:n_clusters_].dot(A_feasible) >= -eps)

        # check coarse-graining properties of the cluster memberships
        T_cg = np.linalg.inv(np.dot(memberships.T * pi[np.newaxis, :], memberships)).dot(
            memberships.T.dot(T * pi[:, np.newaxis]).dot(memberships))
        assert np.allclose(T_cg.sum(axis=1), np.ones(n_clusters_))  # check normalization
        eigenvalues_cg, eigenvectors_cg = np.linalg.eig(T_cg)
        order_ = np.argsort(np.abs(eigenvalues_cg))[::-1]
        eigenvalues_cg = eigenvalues_cg[order_]
        eigenvectors_cg = eigenvectors_cg[:, order_]
        assert np.allclose(eigenvalues_cg[0:n_clusters_], eigenvalues_)  # check eigenvalues

        # check stationary distribution
        eigenvalues_cg_left, eigenvectors_cg_left = np.linalg.eig(T_cg.T)
        stationary_distribution_cg = eigenvectors_cg_left[:, np.argmax(eigenvalues_cg_left)]
        stationary_distribution_cg = stationary_distribution_cg / np.sum(stationary_distribution_cg)
        assert np.allclose(stationary_distribution_cg, memberships.T.dot(pi))

        # check eigenvectors
        assert proportional_columns(eigenvectors_cg, np.linalg.inv(A2))  # equation at the bottom of page 162 in Röblitz & Weber

        # comparison to msmtools
        chi_, Aref = msmtools.analysis.dense.pcca._pcca_connected_isa(eigenvectors_, n_clusters_)
        assert same_cols(A, Aref)
        assert same_cols(eigenvectors_[:, 0:n_clusters_].dot(A), chi_)

        A2ref = msmtools.analysis.dense.pcca._opt_soft(eigenvectors_, Aref, n_clusters_)

        assert same_cols(A2, A2ref)

        memberships_ref = msmtools.analysis.pcca_memberships(T, n_clusters_)
        assert same_cols(memberships, memberships_ref)

    T = np.array([[0.9, 0.1, 0.0, 0.0, 0.0],
                  [0.1, 0.9, 0.0, 0.0, 0.0],
                  [0.0, 0.1, 0.8, 0.1, 0.0],
                  [0.0, 0.0, 0.0, 0.8, 0.2],
                  [0.0, 0.0, 0.0, 0.2, 0.8]])
    memberships_, *_ = pcca_plus_for_transition_matrix(transition_matrix=T, n_clusters=2)
    ref = np.array([[1., 0.],
                    [1., 0.],
                    [0.5, 0.5],
                    [0., 1.],
                    [0., 1.]])
    np.assert_allclose(memberships_, ref)
    memberships_, *_ = pcca_plus_for_transition_matrix(transition_matrix=T, n_clusters=4)
    ref = np.array([[1., 0., 0., 0.],
                    [0., 1., 0., 0.],
                    [0., 0.5, 0.5, 0.],
                    [0., 0., 1., 0.],
                    [0., 0., 0., 1.]])
    np.assert_allclose(memberships_, ref)

    print("OK")
	import typing
	import numpy as np
	import scipy.optimize


	def _pcca_plus_score(partial_coarse_graining_matrix_flat: np.ndarray, eigenvectors: np.ndarray, n: int) -> float:
	"""Implementation of the PCCA+ score.

	Parameters
	----------
	partial_coarse_graining_matrix_flat : np.ndarray of shape n*n
	Flattened version of the (current guess of the) coarse-graining matrix with the zeroth row and column removed.
	eigenvectors : np.ndarray of shape (N, n + 1)
	the n + 1 dominant eigenvectors of the transition matrix or generalized singular vectors of the count matrix
	describing the dynamics
	n: int
	the dimension n of partial_coarse_graining_matrix_flat before flattening

	Returns
	-------
	float
	the PCCA+ score for the input coarse-graining matrix

	Notes
	-----
	Refer to section 4.3.1 (trace(S)) in the publication by S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
	application to Markov state models and data classification. Adv Data Anal Classif 7, 147-179 (2013).

	The implementation of the score assumes that the constant eigenvector (or singular vector) is in position zero and is represented as a
	column of ones.

	The implementation is agnostic to the value of the stationary distribution. In fact the stationary distribution can be replaced by
	any vector of weights (as long as the eigenvectors/ singular vectors are properly orthonormalized). This function can be used for VAMP,
	for G-PCCA or for kinetic coarse-graining of a reversible transition matrix as described in the original PCCA+ publication.
	"""
	partial_coarse_graining_matrix = np.reshape(partial_coarse_graining_matrix_flat, (n, n))
	coarse_graining_matrix = _feasible_coarse_graining_matrix(partial_coarse_graining_matrix=partial_coarse_graining_matrix,
	eigenvectors=eigenvectors)
	return -np.sum(coarse_graining_matrix ** 2 / coarse_graining_matrix[0, np.newaxis, :])


	def _pcca_plus_matrix(eigenvectors: np.ndarray, coarse_graining_matrix: np.ndarray, n_clusters: int, opt_method: str = "Nelder-Mead",
	opt_options: typing.Optional[typing.Mapping[str, typing.Any]] = None) -> np.ndarray:
	"""Find the PCCA+ coarse-graining matrix by optimizing the PCCA+ score.

	Parameters
	----------
	eigenvectors : np.ndarray of shape (N, n)
	the dominant eigenvectors or (possibly generalized) singular vectors of a transition matrix describing the dynamics
	coarse_graining_matrix : np.ndarray of shape (n_clusters, n_clusters)
	initial guess for the coarse_graining_matrix, e.g. as computed with :meth:`_inner_simplex`
	n_clusters: int
	the number of desired clusters, must be compatible with the dimensions of `coarse_graining_matrix`
	opt_method: str, default = "Nelder-Mead"
	Passed as the argument `method` in :meth:`scipy.optimize.minimize`.
	opt_options: Mapping[str, Any] or None, default = None
	Passed as the argument `options` in :meth:`scipy.optimize.minimize`.

	Returns
	-------
	np.ndarray of shape (n, n)
	the optimal coarse graining matrix according to the PCCA+ score
	Right-multiplying the dominant eigenvectors by this returned matrix yields an approximation of the cluster membership vectors.
	The membership vectors `membership[i, j]` describe how each grid point i relates to each kinetic cluster j.
	Memberships sum to one along the cluster dimension (j).

	Notes
	-----
	Refer to the publication by S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
	application to Markov state models and data classification. Adv Data Anal Classif 7, 147-179 (2013).
	"""
	if opt_options is None:
	opt_options = {}
	if "disp" not in opt_options:
	opt_options["disp"] = False
	if not np.allclose(eigenvectors[:, 0], 1.):
	raise ValueError("Zeroth eigenvector must consist of ones.")
	eigenvectors = eigenvectors[:, 0:n_clusters]
	partial_coarse_graining_matrix = coarse_graining_matrix[1:, 1:]
	n = partial_coarse_graining_matrix.shape[0]
	if n != partial_coarse_graining_matrix.shape[1]:
	raise ValueError("partial_coarse_graining_matrix must be a square matrix")
	partial_coarse_graining_matrix_flat = np.reshape(partial_coarse_graining_matrix, n * n)
	partial_coarse_graining_matrix_flat_opt = scipy.optimize.minimize(fun=_pcca_plus_score, x0=partial_coarse_graining_matrix_flat,
	args=(eigenvectors, n), method=opt_method, options=opt_options).x
	partial_coarse_graining_matrix_opt = np.reshape(partial_coarse_graining_matrix_flat_opt, (n, n))
	return _feasible_coarse_graining_matrix(partial_coarse_graining_matrix=partial_coarse_graining_matrix_opt, eigenvectors=eigenvectors)


	def _feasible_coarse_graining_matrix(partial_coarse_graining_matrix: np.ndarray, eigenvectors: np.ndarray) -> np.ndarray:
	r"""Convert an incomplete coarse-graining matrix into a full matrix by adding the missing parts such that constrains are fulfilled.

	Parameters
	----------
	partial_coarse_graining_matrix : np.ndarray of shape (n-1, n-1)
	the coarse-graining matrix with the zeroth row and column removed
	eigenvectors : np.ndarray of shape (N, n)
	the n dominant eigenvectors of the transition matrix or generalized singular vectors of the count matrix describing the dynamics

	Returns
	-------
	np.ndarray of shape (n, n)
	the complete coarse-graining matrix with all constraints fulfilled

	Notes
	-----
	The first row of the coarse-graining matrix A is computed by fulfilling the inequality
	\sum_{i=0} V_{k,i} A_{i,j} = A_{0,j} + \sum_{i=1} V_{k,i} A_{i,j} \ge 0 for all k, j
	where V is the matrix of eigenvectors and A is the coarse-graining matrix. This inequality is required to have non-negative cluster
	memberships. The inequality is used to set A_{0,j} = \max_k \left( -\sum_{i=1} V_{k,i} A_{i,j} \right)
	The first column of the coarse-graining matrix A is computed from the identity \sum_j A_{i,j} = \delta_{i,0} which is
	required to obtain a normalized transition matrix (probability conservation) after coarse-graining. The identity is used to set
	A_{i,0} = \delta_{i,0} - \sum{j=1} A_{i,j}

	The implementation of the score assumes that the constant eigenvector (or singular vectors) is in position zero and is represented as a
	column of ones. Furthermore the implementation assumes that the eigenvectors (or singular vectors) are mutually orthogonal with
	respect to some implicit weighting vector.

	Refer to section 4.3 in the publication by S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
	application to Markov state models and data classification. Adv Data Anal Classif 7, 147-179 (2013).
	"""
	n = partial_coarse_graining_matrix.shape[0]
	coarse_graining_matrix = np.zeros((n + 1, n + 1))
	coarse_graining_matrix[1:, 1:] = partial_coarse_graining_matrix
	coarse_graining_matrix[1:, 0] = -np.sum(partial_coarse_graining_matrix, axis=1)
	memberships_partial = np.dot(eigenvectors[:, 1:], coarse_graining_matrix[1:, :])
	memberships_partial_max = np.max(-memberships_partial, axis=0)
	coarse_graining_matrix[0, :] = memberships_partial_max
	return coarse_graining_matrix / np.sum(memberships_partial_max)


	def _orthonormalize(vertices: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
	"""Convert a set of points to the pair (origin, orthonormal vectors) that span the affine space of the original points.

	Parameters
	----------
	vertices : np.ndarray of shape (n, n)
	some sets of points

	Returns
	-------
	np.ndarray of shape n
	the point at the origin
	np.ndarray of shape (n, n)
	the orthonormal unit vectors
	The point of origin and the the unit vectors span the same affine space as the set of input points.
	"""
	# pick vertex that is closest to zero as the origin
	i0 = np.argmin(np.linalg.norm(vertices, axis=1))
	v0 = vertices[i0, :]
	# for the remaining vertices, subtract v0 and orthonormalize
	a = np.concatenate((vertices[0:i0, :], vertices[i0 + 1:, :])) - v0[np.newaxis, :]
	q, _ = np.linalg.qr(a.T)
	return v0, q.T


	def _pcca_plus_matrix_inner_simplex(eigenvectors: np.ndarray) -> np.ndarray:
	"""Compute a good initial guess of the PCCA+ coarse-graining matrix using the "inner simplex" algorithm.

	Parameters
	----------
	eigenvectors: np.ndarray of shape (N, n)
	the n dominant eigenvectors of the transition matrix or generalized singular vectors of the count matrix describing the dynamics

	Returns
	-------
	np.ndarray of shape (n, n)
	The map from the eigenvector space to the simplex. Right-multiplying the dominant eigenvectors by this returned matrix yields an
	approximation of the cluster membership vectors. The membership vectors `membership[i, j]` describe how each grid point i relates
	to each kinetic cluster j. Memberships sum to one along the cluster dimension (j). Memberships computed with this method can be
	negative. Use :meth:`_pcca_plus_matrix` for non-negative memberships.

	Notes
	-----
	The inner simplex algorithm finds a large n-simplex contained in the point cloud formed by the n dominant eigenfunctions defined on the
	grid points (or k-means clusters centers). This algorithm might not find the larget possible n-simplex.
	The initial guess of the returned PCCA+ coarse-graining matrix maps the n vertices of the simplex to the n canonical Euclidean unit
	vectors.

	Refer to the publication by Weber and Galliat: Characterization of Transition States in Conformational Dynamics using Fuzzy Sets
	Technical Report 02-12, Zuse-Institut Takustr. 7, 14195 Berlin (2002)
	"""
	dim, n_clusters = eigenvectors.shape
	vertices = np.empty((2, n_clusters))
	vertices[0, :] = eigenvectors[np.argmax(np.linalg.norm(eigenvectors, axis=1)), :]
	vertices[1, :] = eigenvectors[np.argmax(np.linalg.norm(eigenvectors - vertices[0, np.newaxis, :], axis=1)), :]
	# further passes, follow the algorithm form Weber & Galliat (similar to quick hull)
	for k in range(vertices.shape[0], n_clusters):
	v0, w = _orthonormalize(vertices)
	projector = np.einsum('ij,ik->jk', w, w) - np.eye(n_clusters)
	dists_candidate = np.linalg.norm(np.dot(eigenvectors - v0[np.newaxis, :], projector), axis=1)
	vertices = np.vstack((vertices, eigenvectors[np.argmax(dists_candidate), :]))
	return np.linalg.inv(vertices)


	def pcca_plus_for_transition_matrix(transition_matrix: np.ndarray, n_clusters: int) -> typing.Tuple[
	np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
	"""Compute the membership vectors for fuzzy kinetic clusters of a transition matrix.

	Parameters
	----------
	transition_matrix : np.ndarray of shape (N, N)
	The grid point-to-grid point (or k-means cluster to k-means cluster) transition matrix.
	n_clusters : int
	The number of states to cluster state space into. Must be 2 or larger.

	Returns
	-------
	np.ndarray of shape (N, n_clusters)
	the cluster memberships; `membership[i, j]` is the membership of grid point i (or k-mean cluster i) in kinetic cluster j.
	Memberships sum to one along the cluster dimension (j).
	np.ndarray of shape (N, n_clusters)
	the right dominant eigenvectors of the transition matrix, column-wise, ordered descendingly by eigenvalue; the constant
	eigenvector is in zeroth position and is encoded as a column of ones.
	np.ndarray of shape N
	the stationary distribution of the transition matrix
	np.ndarray of shape n_clusters
	the dominant eigenvalues of the transition matrix, ordered descendingly by magnitude

	Notes
	-----
	Refer to the publication by S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
	application to Markov state models and data classification. Adv Data Anal Classif 7, 147-179 (2013).
	"""
	eigenvalues_left, eigenvectors_left = np.linalg.eig(transition_matrix.T)
	stationary_distribution = eigenvectors_left[:, np.argmax(eigenvalues_left)]
	stationary_distribution = stationary_distribution / np.sum(stationary_distribution)
	eigenvalues, eigenvectors = np.linalg.eig(T)
	order = np.argsort(np.abs(eigenvalues))[::-1]
	eigenvectors = eigenvectors[:, order]
	norm_0 = eigenvectors[:, 0].sum() / eigenvectors.shape[0]
	eigenvectors[:, 0] = eigenvectors[:, 0] / norm_0
	norm_rest = np.sum(eigenvectors[:, 1:] ** 2 * stationary_distribution[:, np.newaxis], axis=0) ** 0.5
	eigenvectors[:, 1:] = eigenvectors[:, 1:] / norm_rest[np.newaxis, :]
	coarse_graining_matrix = _pcca_plus_matrix_inner_simplex(eigenvectors=eigenvectors[:, 0:n_clusters])
	coarse_graining_matrix = _pcca_plus_matrix(eigenvectors=eigenvectors[:, 0:n_clusters], coarse_graining_matrix=coarse_graining_matrix,
	n_clusters=n_clusters)
	return eigenvectors[:, 0:n_clusters].dot(coarse_graining_matrix), eigenvectors, stationary_distribution, eigenvalues[order][
	0:n_clusters]


	if __name__ == "__main__":
	import msmtools


	def same_cols(a, b, **kwargs) -> bool:
	found = set()
	for i, col_a in enumerate(a.T):
	for col_b in b.T:
	if np.allclose(col_a, col_b, **kwargs):
	found.add(i)
	return found == {i for i in range(a.shape[1])}


	def proportional_columns(a, b, **kwargs) -> bool:
	result = True
	for col_a, col_b in zip(a.T, b.T):
	non_zero = col_b != 0
	factor = col_a[non_zero][0] / col_b[non_zero][0]
	result &= np.allclose(col_a, factor * col_b, **kwargs)
	return result


	C = np.random.rand(40, 40)
	C = C + C.T
	T = C / C.sum(axis=1)[:, np.newaxis]

	for n_clusters_ in [2, 3, 4, 5, ]:
	memberships, eigenvectors_, pi, eigenvalues_ = pcca_plus_for_transition_matrix(transition_matrix=T, n_clusters=n_clusters_)
	eps = 1E-7
	assert np.all(memberships <= 1 + eps)
	assert np.all(memberships > - eps)
	assert np.allclose(memberships.sum(axis=1), 1.0)

	A = _pcca_plus_matrix_inner_simplex(eigenvectors_[:, 0:n_clusters_])
	A2 = _pcca_plus_matrix(eigenvectors_, A, n_clusters_)

	A_feasible = _feasible_coarse_graining_matrix(partial_coarse_graining_matrix=A[1:, 1:],
	eigenvectors=eigenvectors_[:, 0:n_clusters_])
	assert np.allclose(A_feasible.sum(axis=1), np.eye(1, A_feasible.shape[0], 0))
	assert np.all(eigenvectors_[:, 0:n_clusters_].dot(A_feasible) >= -eps)

	# check coarse-graining properties of the cluster memberships
	T_cg = np.linalg.inv(np.dot(memberships.T * pi[np.newaxis, :], memberships)).dot(
	memberships.T.dot(T * pi[:, np.newaxis]).dot(memberships))
	assert np.allclose(T_cg.sum(axis=1), np.ones(n_clusters_)) # check normalization
	eigenvalues_cg, eigenvectors_cg = np.linalg.eig(T_cg)
	order_ = np.argsort(np.abs(eigenvalues_cg))[::-1]
	eigenvalues_cg = eigenvalues_cg[order_]
	eigenvectors_cg = eigenvectors_cg[:, order_]
	assert np.allclose(eigenvalues_cg[0:n_clusters_], eigenvalues_) # check eigenvalues

	# check stationary distribution
	eigenvalues_cg_left, eigenvectors_cg_left = np.linalg.eig(T_cg.T)
	stationary_distribution_cg = eigenvectors_cg_left[:, np.argmax(eigenvalues_cg_left)]
	stationary_distribution_cg = stationary_distribution_cg / np.sum(stationary_distribution_cg)
	assert np.allclose(stationary_distribution_cg, memberships.T.dot(pi))

	# check eigenvectors
	assert proportional_columns(eigenvectors_cg, np.linalg.inv(A2)) # equation at the bottom of page 162 in Röblitz & Weber

	# comparison to msmtools
	chi_, Aref = msmtools.analysis.dense.pcca._pcca_connected_isa(eigenvectors_, n_clusters_)
	assert same_cols(A, Aref)
	assert same_cols(eigenvectors_[:, 0:n_clusters_].dot(A), chi_)

	A2ref = msmtools.analysis.dense.pcca._opt_soft(eigenvectors_, Aref, n_clusters_)

	assert same_cols(A2, A2ref)

	memberships_ref = msmtools.analysis.pcca_memberships(T, n_clusters_)
	assert same_cols(memberships, memberships_ref)

	T = np.array([[0.9, 0.1, 0.0, 0.0, 0.0],
	[0.1, 0.9, 0.0, 0.0, 0.0],
	[0.0, 0.1, 0.8, 0.1, 0.0],
	[0.0, 0.0, 0.0, 0.8, 0.2],
	[0.0, 0.0, 0.0, 0.2, 0.8]])
	memberships_, *_ = pcca_plus_for_transition_matrix(transition_matrix=T, n_clusters=2)
	ref = np.array([[1., 0.],
	[1., 0.],
	[0.5, 0.5],
	[0., 1.],
	[0., 1.]])
	np.assert_allclose(memberships_, ref)
	memberships_, *_ = pcca_plus_for_transition_matrix(transition_matrix=T, n_clusters=4)
	ref = np.array([[1., 0., 0., 0.],
	[0., 1., 0., 0.],
	[0., 0.5, 0.5, 0.],
	[0., 0., 1., 0.],
	[0., 0., 0., 1.]])
	np.assert_allclose(memberships_, ref)

	print("OK")
No results found