Source code for occuspytial.gibbs.logit

from math import sqrt

import numpy as np
from numpy.linalg import multi_dot
from polyagamma import random_polyagamma
from scipy.linalg import solve_triangular
from scipy.sparse import block_diag
from scipy.sparse.linalg import minres
from scipy.special import expit

from ..distributions import ensure_sums_to_zero, precision_mvnorm

from .base import GibbsBase


class _EtaICARPosterior:
    r"""Posterior distribution of the eta parameter of LogitICARGibbs.

    Parameters
    ----------
    Q : scipy sparse matrix
        ICAR precision matrix

    Methods
    -------
    rvs(b, omega, tau)

    Notes
    -----
    The distribution is a Multivariate Normal truncated on the hyperplane
    :math:`\mathbf{1}^T\mathbf{x} = \mathbf{0}`, and is of the form:

    .. math::

        \mathcal{N}(\mathbf{\Lambda}^{-1}\mathbf{b}, \mathbf{\Lambda}^{-1})


    where :math:`\mathbf{\Lambda} = (\tau \mathbf{Q} + \mathbf{S})`.

    This implementation allows one to sample from this normal distribution
    without explicitly factorizing :math:`\mathbf{\Lambda}` or computing its
    inverse. Since :math:`\mathbf{Q}` is known beforehand and can be factorized
    exactly once, and that :math:`\mathbf{S}` is diagonal, a random draw from
    this distribution can be computed efficiently as follows:

        1. Compute the square-root of :math:`\mathbf{S}` and multiply it by
           a standard normal draw.
        2. Muliply :math`\tau` by the pre-computed eigenfactor of
          :math:`\mathbf{Q}` and a standard normal draw of appropriate size.
        3. Sum the result of step 1) and 2) with :math:`\mathbf{b}`. The
           resulting array has the distribution

           .. math::

              \mathbf{y} = \mathcal{N}(\mathbf{b}, \mathbf{\Lambda})

        4. To get the draw from the desired distribution, we solve the linear
           system :math::`\mathbf{\Lambda}\mathbf{x} = \mathbf{y}` for
           :math:`\mathbf{x}`, and apply Algorithm 2 of [1]_ where the
           :math:`\mathbf{G}` in our case is :math:`\mathbf{1}^T` and
           :math:`\mathbf{r}` is :math:`\mathbf{0}`.
    """

    def __init__(self, Q):
        self._block_Q = block_diag((Q, Q), format='csc')
        s, u = np.linalg.eigh(Q.toarray())
        self._eigen = u[:, 1:] * np.sqrt(s[1:])
        self._n = Q.shape[0]
        self._rhs = np.ones(self._n * 2)
        self._n_plus_k = self._n + self._eigen.shape[1]
        self._guess = None

    def rvs(self, b, omega, tau, random_state):
        """Generate a random draw from this distribution."""
        eps = random_state.standard_normal(self._n_plus_k)
        rnorm1 = np.sqrt(omega) * eps[:self._n]
        rnorm2 = self._eigen @ (sqrt(tau) * eps[self._n:])
        out = b + rnorm1 + rnorm2

        block_prec = self._block_Q.copy()
        block_prec.data = tau * block_prec.data
        diag_vec = np.tile(omega, 2)
        block_prec.setdiag(block_prec.diagonal() + diag_vec)

        self._rhs[:self._n] = out
        # Iterative solvers are efficient at solving sparse large systems.
        xz, fail = minres(block_prec, self._rhs, x0=self._guess)
        # update starting guess for next call to the function
        self._guess = xz

        if fail:
            raise RuntimeError('MINRES solver did not converge!')

        x = xz[:self._n]
        z = xz[self._n:]

        ensure_sums_to_zero(x, z, out)

        return out


[docs]class LogitICARGibbs(GibbsBase): r"""Gibbs sample using logit link and ICAR model for spatial random effects. Parameters ---------- Q : np.ndarray Spatial precision matrix of spatial random effects. W : Dict[int, np.ndarray] Dictionary of detection corariates where the keys are the site numbers of the surveyed sites and the values are arrays containing the design matrix of each corresponding site. X : np.ndarray Design matrix of species occupancy covariates. y : Dict[int, np.ndarray] Dictionary of survey data where the keys are the site numbers of the surveyed sites and the values are number arrays of 1's and 0's where 0's indicate "no detection" and 1's indicate "detection". The length of each array equals the number of visits in the corresponding site. hparams : {None, Dict[str, Union[float, np.ndarray]}, optional Hyperparameters of the occupancy model. valid keys for the dictionary are: - ``a_mu``: mean of the normal prior of detection covariates. - ``a_prec``: precision matrix of the normal prior of detection covariates. - ``b_mu``: mean of the normal prior of occupancy covariates. - ``b_prec``: precision matrix of the normal prior of occupancy covariates. - ``tau_rate``: rate parameter of the Gamma prior of the spatial parameter. - ``tau_shape``: shape parameter of the Gamma prior of the spatial parameter. random_state : {None, int, numpy.random.SeedSequence} A seed to initialize the bitgenerator. pertub : float, optional The value by which to pertube the diagonal of the spatial precision matrix in order to stabilize the cholesky factorization of the posterior precision matrix of the :math:`\\eta` parameter. This is sometimes referred to as modified cholesky decompostion [1]_ . It helps get numerically stable samples from the conditional distribution of this parameter. Methods ------- sample(size, burnin=0, start=None, chains=1, progressbar=True) See Also -------- occuspytial.gibbs.probit.ProbitRSRGibbs : A gibbs sampler using a probit link function Notes ----- The algorithm developed here is the same as the one presented in [2]_ except the model used to account for spatial correlation is the Intrinsic Conditional Autoregressive (ICAR) model. A polya-gamma random variable distribution is used in a data augmentation strategy in order to obtain known conditional distributions to sample from, thus arising the Gibbs sampler impelemented here. Details of the sampler are explained in [2]_ . References ---------- .. [1] McSweeney, T. Modified Cholesky Decomposition and Applications. 2017; Masters thesis, University of Manchester. .. [2] Clark, AE, Altwegg, R. Efficient Bayesian analysis of occupancy models with logit link functions. Ecol Evol. 2019; 9: 756– 768. https://doi.org/10.1002/ece3.4850. """ def __init__(self, Q, W, X, y, hparams=None, random_state=None): super().__init__(Q, W, X, y, hparams, random_state) self._configure(Q, hparams) def _configure(self, Q, hparams): super()._configure(Q, hparams) self.dists.eta_posterior = _EtaICARPosterior(self.fixed.Q) def _update_omega_a(self): """Update the latent variable ``omega_a``. This variable is ssociated with the cofficients of the conditional detection covariates. """ # get occupancy state of the sites where species was not observed. not_obs_occupancy = [i for i in self.fixed.not_obs if self.state.z[i]] self.state.exists = self.fixed.obs + not_obs_occupancy self.state.W = self.W[self.state.exists] b = self.state.W @ self.state.alpha self.state.omega_a = random_polyagamma( 1, b, disable_checks=True, random_state=self.rng ) def _update_omega_b(self): """Update the latent variable ``omega_b``. This variable is associated with the cofficients of the occupancy covariates. """ b = self.X @ self.state.beta + self.state.spatial self.state.omega_b = random_polyagamma( 1, b, disable_checks=True, random_state=self.rng ) def _update_tau(self): eta = self.state.eta rate = 0.5 * (eta @ self.fixed.Q @ eta) + self.fixed.tau_rate self.state.tau = self.rng.gamma(self.fixed.tau_shape, 1 / rate) def _update_eta(self): omega = self.state.omega_b b = self.state.k - (omega * (self.X @ self.state.beta)) self.state.eta = self.dists.eta_posterior.rvs( b, omega, self.state.tau, self.rng ) self.state.spatial = self.state.eta def _update_alpha(self): WT = self.state.W.T y = self.y[self.state.exists] - 0.5 A = (WT * self.state.omega_a) @ WT.T + self.fixed.a_prec b = WT @ y + self.fixed.a_prec_by_mu self.state.alpha = precision_mvnorm(b, A, random_state=self.rng) def _update_beta(self): spat = self.state.spatial omega = self.state.omega_b XT = self.X.T A = (XT * omega) @ self.X + self.fixed.b_prec b = XT @ (self.state.k - (omega * spat)) + self.fixed.b_prec_by_mu self.state.beta = precision_mvnorm(b, A, random_state=self.rng) def _update_z(self): """Update the occupancy state of each site.""" no = self.fixed.not_obs ns = self.fixed.not_surveyed beta = self.state.beta spat = self.state.spatial num1 = expit(self.X[no] @ beta + spat[no]) num2 = expit(self.fixed.W_not_obs @ -self.state.alpha) stack_prod = np.multiply.reduceat(num2, self.fixed.stacked_w_indices) num = num1 * stack_prod p = num / ((1 - num1) + num) self.state.z[no] = self.rng.uniform(size=self.fixed.n_no) < p if ns: p = expit(self.X[ns] @ beta + spat[ns]) self.state.z[ns] = self.rng.uniform(size=self.fixed.n_ns) < p self.state.k = self.state.z - 0.5
[docs] def step(self): """Complete one gibbs sampler update. The method should not be called directly. It is called internally by the ``sample`` method. """ self._update_omega_b() self._update_tau() self._update_eta() self._update_beta() self._update_omega_a() self._update_alpha() self._update_z()
class _EtaRSRPosterior: r"""Posterior distribution of the eta parameter of LogitRSRGibbs. Parameters ---------- Q : np.ndarray RSR precision matrix. Methods ------- rvs(b, omega, tau) Notes ----- The distribution is of the form: .. math:: \mathcal{N}(\mathbf{\Lambda}^{-1}\mathbf{b}, \mathbf{\Lambda}^{-1}) where .. math:: \mathbf{\Lambda} = (\tau \mathbf{Q}+\mathbf{K}^{-1}\mathbf{S}\mathbf{K}) This implementation allows one to sample from this normal distribution without explicitly factorizing :math:`\mathbf{\Lambda}` or computing its inverse. Since :math:`\mathbf{Q}` and :math:`\mathbf{K}` are known beforehand and can be factorized exactly once, and that :math:`\mathbf{S}` is diagonal, a random draw from this distribution can be computed efficiently as follows: 1. Multiply :math:`\mathbf{K}^{-1}` with the square-root of :math:`\mathbf{S}` and a standard normal draw. 2. Muliply :math`\tau` by the pre-computed eigenfactor of :math:`\mathbf{Q}` and a standard normal draw of appropriate size. 3. Sum the result of step 1) and 2) with :math:`\mathbf{b}`. The resulting array has the distribution .. math:: \mathbf{y} = \mathcal{N}(\mathbf{b}, \mathbf{\Lambda}) 4. To get the draw from the desired distribution, we solve the linear system :math::`\mathbf{\Lambda}\mathbf{x} = \mathbf{y}` for :math:`\mathbf{x}` """ def __init__(self, Q, K): s, u = np.linalg.eigh(Q) self._Q = Q self._eigen = u * np.sqrt(s) self._KT = K.T self._n = Q.shape[0] self._n_plus_k = self._n + K.shape[0] def rvs(self, b, omega, tau, random_state): """Generate a random draw from this distribution.""" factor1 = self._KT * np.sqrt(omega) factor2 = sqrt(tau) * self._eigen eps = random_state.standard_normal(self._n_plus_k) rnorm1 = factor1 @ eps[self._n:] rnorm2 = factor2 @ eps[:self._n] out = b + rnorm1 + rnorm2 prec = factor1 @ factor1.T + tau * self._Q out = np.linalg.solve(prec, out) return out
[docs]class LogitRSRGibbs(LogitICARGibbs): """Gibbs sampler using logit link and RSR model for spatial random effects. This algorithm is an implementation of the gibbs sampler in [1]_ where a Reduced Spatial Regression (RSR) model is used to account for spatial autocorrelation in a single-season site occupancy model. Parameters ---------- Q : np.ndarray Spatial precision matrix of spatial random effects. W : Dict[int, np.ndarray] Dictionary of detection corariates where the keys are the site numbers of the surveyed sites and the values are arrays containing the design matrix of each corresponding site. X : np.ndarray Design matrix of species occupancy covariates. y : Dict[int, np.ndarray] Dictionary of survey data where the keys are the site numbers of the surveyed sites and the values are number arrays of 1's and 0's where 0's indicate "no detection" and 1's indicate "detection". The length of each array equals the number of visits in the corresponding site. hparams : {None, Dict[str, Union[float, np.ndarray]}, optional Hyperparameters of the occupancy model. valid keys for the dictionary are: - ``a_mu``: mean of the normal prior of detection covariates. - ``a_prec``: precision matrix of the normal prior of detection covariates. - ``b_mu``: mean of the normal prior of occupancy covariates. - ``b_prec``: precision matrix of the normal prior of occupancy covariates. - ``tau_rate``: rate parameter of the Gamma prior of the spatial parameter. - ``tau_shape``: shape parameter of the Gamma prior of the spatial parameter. random_state : {None, int, numpy.random.SeedSequence} A seed to initialize the bitgenerator. r : float, optional The threshold of non-negative eigenvalues to keep of the Moran matrix to form the RSR precision matrix. Defaults to 0.5, meaning only columns of the Moran matrix that have corresponding eigenvalues greater than 0.5 will be used. If `q` is set, then this parameter is ignored. q : int, optional The number of columns of the Moran matrix to use in order to form the spatial precision matrix of the RSR model. If this parameter is used, then the value of the `r` parameter is ignore. If the value is None, then the default value of `r` is used to create the spatial precision matrix of the RSR model. Defaults to None. Methods ------- sample(size, burnin=0, start=None, chains=1, progressbar=True) See Also -------- occuspytial.gibbs.logiit.LogitICARGibbs : The same sampler but using an ICAR model. occuspytial.gibbs.probit.ProbitRSRGibbs : A gibbs sampler using a probit link function. References ---------- .. [1] Clark, AE, Altwegg, R. Efficient Bayesian analysis of occupancy models with logit link functions. Ecol Evol. 2019; 9: 756– 768. https://doi.org/10.1002/ece3.4850. """ def __init__( self, Q, W, X, y, hparams=None, random_state=None, r=0.5, q=None, ): super().__init__(Q, W, X, y, hparams, random_state) self._configure_rsr(r, q, hparams) def _configure_rsr(self, r, q, hparams): # XTX_i = inv(self.X.T @ self.X) chol = np.linalg.cholesky(self.X.T @ self.X) z = solve_triangular(chol, np.eye(self.X.shape[1]), lower=True) XTX_i = solve_triangular(chol, z, lower=True, trans=1) # P = I - X @ XTX_i @ XT P = -multi_dot([self.X, XTX_i, self.X.T]) P[np.diag_indices_from(P)] += 1 A = self.fixed.Q.copy() A.data = -A.data A.setdiag(0) omega = self.fixed.n * (P.T @ A @ P) / A.sum() # return eigen vectors of omega and keep first q columns # corresponding to the q largest eigenvalues of omega greater # than the specified threshold w, v = np.linalg.eigh(omega) if q: self.fixed.q = q else: if not 0 <= r <= 1: raise ValueError('Threshold value needs to be in [0, 1]') # number of eigenvalues > threshold self.fixed.q = w[w >= r].size if not self.fixed.q: raise ValueError( 'The Moran Operator Matrix of the data has no positive ' 'eigenvalues. Set threshold to a lower value' ) # keep first q eigenvectors of ordered eigens K = v[:, -self.fixed.q:] # replace Q with Minv Q_copy = self.fixed.Q del self.fixed.Q self.fixed.Q = K.T @ Q_copy @ K self.fixed.K = K if not hparams: # `_set_default_hyperparams` has been called so modify tau_shape del self.fixed.tau_shape self.fixed.tau_shape = 0.5 + 0.5 * self.fixed.q del self.dists.eta_posterior self.dists.eta_posterior = _EtaRSRPosterior(self.fixed.Q, K) def _initialize_default_start(self, state): state = super()._initialize_default_start(state) state.eta = self.rng.normal(scale=5, size=self.fixed.q) state.spatial = self.fixed.K @ self.state.eta return state def _initialize_posterior_state(self, start=None): if start is None: self.state = self._initialize_default_start(self.state) else: self.state.alpha = start['alpha'] self.state.beta = start['beta'] self.state.tau = start['tau'] self.state.eta = start['eta'] self.state.spatial = self.fixed.K @ self.state.eta def _update_eta(self): K = self.fixed.K omega = self.state.omega_b b = K.T @ (self.state.k - omega * (self.X @ self.state.beta)) self.state.eta = self.dists.eta_posterior.rvs( b, omega, self.state.tau, self.rng ) self.state.spatial = K @ self.state.eta