Source code for occuspytial.utils

import warnings

import numpy as np
from scipy.linalg import pinvh


[docs]def get_generator(random_state=None): """Get an instance of a numpy random number generator object. This instance uses `SFC64 <https://tinyurl.com/y2jtyly7>`_ bitgenerator, which is the fastest numpy currently has to offer as of version 1.19. This function conveniently instantiates a generator of this kind and should be used in all modules. Parameters ---------- random_state : {None, int, array_like[ints], numpy.random.SeedSequence} A seed to initialize the bitgenerator. Defaults to ``None``. Returns ------- numpy.random.Generator Instance of numpy's Generator class, which exposes a number of random number generating methods. Examples -------- >>> from occuspytial.utils import get_generator >>> rng = get_generator() # The instance can be used to access functions of ``numpy.random`` >>> rng.standard_normal() -0.203 # random """ bitgenerator = np.random.SFC64(random_state) return np.random.default_rng(bitgenerator)
[docs]def rand_precision_mat(lat_row, lat_col, max_neighbors=8, rho=1): """Generate a random spatial precision matrix. The spatial precision matrix is generated using a rectengular lattice of dimensions `lat_row` x `lat_col`, and thus the row and colum size of the matrix is (`lat_row` x `lat_col`). Parameters ---------- lat_row : int Number of rows of the lattice used to generate the matrix. lat_col : int Number of columns of the lattice used to generate the matrix. max_neighbors : {4, 8}, optional The maximum number of neighbors for each site. The default is 8. rho : float, optional The spatial weight parameter. Takes values between 0 and 1, with 0 implying independent random effects and 1 implying strong spatial autocorrelation. Setting the value to 1 is equivalent to generating the Intrinsic Autoregressive Model. Returns ------- scipy.sparse.coo_matrix Spatial precision matrix Raises ------ ValueError If the `max_neighbours` is any value other than 4 or 8. Examples -------- >>> from occuspytial.utils import rand_precision_mat >>> Q = rand_precision_mat(10, 5) >>> Q <50x50 sparse matrix of type '<class 'numpy.int64'>' with 364 stored elements in COOrdinate format> # The matrix can be converted to numpy format using method ``toarray()`` >>> Q.toarray() array([[ 3, -1, 0, ..., 0, 0, 0], [-1, 5, -1, ..., 0, 0, 0], [ 0, -1, 5, ..., 0, 0, 0], ..., [ 0, 0, 0, ..., 5, -1, 0], [ 0, 0, 0, ..., -1, 5, -1], [ 0, 0, 0, ..., 0, -1, 3]]) """ if max_neighbors == 8: nn = 'queen' elif max_neighbors == 4: nn = 'rook' else: raise ValueError('Maximum number of neighbors should be one of {4, 8}') with warnings.catch_warnings(): # ignore the "geopandas not available" warning since it is not relevant warnings.simplefilter('ignore', UserWarning) import libpysal W = libpysal.weights.lat2SW(lat_row, lat_col, criterion=nn, row_st=False) W = W.tocoo() D = W.sum(axis=1).A1 W.data = -W.data * rho W.setdiag(D) return W
[docs]def make_data( n=150, min_v=None, max_v=None, ns=None, p=3, q=3, tau_range=(0.25, 1.5), max_neighbors=8, random_state=None, ): """Generate random data to use for modelling species occupancy. Parameters ---------- n : int, optional Number of sites. Defaults to 150. min_v : int, optional Minimum number of visits per site. If None, the maximum number is set to 2. Defaults to None. max_v : int, optional Maximum number of visits per site. If None, the maximum number is set to 10% of `n`. Defaults to None. ns : int, optional Number of surveyed sites out of `n`. If None, then this parameter is set to 50% of `n`. Defaults to None. p : int, optional Number covariates to use for species occupancy. Defaults to 3. q : int, optional Number of covariates to use for conditonal detection. Defaults to 3. tau_range : tuple, optional The range to randomly sample the precision parameter value from. Defaults to (0.25, 1.5). max_neighbors : int, optional Maximum number of neighbors per site. Should be one of {4, 8}. Default is 8. random_state : int, optional The seed to use for random number generation. Useful for reproducing generated data. If None then a random seed is chosen. Defaults to None. Returns ------- Q : scipy.sparse.coo_matrix Spatial precision matrix W : Dict[int, np.ndarray] Dictionary of detection corariates where the keys are the site numbers of the surveyed sites and the values are arrays containing the design matrix of each corresponding site. X : np.ndarray Design matrix of species occupancy covariates. y : Dict[int, np.ndarray] Dictionary of survey data where the keys are the site numbers of the surveyed sites and the values are number arrays of 1's and 0's where 0's indicate "no detection" and 1's indicate "detection". The length of each array equals the number of visits in the corresponding site. alpha : np.ndarray True values of coefficients of detection covariates. beta : np.ndarray True values of coefficients of occupancy covariates. tau : np.ndarray True value of the precision parameter z : np.ndarray True occupancy state for all `n` sites. Raises ------ ValueError When `n` is less than the default 150 sites. When `min_v` is less than 1. When `max_v` is less than 2 or greater than `n`. When `ns` is not a positive integer or greater than `n`. Examples -------- >>> from occuspytial.utils import make_data >>> Q, W, X, y, alpha, beta, tau, z = make_data() >>> Q <150x150 sparse matrix of type '<class 'numpy.float64'>' with 1144 stored elements in COOrdinate format> >>> Q.toarray() array([[ 3., -1., 0., ..., 0., 0., 0.], # random [-1., 5., -1., ..., 0., 0., 0.], [ 0., -1., 5., ..., 0., 0., 0.], ..., [ 0., 0., 0., ..., 5., -1., 0.], [ 0., 0., 0., ..., -1., 5., -1.], [ 0., 0., 0., ..., 0., -1., 3.]]) >>> W {81: array([[ 1. , 1.01334565, 0.93150242], # random [ 1. , 0.19276808, -1.71939657], [ 1. , 0.23866531, 0.0559545 ], [ 1. , 1.36102304, 1.73611887], [ 1. , 0.47247886, 0.73410589], [ 1. , -1.9018879 , 0.0097963 ]]), 131: array([[ 1. , 1.67846707, -1.12476746], [ 1. , -1.63131532, -1.32216705], [ 1. , -1.37431173, -0.79734213], ..., 21: array([[ 1. , 1.6416734 , -1.91642502], [ 1. , 0.2256312 , -1.68929118], [ 1. , 1.36953093, 1.08758129], [ 1. , -1.08029212, 0.40219588]])} >>> X array([[ 1. , 0.71582433, 1.76344395], [ 1. , 0.8561976 , 1.0520401 ], [ 1. , -0.28051247, 0.16809809], ..., [ 1. , 0.86702262, -1.18225448], [ 1. , -0.41346399, -0.9633078 ], [ 1. , -0.23182363, 1.69930761]]) >>> y {15: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), # random 81: array([0, 0, 0, 1, 1, 0]), ..., 21: array([0, 1, 0, 0])} >>> alpha array([-1.43291816, -0.87932413, -1.84927642]) # random >>> beta array([-0.62084322, -1.09645564, -0.93371374]) # random >>> tau 1.415532667780688 # random >>> z array([0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0]) """ rng = get_generator(random_state) if n < 150: raise ValueError('n cant be lower than 150') if min_v is None: min_v = 2 elif min_v < 1: raise ValueError('min_v needs to be at least 1') if max_v is None: max_v = n // 10 elif max_v < 2: raise ValueError('max_v is too small') elif max_v > n: raise ValueError('max_v cant be more than n') if ns is None: ns = n // 2 elif ns == 0: raise ValueError('ns should be positive') elif ns > n: raise ValueError('ns cant be more than n') surveyed_sites = rng.choice(range(n), size=ns, replace=False) visits_per_site = rng.integers(min_v, max_v, size=ns, endpoint=True) alpha = rng.standard_normal(q) beta = rng.standard_normal(p) tau = rng.uniform(*tau_range) factors = [] for i in range(3, n): if (n % i) == 0: factors.append(i) row = rng.choice(factors) col = n // row Q = rand_precision_mat(row, col, max_neighbors=max_neighbors).astype(float) Q_pinv = pinvh(Q.toarray(), cond=1e-5) eta = rng.multivariate_normal(np.zeros(n), Q_pinv / tau, method='eigh') X = rng.uniform(-2, 2, n * p).reshape(n, -1) X[:, 0] = 1 psi = np.exp(-np.logaddexp(0, -X @ beta + eta)) z = rng.binomial(1, p=psi, size=n) W, y = {}, {} for i, j in zip(surveyed_sites, visits_per_site): _W = rng.uniform(-2, 2, size=j * q).reshape(j, -1) _W[:, 0] = 1 d = np.exp(-np.logaddexp(0, -_W @ alpha)) W[i] = _W y[i] = rng.binomial(1, z[i] * d) return Q, W, X, y, alpha, beta, tau, z