Source code for pycorrelate.pycorrelate

"""
Functions to compute linear correlation on discrete signals (uniformly
sampled in time) **or** on point-processes (e.g. timestamps of events).
"""

import numpy as np
import numba


[docs]@numba.jit(nopython=True) def pnormalize(G, t, u, bins): r"""Normalize point-process cross-correlation function. This normalization is usually employed for fluorescence correlation spectroscopy (FCS) analysis. The normalization is performed according to `(Laurence 2006) <https://doi.org/10.1364/OL.31.000829>`__. Basically, the input argument `G` is multiplied by: .. math:: \frac{T-\tau}{n(\{i \ni t_i \le T - \tau\})n(\{j \ni u_j \ge \tau\})} where `n({})` is the operator counting the elements in a set, *t* and *u* are the input arrays of the correlation, *τ* is the time lag and *T* is the measurement duration. Arguments: G (array): raw cross-correlation to be normalized. t (array): first input array of "points" used to compute `G`. u (array): second input array of "points" used to compute `G`. bins (array): array of bins used to compute `G`. Needs to have the same units as input arguments `t` and `u`. Returns: Array of normalized values for the cross-correlation function, same size as the input argument `G`. """ duration = max((t.max(), u.max())) - min((t.min(), u.min())) Gn = G.copy() for i, tau in enumerate(bins[1:]): Gn[i] *= ((duration - tau) / (float((t >= tau).sum()) * float((u <= (u.max() - tau)).sum()))) return Gn
[docs]@numba.jit(nopython=True) def pcorrelate(t, u, bins, normalize=False): """Compute correlation of two arrays of discrete events (Point-process). The input arrays need to be values of a point process, such as photon arrival times or positions. The correlation is efficiently computed on an arbitrary array of lag-bins. As an example, bins can be uniformly spaced in log-space and span several orders of magnitudes. (you can use :func:`make_loglags` to creat log-spaced bins). This function implements the algorithm described in `(Laurence 2006) <https://doi.org/10.1364/OL.31.000829>`__. Arguments: t (array): first array of "points" to correlate. The array needs to be monothonically increasing. u (array): second array of "points" to correlate. The array needs to be monothonically increasing. bins (array): bin edges for lags where correlation is computed. normalize (bool): if True, normalize the correlation function as typically done in FCS using :func:`pnormalize`. If False, return the unnormalized correlation function. Returns: Array containing the correlation of `t` and `u`. The size is `len(bins) - 1`. See also: :func:`make_loglags` to genetate log-spaced lag bins. """ nbins = len(bins) - 1 # Array of counts (histogram) counts = np.zeros(nbins, dtype=np.int64) # For each bins, imin is the index of first `u` >= of each left bin edge imin = np.zeros(nbins, dtype=np.int64) # For each bins, imax is the index of first `u` >= of each right bin edge imax = np.zeros(nbins, dtype=np.int64) # For each ti, perform binning of (u - ti) and accumulate counts in Y for ti in t: for k, (tau_min, tau_max) in enumerate(zip(bins[:-1], bins[1:])): if k == 0: j = imin[k] # We start by finding the index of the first `u` element # which is >= of the first bin edge `tau_min` while j < len(u): if u[j] - ti >= tau_min: break j += 1 imin[k] = j if imax[k] > j: j = imax[k] while j < len(u): if u[j] - ti >= tau_max: break j += 1 imax[k] = j # Now j is the index of the first `u` element >= of # the next bin left edge counts += imax - imin G = counts / np.diff(bins) if normalize: G = pnormalize(G, t, u, bins) return G
[docs]@numba.jit def ucorrelate(t, u, maxlag=None): """Compute correlation of two signals defined at uniformly-spaced points. The correlation is defined only for positive lags (including zero). The input arrays represent signals defined at uniformily-spaced points. This function is equivalent to :func:`numpy.correlate`, but can efficiently compute correlations on a limited number of lags. Note that binning point-processes with uniform bins, provides signals that can be passed as argument to this function. Arguments: tx (array): first signal to be correlated ux (array): second signal to be correlated maxlag (int): number of lags where correlation is computed. If None, computes all the lags where signals overlap `min(tx.size, tu.size) - 1`. Returns: Array contained the correlation at different lags. The size of this array is equal to the input argument `maxlag` (if defined) or to `min(tx.size, tu.size) - 1`, and its `dtype` is the same as argument `t`'s. Example: Correlation of two signals `t` and `u`:: >>> t = np.array([1, 2, 0, 0]) >>> u = np.array([0, 1, 1]) >>> pycorrelate.ucorrelate(t, u) array([2, 3, 0]) The same result can be obtained with numpy swapping `t` and `u` and restricting the results only to positive lags:: >>> np.correlate(u, t, mode='full')[t.size - 1:] array([2, 3, 0]) Also works with other types: >>> t = np.array([1.2, 2.4, 0.5, 0.6]) >>> u = np.array([0, 1.2, 1.3]) >>> pycorrelate.ucorrelate(t, u) array([3.53, 4.56, 1.56]) """ if maxlag is None: maxlag = u.size maxlag = int(min(u.size, maxlag)) C = np.empty(maxlag, dtype=t.dtype) for lag in range(C.size): tmax = min(u.size - lag, t.size) umax = min(u.size, t.size + lag) C[lag] = (t[:tmax] * u[lag:umax]).sum() return C
[docs]def make_loglags(exp_min, exp_max, points_per_base, base=10, return_int=True): """Make a log-spaced array useful as lag bins for cross-correlation. This function creates an arrays of log-spaced time-lag bins to be used with :func:`pcorrelate`. By default it returns integer time-lag bins to avoid floating point inaccuracies in the correlation (showing up as higher noise at small time-lags). Arguments: exp_min (int): exponent of the minimum value exp_max (int): exponent of the maximum value points_per_base (int): number of points per base (i.e. in a decade when `base = 10`) base (int): base of the exponent. Default 10. return_int (bool): if True (default) return integer bin edges to avoid floating point inaccuracies. If False, returned bin edges are float. Returns: Array of log-spaced values with specified range and spacing. Example: Compute log10-spaced bins with 5 bins per decade, starting from 1 (10⁰) and stopping at 10⁶:: >>> make_loglags(0, 6, 5) array([ 1, 2, 3, 4, 6, 10, 16, 25, 40, 63, 100, 158, 251, 398, 631, 1000, 1585, 2512, 3981, 6310, 10000, 15849, 25119, 39811, 63096, 100000, 158489, 251189, 398107, 630957, 1000000]) Compute log10-spaced bins with 2 bins per decade, starting from 10⁻¹ and stopping at 10³:: >>> make_loglags(-1, 3, 2, return_int=False) array([ 1.00000000e-01, 3.16227766e-01, 1.00000000e+00, 3.16227766e+00, 1.00000000e+01, 3.16227766e+01, 1.00000000e+02, 3.16227766e+02, 1.00000000e+03]) See also: :func:`pcorrelate` """ num_points = points_per_base * (exp_max - exp_min) + 1 bins = np.logspace(exp_min, exp_max, num_points, base=base) if return_int: # using `unique` because rounding to int may create duplicates bins = np.unique(np.round(bins).astype('int64')) return bins