Below are a few examples of common statistical models implemented in greta.
A simple, one-variable Bayesian linear regression model using the attitude data
# variables & priors
<- normal(0, 10)
int <- normal(0, 10)
coef <- cauchy(0, 3, truncation = c(0, Inf))
sd
# linear predictor
<- int + coef * attitude$complaints
mu
# observation model
distribution(attitude$rating) <- normal(mu, sd)
A multi-variable Bayesian linear regression model using the attitude data
data(attitude)
<- as.matrix(attitude[, 2:7]) design
<- normal(0, 10)
int <- normal(0, 10, dim = ncol(design))
coefs <- cauchy(0, 3, truncation = c(0, Inf))
sd
# matrix multiplication is more efficient than multiplying the coefficients
# separately
<- int + design %*% coefs
mu
distribution(attitude$rating) <- normal(mu, sd)
A multiple Bayesian linear regression model using the warpbreaks
data.
data("warpbreaks")
<- as_data(model.matrix(breaks ~ wool + tension, warpbreaks))
X <- as_data(warpbreaks$breaks) y
<- variable()
int <- normal(0, 5, dim = ncol(X) - 1)
coefs <- c(int, coefs)
beta
<- X %*% beta
eta
distribution(y) <- poisson(exp(eta))
A multi-variable Bayesian categorical regression model using the iris data.
data(iris)
<- as_data(cbind(1, iris[, 1:4]))
X <- model.matrix(~ Species - 1, iris)
y <- ncol(X)
P <- ncol(y) K
<- normal(0, 5, dim = c(P, K - 1))
beta <- X %*% beta
eta <- imultilogit(eta)
prob distribution(y) <- categorical(prob)
A multi-variable Bayesian linear regression model using an exponential-normal prior for the coefficients.
data(attitude)
<- as.matrix(attitude[, 2:7]) design
<- normal(0, 10)
int <- cauchy(0, 3, truncation = c(0, Inf))
sd
<- exponential(0.5, dim = ncol(design))
tau <- normal(0, tau)
coefs <- int + design %*% coefs
mu
distribution(attitude$rating) <- normal(mu, sd)
A hierarchical, Bayesian linear regression model using the iris data, with random intercepts for each of the three species.
# linear model parameters
<- normal(0, 10)
int <- normal(0, 10)
coef <- cauchy(0, 3, truncation = c(0, Inf))
sd
# hierarchical model for species effect; use the first species as the baseline
# like in lm()
<- lognormal(0, 1)
species_sd <- normal(0, species_sd, dim = 2)
species_offset <- rbind(0, species_offset)
species_effect <- as.numeric(iris$Species)
species_id
# model
<- int + coef * iris$Sepal.Width + species_effect[species_id]
mu distribution(iris$Sepal.Length) <- normal(mu, sd)
A hierarchical, Bayesian linear regression model using the iris data, with random intercepts and slopes for each of the three species. The slopes and intercepts for each species are uncorrelated in this example.
# linear model parameters
<- normal(0, 10)
int <- normal(0, 10)
coef <- cauchy(0, 3, truncation = c(0, Inf))
sd
<- as.numeric(iris$Species)
species_id
# random intercepts
<- lognormal(0, 1)
species_int_sd <- normal(0, species_int_sd, dim = 2)
species_int <- rbind(0, species_int)
species_int_eff
# random slopes
<- lognormal(0, 1)
species_slope_sd <- normal(0, species_slope_sd, dim = 2)
species_slope <- rbind(0, species_slope)
species_slope_eff
# model
<- int + coef * iris$Sepal.Width + species_int_eff[species_id] + iris$Sepal.Width * species_slope_eff[species_id]
mu distribution(iris$Sepal.Length) <- normal(mu, sd)
The following examples show some common Bayesian priors of which some induce sparsity.
A simple, one-variable Bayesian linear regression model that uses flat priors for the coefficients. A flat prior using variable
puts an unbounded uniform distribution on the parameter. With unconstrained flat priors, the posterior will be proportional to the likelihood and the MAP will correspond to the MLE. Flat priors are usually chosen when there is little knowledge about the parameters available.
# variables & priors
<- variable()
int <- variable()
coef <- cauchy(0, 3, truncation = c(0, Inf))
sd
# linear predictor
<- int + coef * attitude$complaints
mu
# observation model
distribution(attitude$rating) <- normal(mu, sd)
Here we estimate a simple, one-variable Bayesian linear regression model that uses a ridge prior. The ridge prior has a frequentist interpretation where it is used as a penalty for regression coefficients. Among other effects, the penalty shrinks the coefficients towards zero to reduce variance without setting them to zero. The Bayesian version uses a normal distribution for the slopes and a inverse gamma prior for the strength of the penalty. Note that since the prior in our intercept is still improper, the joint prior is also improper.
# variables & priors
<- variable()
int <- cauchy(0, 3, truncation = c(0, Inf))
sd
<- inverse_gamma(1, 1)
tau <- normal(0, tau)
coef
# linear predictor
<- int + coef * attitude$complaints
mu
# observation model
distribution(attitude$rating) <- normal(mu, sd)
In this example we infer the parameters of one-variable Bayesian linear regression model using an exponential-normal prior. A compound exponential-normal prior can be interpreted like an equivalent to the frequentist LASSO. The exponential-normal prior yields a posterior that is pooled towards zero. An exponential-normal prior, or equivalently a Laplace prior, is consequently often chosen when a sparse solution is assumed, which, for instance, is a natural scenario in many biological settings.
# variables & priors
<- variable()
int <- inverse_gamma(1, 1)
sd
<- gamma(1, 1)
lambda <- exponential(0.5 * lambda**2)
tau <- normal(0, tau)
coef
# linear predictor
<- int + coef * attitude$complaints
mu
# observation model
distribution(attitude$rating) <- normal(mu, sd)
A simple, one-variable Bayesian linear regression model using a horseshoe prior. The horseshoe, just as the LASSO, can be used when the slopes are assumed to be sparse. According to the original publication: > its flat, Cauchy-like tails allow strong signals to remain large […] > a posteriori. Yet its infinitely tall spike at the origin provides > severe shrinkage for the zero elements
<- function (tau = 1, dim = NULL) {
horseshoe <- cauchy(0, 1, truncation = c(0, Inf), dim = dim)
lambda <- tau ^ 2 * lambda ^ 2
sd normal(0, sd, dim = dim)
}
# variables & priors
<- variable()
int <- inverse_gamma(1, 1)
sd <- horseshoe()
coef
# linear predictor
<- int + coef * attitude$complaints
mu
# observation model
distribution(attitude$rating) <- normal(mu, sd)
The regularized (‘Finnish’) horseshoe (doi.org/10.1214/17-EJS1337SI) remedies a problem of the original horseshoe: large, unregularized values for the coefficients. This is especially problematic in scenarios where the parameters are only weakly identified by the data, as in logistic regression with perfectly seperable data.
<- function (tau = 1, c = 1, dim = NULL) {
regularized_horseshoe stopifnot(c > 0)
<- cauchy(0, 1, truncation = c(0, Inf), dim = dim)
lambda <- (c^2 * lambda^2) / (c^2 + tau^2 * lambda^2)
lambda_tilde <- tau ^ 2 * lambda_tilde ^ 2
sd normal(0, sd, dim = dim)
}
# variables & priors
<- variable()
int <- inverse_gamma(1, 1)
sd <- regularized_horseshoe()
coef
# linear predictor
<- int + coef * attitude$complaints
mu
# observation model
distribution(attitude$rating) <- normal(mu, sd)
Below are some more advanced examples implemented in greta.
A hierarchical, Bayesian linear regression model using the iris data, with random intercepts and slopes for each of the three species. The slopes and intercepts for each species are correlated in this example. We allow every species to have a species specific slope for Sepal.Length
.
<- normal(0, 10)
int <- normal(0, 10)
coef <- cauchy(0, 3, truncation = c(0, Inf))
sd
<- length(unique(iris$Species))
n_species <- as.numeric(iris$Species)
species_id
<- model.matrix(~ Species + Sepal.Length * Species - 1, data = iris)
Z
<- multivariate_normal(matrix(0, 1, 2),
gamma_matrix diag(2),
n_realisations = 3)
<- c(gamma_matrix)
gamma
<- as_data(iris$Sepal.Width)
wi <- as_data(Z)
Z <- int + coef * wi + Z %*% gamma
mu
distribution(iris$Sepal.Length) <- normal(mu, sd)
A hierarchical, Bayesian linear regression model using the iris data, with random intercepts and slopes for each of the three species. This time we try to set up the marginal model, i.e. when we integrate the conditional density.
<- variable()
int <- normal(0, 5)
coef <- cauchy(0, 3, truncation = c(0, Inf))
sd
<- length(unique(iris$Species))
n_species <- as.numeric(iris$Species)
species_id
<- model.matrix(~ Species + Sepal.Length * Species - 1, data = iris)
Z <- zeros(n_species * 2, n_species * 2)
G
for (s in unique(species_id)) {
c(s, s + n_species), c(s, s + n_species)] <- diag(2)
G[
}
<- int + coef * iris$Sepal.Width
mu <- zeros(nrow(iris), nrow(iris))
V diag(V) <- sd
<- as_data(Z)
Z <- V + Z %*% G %*% t(Z)
V
<- t(iris$Sepal.Width)
sep distribution(sep) <- multivariate_normal(t(mu), V)
Bayesian neural network estimates an easy neural network with a normal prior on the edge weights. For clarity we use an architecture without a hidden layer, such that the weights actually correspond to coefficients in a linear regression model.
N <- 100
p <- 10
set.seed(23)
X <- matrix(rnorm(N * p), N)
beta <- rnorm(10)
y <- X %*% beta + rnorm(N, sd = 0.1)
<- function(x)
neural_network
{# this can be arbitrarily complex, e.g. multiple hidden layers
%*% weights
x
}
<- normal(0, 1, dim = c(p, 1))
weights <- inverse_gamma(1, 1)
sd
distribution(y) <- normal(neural_network(X), sd)
Factor analysis is a linear latent model used for finding a lower-dimensional probabilistic description of a data set with observations \(\mathbf{x}_i \in \mathbb{R}^p\). We assume the data are generated according to \[ \mathbf{x}_i = \mathbf{W} \mathbf{z}_i + \boldsymbol \mu + \epsilon_i \] where the noise \(\epsilon\) is normally distributed with zero mean and diagonal covariance matrix \(\Psi = \mathrm{diag}(\psi_1, \dots, \psi_p)\). The goal of factor analysis is to estimate the latent variables \(\mathbf{z}_i \mathbb{R}^q\).
In this example we take the mean vector \(\boldsymbol \mu\) to be zero.
<- normal(0, 1, dim = c(p, q))
W <- normal(0, 1, dim = c(q, n))
Z <- zeros(p, p)
psi diag(psi) <- inverse_gamma(1, 1, dim = p)
distribution(X) <- multivariate_normal(t(W %*% Z), psi)
The BUGS project provide a number of example models written in the BUGS modelling language. These models will run in WinBUGS and OpenBUGS, and likely also in JAGS. The Stan wiki provides Stan implementations of these models.
The following sections provide greta implementations of some of these example models, alongside the BUGS code from WinBUGS examples volume 2 (pdf) and Stan code and an R version of the data from the Stan example models wiki.
Air analyses reported respiratory illness versus exposure to nitrogen dioxide in 103 children. The parameters alpha
, beta
and sigma2
are known in advance, and the data are grouped into three categories.
See WinBUGS examples volume 2 (pdf) for details.
y <- c(21, 20, 15)
n <- c(48, 34, 21)
Z <- c(10, 30, 50)
alpha <- 4.48
beta <- 0.76
sigma2 <- 81.14
sigma <- sqrt(sigma2)
tau <- 1 / sigma2
J <- 3
<- normal(0, 32, dim = 2)
theta <- alpha + beta * Z
mu <- normal(mu, sigma)
X <- ilogit(theta[1] + theta[2] * X)
p distribution(y) <- binomial(n, p)
data {
real alpha;
real beta;
real<lower=0> sigma2;
int<lower=0> J;
array[J] int y;
vector[J] Z;
array[J] int n;
}
transformed data {
real<lower=0> sigma;
sigma = sqrt(sigma2);
}
parameters {
real theta1;
real theta2;
vector[J] X;
}
model {
array[J] real p;
theta1 ~ normal(0, 32); // 32^2 = 1024
theta2 ~ normal(0, 32);
X ~ normal(alpha + beta * Z, sigma);
y ~ binomial_logit(n, theta1 + theta2 * X);
}
Beetles considers dose-response data from an experiment applying carbon disulphide to 8 beetles. The original example compares three different link functions; the logit, probit and complementary log-log. Here, only the code for the logit link is shown. You can implement the other two link functions in greta by changing ilogit
to iprobit
or icloglog
.
See WinBUGS examples volume 2 (pdf) for details.
x <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839)
n <- c(59, 60, 62, 56, 63, 59, 62, 60)
r <- c(6, 13, 18, 28, 52, 53, 61, 60)
N <- 8
<- normal(0, 32)
alpha_star <- normal(0, 32)
beta <- ilogit(alpha_star + beta * (x - mean(x)))
p distribution(r) <- binomial(n, p)
<- alpha_star - beta * mean(x)
alpha <- p * n rhat
data {
int<lower=0> N;
array[N] int<lower=0> n;
array[N] int<lower=0> r;
vector[N] x;
}
transformed data {
vector[N] centered_x;
real mean_x;
mean_x = mean(x);
centered_x = x - mean_x;
}
parameters {
real alpha_star;
real beta;
}
transformed parameters {
vector[N] m;
m = alpha_star + beta * centered_x;
}
model {
alpha_star ~ normal(0.0, 1.0E4);
beta ~ normal(0.0, 1.0E4);
r ~ binomial_logit(n, m);
}
generated quantities {
real alpha;
array[N] real p;
array[N] real llike;
array[N] real rhat;
for (i in 1 : N) {
p[i] = inv_logit(m[i]);
llike[i] = r[i] * log(p[i]) + (n[i] - r[i]) * log(1 - p[i]);
rhat[i] = p[i] * n[i]; // fitted values
}
alpha = alpha_star - beta * mean_x;
}
The following few code examples show how Stan code can be translated in equivalent greta models.
Lightspeed estimates a linear normal model without predictors. The data are 66 measurements from Simon Newcomb and represent the time required for light to travel roughly 7500 meters.
See also the Stan examples for details.
Eight schools estimates the effect of coaching programs in eight schools. The data are 8 measurements of coaching effects along with their standard errors.
See also the Stan example for details.
<- inverse_gamma(1, 1)
sigma_eta <- normal(0, sigma_eta, dim=N)
eta
<- normal(0, 100)
mu_theta <- normal(0, 5)
xi <- mu_theta + xi * eta
theta
distribution(y) <- normal(theta, sigma_y)
data {
int<lower=0> N;
vector[N] y;
vector[N] sigma_y;
}
parameters {
vector[N] eta;
real mu_theta;
real<lower=0, upper=100> sigma_eta;
real xi;
}
transformed parameters {
real<lower=0> sigma_theta;
vector[N] theta;
theta = mu_theta + xi * eta;
sigma_theta = fabs(xi) / sigma_eta;
}
model {
mu_theta ~ normal(0, 100);
sigma_eta ~ inv_gamma(1, 1); //prior distribution can be changed to uniform
eta ~ normal(0, sigma_eta);
xi ~ normal(0, 5);
y ~ normal(theta, sigma_y);
}
Here we provide some examples of common ecological models. We begin with a basic logistic regression often used in species distribution modelling to estimate species probability of presence. We then provide increasingly complex species distribution models, beginning with modelling observation error directly, and moving on to models for multiple species: independently but concurrently modelled species, partially pooled coefficients, repeated measures, and sub-models.
A simple logistic regression being to estimate the probability of species presence along a number of environmental gradients.
An example of a simple poisson regression being used to estimate the abundance of a species along a number of environmental gradients.
This is an example of a simple logistic regression with an extra observation-level error term, to model over-dispersion or clustering in occupancy data from multiple visits.
# make fake data
n_env <- 3
n_sites <- 20
n_obs <- 5
# n_sites x n_env matrix of environmental variables
env <- matrix(rnorm(n_sites * n_env), nrow = n_sites)
# n_sites observations of species presence or absence over n_obs visits
occupancy <- rbinom(n_sites, n_obs, 0.5)
<- normal(0, 10)
alpha <- normal(0, 10, dim = n_env)
beta <- normal(0, 10, dim = n_sites)
error
# logit-linear model with extra variation
<- alpha + env %*% beta + error
linear_predictor <- ilogit(linear_predictor)
p
# distribution (likelihood) over observed values
distribution(occupancy) <- binomial(n_obs, p)
An example of a logistic regression being used to estimate the probability of multiple species’ presences along a number of environmental gradients. Although modelled concurrently, the random variables for each species are independent. We first simulate some data to model followed by the greta
code.
Where a single observation per species and location would have a bernoulli error distribution, multiple observations for each species and location have a binomial distribution.
When modelling multiple species (or other grouping factor), we need an extra step in constructing the linear predictor. In order to add multiple greta
arrays together for each species we can use the sweep()
function.
# make fake data
n_species <- 5
n_env <- 3
n_sites <- 20
env <- matrix(rnorm(n_sites * n_env), nrow = n_sites)
occupancy <- matrix(rbinom(n_species * n_sites, 1, 0.5), nrow = n_sites)
<- normal(0, 10, dim = n_species)
alpha <- normal(0, 10, dim = c(n_env, n_species))
beta
<- env %*% beta
env_effect
# add intercepts for all species
<- sweep(env_effect, 2, alpha, FUN = '+')
linear_predictor
# ilogit of linear predictor
<- ilogit(linear_predictor)
p
# a single observation means our data are bernoulli distributed
distribution(occupancy) <- bernoulli(p)
An example of a logistic regression being used to estimate the probability of multiple species’ presences along a number of environmental gradients. Instead of assuming independence of species regression coefficients, we assume they are drawn from a shared distribution. We partially pool species responses. This gives us not ony the regression coefficients for each species but also a global average coefficient and a measure of variation between species responses to environmental gradients.
# make fake data
n_species <- 5
n_env <- 1
n_sites <- 50
env <- matrix(rnorm(n_sites * n_env), nrow = n_sites)
occupancy <- matrix(rbinom(n_sites * n_species, 1, 0.5), nrow = n_sites)
<- normal(0, 10, dim = 1)
global_alpha <- uniform(0, 10, dim = 1)
global_alpha_sd <- normal(global_alpha, global_alpha_sd, dim = n_species)
alpha
<- normal(0, 10, dim = n_env)
global_betas <- uniform(0, 10, dim = n_env)
global_betas_sd <- normal(global_betas, global_betas_sd, dim = c(n_env, n_species))
beta
<- env %*% beta
env_effect
# add intercepts for all species
<- sweep(env_effect, 2, alpha, FUN = '+')
linear_predictor
# ilogit of linear predictor
<- ilogit(linear_predictor)
p
distribution(occupancy) <- bernoulli(p)
An example of a logistic regression being used to estimate the probability of multiple species’ presences along a number of environmental gradients. Instead of assuming independence of species regression coefficients, or partial pooling in shared distributions, we use a sub-model to estimate species regression coefficients. In this case, we’re using species traits to estimate their response to different environmental gradients.
Because we’re building a sub-model, it’s more efficient to simply add a column of ones to dataframes for the base model and sub-model. This is simply to prevent our code from becoming too cumbersome. If we didn’t want to use our sub-model to estimate the intercept, we would not need to include the column of ones in the environmental dataframe.
# make fake data
n_species <- 3
n_env <- 1
n_sites <- 5
n_traits <- 1
# n_sites x n_env matrix of environmental variables
env <- matrix(rnorm(n_sites * n_env), nrow = n_sites)
# n_species * n_traits matix of trait variables
traits <- matrix(rnorm(n_species * n_traits), nrow = n_species)
# n_sites * n_species matrix of observed occupancy
occupancy <- matrix(rbinom(n_sites * n_species, 1, 0.5), nrow = n_sites)
# include a column of 1's for intercept estimation in the sub-model (traits) and base model
<- cbind(rep(1, n_species), traits)
traits <- cbind(rep(1, n_sites), env)
env
# redefine n_env and n_traits after adding columns for intercepts
<- ncol(env)
n_env <- ncol(traits)
n_traits
# sub-model parameters have normal prior distributions
<- normal(0, 10, dim = c(n_env, n_traits))
g # parameters of the base model are a function of the parameters of the sub-model
<- g %*% t(traits)
beta
# use the coefficients to get the model linear predictor
<- env %*% beta
linear_predictor
# use the logit link to get probabilities of occupancy
<- ilogit(linear_predictor)
p
# data are bernoulli distributed
distribution(occupancy) <- bernoulli(p)
Cormack-Jolly-Seber (CJS) models estimate probabilities of survival and recapture from mark-recapture data. These models assume that we can only ever see individuals that have been initially marked and released or recaptured following release (i.e. individuals do not exist until first observed). The two key parameters are survival, \(\phi\), and probability of recapture, \(p\). There is an additional derived parameter, \(\chi\), which is the probability that an individual is not recaptured following its final capture. \(\chi\) marginalises over multiple scenarios in which the individual is not observed either because it has died or because it is alive but not detected.
The introductory book to the program MARK has a lot of information on mark-recapture models, including CJS models (starting in Ch. 1) and the broader class of Jolly-Seber models (Ch. 12). There is also a section on mark-recapture models in the Stan language manual, which goes through the derivation of the parameter \(\chi\).
# data summaries
<- apply(y, 1, function(x) min(which(x > 0)))
first_obs <- apply(y, 1, function(x) max(which(x > 0)))
final_obs <- apply(y, 1, function(x) seq(min(which(x > 0)), max(which(x > 0)), by = 1)[-1])
obs_id <- unlist(obs_id)
obs_id <- apply(y, 1, function(x) x[min(which(x > 0)):max(which(x > 0))][-1])
capture_vec <- unlist(capture_vec)
capture_vec
# priors
<- beta(1, 1, dim = n_time)
phi <- beta(1, 1, dim = n_time)
p
# derived parameter
<- ones(n_time)
chi for (i in seq_len(n_time - 1)) {
<- n_time - i
tn <- (1 - phi[tn]) + phi[tn] * (1 - p[tn + 1]) * chi[tn + 1]
chi[tn]
}
# dummy variables
<- ones(length(obs_id)) # definitely alive
alive_data <- final_obs != 20 # ignore observations in last timestep
not_seen_last <- ones(sum(not_seen_last)) # final observation
final_observation
# set likelihoods
distribution(alive_data) <- bernoulli(phi[obs_id - 1])
distribution(capture_vec) <- bernoulli(p[obs_id])
distribution(final_observation) <- bernoulli(chi[final_obs[not_seen_last]])
model {
# priors
for (t in 1:(n_time - 1)) {
phi[t] ~ dunif(0, 1)
p[t] ~ dunif(0, 1)
}
# likelihood
for (i in 1:n_obs) {
z[i, first_obs[i]] <- 1 # state at first capture must be 1!
for (t in (first_obs[i] + 1):n_time) {
mu1[i, t] <- phi[t - 1] * z[i, t - 1]
z[i, t] ~ dbern(mu1[i, t]) # true state
mu2[i, t] <- p[t - 1] * z[i, t]
y[i, t] ~ dbern(mu2[i, t]) # observed state
}
}
}
/**
* Cormack-Jolly-Seber Model
*
* following section 1.2.1 of:
* http://www.maths.otago.ac.nz/home/resources/theses/PhD_Matthew_Schofield.pdf
*
*/
data {
int<lower=2> K; // capture events
int<lower=0> I; // number of individuals
array[I, K] int<lower=0, upper=1> X; // X[i,k]: individual i captured at k
}
transformed data {
array[I] int<lower=0, upper=K + 1> first; // first[i]: ind i first capture
array[I] int<lower=0, upper=K + 1> last; // last[i]: ind i last capture
array[K] int<lower=0, upper=I> n_captured; // n_capt[k]: num aptured at k
first = rep_array(K + 1, I);
last = rep_array(0, I);
for (i in 1 : I) {
for (k in 1 : K) {
if (X[i, k] == 1) {
if (k < first[i]) {
first[i] = k;
}
if (k > last[i]) {
last[i] = k;
}
}
}
}
n_captured = rep_array(0, K);
for (i in 1 : I) {
for (k in 1 : K) {
n_captured[k] = n_captured[k] + X[i, k];
}
}
}
parameters {
vector<lower=0, upper=1>[K - 1] phi; // phi[k]: Pr[alive at k + 1 | alive at k]
vector<lower=0, upper=1>[K] p; // p[k]: Pr[capture at k]
// note: p[1] not used in model and hence not identified
}
transformed parameters {
vector<lower=0, upper=1>[K] chi; // chi[k]: Pr[no capture > k | alive at k]
{
int k;
chi[K] = 1.0;
k = K - 1;
while (k > 0) {
chi[k] = (1 - phi[k]) + phi[k] * (1 - p[k + 1]) * chi[k + 1];
k = k - 1;
}
}
}
model {
for (i in 1 : I) {
if (last[i] > 0) {
for (k in (first[i] + 1) : last[i]) {
target += log(phi[k - 1]); // i survived from k-1 to k
if (X[i, k] == 1) {
target += log(p[k]);
} // i captured at k
else {
target += log1m(p[k]);
} // i not captured at k
}
target += log(chi[last[i]]); // i not seen after last[i]
}
}
}
generated quantities {
// phi[K-1] and p(K) not identified, but product is
real beta;
vector<lower=0>[K] pop_hat; // population
beta = phi[K - 1] * p[K];
for (k in 1 : K) {
pop_hat[k] = n_captured[k] / p[k];
}
}