Set up

rm(list=ls())

cases <- c(0,1,2,3)
households <- c(29,9,2,2)
mat <- matrix(NA, nrow=4, ncol=2)
mat[,1] <- cases
mat[,2] <- households
mat <- as.data.frame(mat)
colnames(mat) <- c("No.of.Cases", "No.of.Households")
mat
##   No.of.Cases No.of.Households
## 1           0               29
## 2           1                9
## 3           2                2
## 4           3                2
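
A quick check: the household counts sum to 42, the total number of households, which is used as the multinomial size further below.

sum(mat$No.of.Households)
## [1] 42
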
# This function computes the final size probability P(T = k | Y = y):
# the probability that k - y of the susceptibles become infected, given
# y initial infectives and n - y initial susceptibles.

# m: number of initial susceptibles
# j: number of susceptibles that ultimately become infected
# a: number of initial infectives
# alpha: person-to-person infection rate
# c: the length of the (fixed) infectious period
compute.conditional.prob <- function(m, j, a, alpha, c){
  if (j==0) {
    res <- (exp(-alpha*m*c))^a
  }
  else {

    part.one <- exp(-alpha*(m-j)*c)^(j+a)
    part.two <- choose(m, j)
    
    # solve the triangular system recursively: subtract the terms
    # corresponding to all smaller final sizes k < j
    sum <- 0;
    for (k in 0:(j-1)) {
      sum <- sum + choose(m-k, j-k)*compute.conditional.prob(m,k,a,alpha,c)/(exp(-alpha*(m-j)*c)^(k+a))
    }
    res <- part.one*(part.two - sum)
  }
  return(res)
}
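
As a quick sanity check, the conditional final size probabilities should sum to one over j = 0, ..., m; the values m = 2, a = 1, alpha = 0.5 and c = 1 below are arbitrary.

# P(j | m, a) should sum to 1 over j = 0, ..., m (up to rounding error)
sum(sapply(0:2, function(j) compute.conditional.prob(2, j, 1, 0.5, 1)))
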
# We wish to compute P(T = k), for k = 0, ..., n
# "n" is the size of the household
# "alpha" is the person-to-person infection rate
# "c" is the length of the (fixed) infectious period
# "p" is the probability that an individual avoids infection from outside
# the household (so each individual is initially infected with probability 1 - p)
compute.marginal.prob <- function(k, n, c, alpha, p) {  
  prob <- 0;  
  for (y in 0:n) {
    if (k >= y) {
      cond.prob <- compute.conditional.prob(n - y, k - y, y, alpha, c)
    }
    else {
      cond.prob <- 0
    }
    prob <- prob + cond.prob*dbinom(y, n, 1 - p)
  }
  prob
}
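
Similarly, the marginal probabilities P(T = k) should sum to one over k = 0, ..., n; the parameter values below are arbitrary.

# households of size n = 3 with c = 1, alpha = 0.5, p = 0.8
sum(sapply(0:3, function(k) compute.marginal.prob(k, 3, 1, 0.5, 0.8)))
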

The function below simulates final size data from the model for 42 households of size three, with infectious period c = 1 (matching the observed data).

sim.final.size.data <- function(alpha, p) {
  
  # compute final size probabilities
  p1 <- compute.marginal.prob(0,3,1,alpha, p)
  p2 <- compute.marginal.prob(1,3,1,alpha, p)
  p3 <- compute.marginal.prob(2,3,1,alpha, p)
  p4 <- compute.marginal.prob(3,3,1,alpha, p)

  # put them in a vector
  final.size.prob.vector <- c(p1, p2, p3, p4)
  
  # draw the outcomes of the 42 households from a multinomial distribution
  sim.data <- rmultinom(1, size = 42, final.size.prob.vector)
  return(sim.data)
  
}
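
A minimal usage sketch (alpha = 0.5 and p = 0.8 are arbitrary values); the result is a 4-by-1 matrix of household counts that sums to 42.

set.seed(123)
sim.final.size.data(alpha = 0.5, p = 0.8)
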

Exercises

Exercise 1

Assume that a priori \[p \sim \mbox{Beta}(1, 1) \qquad \mbox{ and } \qquad \alpha \sim \mbox{Gamma}(1, 1).\] Write a rejection sampler that draws from the exact posterior by accepting a proposed pair \((\alpha, p)\) only if the data simulated from it match the observed data exactly (Exact Bayesian Computation, EBC).

ER2 <- function(n.samples, y=c(29,9, 2, 2), mu.p = 1, nu.p = 1, mu.alpha = 1, nu.alpha = 0.001) {
  
  res <- matrix(NA, n.samples, ncol=2)
  n.samples.accepted <- 0
  
  while (n.samples.accepted < n.samples) {
    
    alpha <- rgamma(1, mu.alpha, nu.alpha)
    p <- rbeta(1, mu.p, nu.p)
    
    sim.data <- as.vector(sim.final.size.data(alpha, p))
    
    # accept only on an exact match with the observed data
    if (all(sim.data == y)) {
      n.samples.accepted <- n.samples.accepted + 1
      res[n.samples.accepted, ] <- c(alpha, p)
    }
  }
  return(res)
}

Let us run the sampler to obtain 1,000 samples:

res.EBC <- ER2(1000, nu.alpha = 1)

hist(res.EBC[,1], prob=TRUE, main="alpha")

hist(res.EBC[,2], prob=TRUE, main="p")
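
Numerical posterior summaries, such as means and 95% credible intervals, can be read off the accepted samples:

colMeans(res.EBC)                                    # posterior means of (alpha, p)
apply(res.EBC, 2, quantile, probs = c(0.025, 0.975)) # 95% credible intervals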

Exercise 2

Assuming the same priors as above, write an algorithm to draw samples from an approximation to the posterior distribution. In other words, write some code to implement an ABC algorithm.

ABC <- function(n.samples, h, y=c(29,9, 2, 2), mu.p = 1, nu.p = 1, mu.alpha = 1, nu.alpha = 0.001) {
  
  res <- matrix(NA, n.samples, ncol=2)
  n.samples.accepted <- 0
  
  while (n.samples.accepted < n.samples) {
    
    alpha <- rgamma(1, mu.alpha, nu.alpha)
    p <- rbeta(1, mu.p, nu.p)
    sim.data <- as.vector(sim.final.size.data(alpha, p))
    
    # accept if every category differs from the observed count by at most h
    if (all(abs(sim.data - y) <= h)) {
      n.samples.accepted <- n.samples.accepted + 1
      res[n.samples.accepted, ] <- c(alpha, p)
    }
  }
  return(res)
}
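
The tolerance h controls the usual ABC trade-off: h = 0 recovers the exact-match sampler above, while larger values of h accept more proposals at the cost of a coarser approximation to the posterior. A minimal sketch (the sample size and tolerance are arbitrary):

# h = 0 is equivalent to the exact-match (EBC) sampler; h = 3 is much
# faster to run but targets a cruder approximation
res.ABC.h3 <- ABC(n.samples = 200, h = 3, nu.alpha = 1)
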

Let us run the sampler to obtain 1,000 samples:

res.ABC <- ABC(n.samples = 1000, h=1, nu.alpha = 1)

hist(res.ABC[,1], prob=TRUE, main="alpha")

hist(res.ABC[,2], prob=TRUE, main="p")

Exercise 3

Let us draw samples from the true posterior using our MCMC sampler from Lab Session 4.

# this function computes the log-likelihood of any pair (alpha, p)
# for the data given in the handout
loglik <- function(alpha, p) {
  
  29*log(compute.marginal.prob(0,3,1,alpha, p)) +  
    9*log(compute.marginal.prob(1,3,1,alpha, p)) +  
    2*log(compute.marginal.prob(2,3,1,alpha, p)) +  
    2*log(compute.marginal.prob(3,3,1,alpha, p)) 

}
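
A quick evaluation at an arbitrary pair of parameter values is a useful check that the function returns a finite value:

loglik(0.5, 0.8)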

# this is an MCMC algorithm for the Asian influenza household data model
# iter: number of MCMC iterations
# sigma: the standard deviation of the random walk Metropolis proposal

mcmc.hh <- function(iter, sigma, mu.p = 1, nu.p = 1, mu.alpha = 1, nu.alpha = 0.001) {


  # initial values for the Markov Chain
  alpha.cur <- 0.5;
  p.cur <- 0.8;
  
  # create a matrix to store the output.
  res <- matrix(NA, nrow = iter, ncol=2);
  res[1,] <- c(alpha.cur, p.cur);


  for (i in 2:iter) {

    
    ##############
    # update alpha
    ##############

    # propose new value
    alpha.can <- rnorm(1, alpha.cur, sigma)

    # alpha is strictly positive, therefore any proposed value which is negative is automatically rejected.
    if (alpha.can > 0.0) {

      # otherwise, accept it with some probability:

      # compute the log-densities
      log.pi.cur <- loglik(alpha.cur, p.cur) + dgamma(alpha.cur, mu.alpha, nu.alpha, log=TRUE)
      log.pi.can <- loglik(alpha.can, p.cur) + dgamma(alpha.can, mu.alpha, nu.alpha, log=TRUE)

      # the log q-ratio is zero since the random walk proposal is symmetric
      log.q.ratio <- 0;

      u <- runif(1)
      if (log(u) < log.pi.can - log.pi.cur + log.q.ratio) {
        alpha.cur <- alpha.can
      }
    }

    ############
    # propose p
    ############
    p.can <- runif(1, 0, 1)

    
    # compute the log-densities
    log.pi.cur <- loglik(alpha.cur, p.cur) + dbeta(p.cur, mu.p, nu.p, log=TRUE)
    log.pi.can <- loglik(alpha.cur, p.can) + dbeta(p.can, mu.p, nu.p, log=TRUE)

    # the log q-ratio is zero, because the U(0,1) proposal density is 1
    # everywhere on (0,1)
    log.q.ratio <- dunif(p.cur, 0, 1, log=TRUE) - dunif(p.can, 0, 1, log=TRUE)

    u <- runif(1)
    if (log(u) < log.pi.can - log.pi.cur + log.q.ratio) {
      p.cur <- p.can
    }

    # store the output
    res[i,] <- c(alpha.cur, p.cur)

  }
    
    
  return(res)
}

Let us compare EBC vs MCMC. Since EBC accepts only exact matches, the two algorithms should be drawing from the same posterior distribution (assuming the same priors).

res.MCMC <- mcmc.hh(10000, 0.1, nu.alpha = 1)
res.MCMC <- res.MCMC[-c(1:1000),] # burn in

par(mfrow=c(1,2))

plot(res.MCMC[,1],type='l')
plot(res.MCMC[,2],type='l')
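
The empirical acceptance rate of each update can be read off the chains as the proportion of iterations in which the parameter moved:

mean(diff(res.MCMC[,1]) != 0)  # acceptance rate of the alpha update
mean(diff(res.MCMC[,2]) != 0)  # acceptance rate of the p update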

hist(res.MCMC[,1],prob=TRUE, main="alpha: EBC vs MCMC")
lines(density(res.EBC[,1]),col=2)

hist(res.MCMC[,2],prob=TRUE, main="p: EBC vs MCMC")
lines(density(res.EBC[,2]),col=2)
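
Beyond the visual check, the posterior means from the two samplers should agree up to Monte Carlo error:

rbind(MCMC = colMeans(res.MCMC), EBC = colMeans(res.EBC))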

par(mfrow=c(1,2))

hist(res.MCMC[,1],prob=TRUE, main="alpha: ABC vs MCMC")
lines(density(res.ABC[,1]),col=2)

hist(res.MCMC[,2],prob=TRUE, main="p: ABC vs MCMC")
lines(density(res.ABC[,2]),col=2)

Exercise 4

Use your ER2 and ABC algorithms from Exercises 1 and 2 assuming a vague prior for \(\alpha\), e.g. \(\alpha \sim \mbox{Gamma}(1, 0.001)\), and comment on the efficiency of both algorithms.

You can try this, but it will take a while, especially for large sample sizes!

res.EBC <- ER2(100, nu.alpha = 0.001)
hist(res.EBC[,1], prob=TRUE, main="alpha")
hist(res.EBC[,2], prob=TRUE, main="p")
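
To quantify the efficiency of the two rejection samplers under the vague prior, one can time them for a small number of accepted draws; the sample sizes below are deliberately small, and EBC in particular can be expected to be very slow.

system.time(res.EBC.vague <- ER2(50, nu.alpha = 0.001))
system.time(res.ABC.vague <- ABC(n.samples = 50, h = 1, nu.alpha = 0.001))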