library(circular) # circular stats, cran.r-project.org/web/packages/circular/circular.pdf

Introduction to plotting tools

Generate some random temporal data:

library(lubridate)  # deal with date values
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
N <- 500
events <- as.POSIXct("2011-01-01", tz="GMT") + 
              days(floor(365*runif(N)))      + 
              hours(floor(24*rnorm(N)))      +  
              minutes(floor(60*runif(N))   ) +
              seconds(floor(60*runif(N)))
head(events)
## [1] "2011-11-18 17:47:02 GMT" "2011-08-05 23:22:30 GMT"
## [3] "2011-06-22 16:22:01 GMT" "2011-06-05 14:47:16 GMT"
## [5] "2011-10-18 12:21:06 GMT" "2011-07-26 16:40:32 GMT"
df <- data.frame(datetime = events, eventhour = hour(events))
df$work <- df$eventhour %in% seq(9, 17) # flag events during business hours (hour of day 9 through 17)
head(df,4)
##              datetime eventhour  work
## 1 2011-11-18 17:47:02        17  TRUE
## 2 2011-08-05 23:22:30        23 FALSE
## 3 2011-06-22 16:22:01        16  TRUE
## 4 2011-06-05 14:47:16        14  TRUE
# make our data into circular class from package circular
df$eventhour <- circular(df$eventhour%%24, # convert to 24 hrs
                         units="hours", 
                         template="clock24")

The package includes a rose diagram plot:

par(mfrow=c(1,2))
rose.diag(df$eventhour, bins = 24, col = "lightblue", main = "Events by Hour (sqrt scale)",
          radii.scale = "sqrt", prop=3)

rose.diag(df$eventhour, bins = 24, col = "lightblue", main = "Events by Hour (linear scale)", 
    prop = 12, radii.scale = "linear", shrink = 1.25, cex = 0.8, ticks=TRUE)

par(mfrow=c(1,1))

We can use a regular (non-circular) plot to show a density:

angles <- seq(0,2*pi,len=100)
data   <- dvonmises(circular(angles), mu=circular(pi/10), kappa=2)
plot(angles, data, type="l", col="blue")

And also draw a density in a circular plot:

ff <- function(x) dvonmises(x, mu=circular(pi/10), kappa=20)
curve.circular(ff, col="blue", shrink=1.5, lwd=2)

Or draw a histogram using a rose diagram together with the sample data points:

set.seed(221)
rdata <- rvonmises(75, circular(3*pi/2), 2.5)
rose.diag(rdata, bins=50, prop=1.5, shrink=1)
points(rdata, pch=20, col="red")

The von Mises Distribution

The von Mises distribution is a continuous probability distribution on the circle; it is the circular analogue of the normal distribution. It has two parameters: a location parameter, \(\mu\), and a concentration parameter, \(\kappa\) (the higher \(\kappa\), the more concentrated the probability mass is around \(\mu\)).

This is its pdf:

\[f(x|\mu,\kappa) = \frac{e^{\kappa \cos(x-\mu)}}{2\pi I_0(\kappa)}\]

where \[I_0(\kappa) = \frac{1}{\pi} \int_0^\pi e^{\kappa \cos(x)} dx\]

The Bessel function \(I_0(\kappa)\) can be computed with the base R function besselI(kappa, 0) or approximated like this:

I0 <- function(kappa) besselI(kappa,0)

# Approximates Bessel(x,nu)
#  increase iter for better approximation
# cf. functions.wolfram.com/Bessel-TypeFunctions/BesselI/introductions/Bessels/05/
approx.I0 <- function(x, nu=0, iter=20) {
  result <- (x/2)^nu / gamma(nu+1)   # k = 0 term of the series
  for(k in 1:iter)
    result <- result + (x/2)^(2*k+nu) / (gamma(k+nu+1)*factorial(k))
  result
}

kappa <- 10
approx.I0(kappa,0)
## [1] 2815.717
I0(kappa)
## [1] 2815.717
I.0(kappa)  # package 'circular' also has this function
## [1] 2815.717
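Putting the pieces together, the pdf above can be coded directly and checked against the package's dvonmises (an illustrative sketch, not part of the original text; pdf_vonmises is just a throwaway name):

pdf_vonmises <- function(x, mu, kappa) {
  exp(kappa * cos(x - mu)) / (2 * pi * besselI(kappa, 0))
}

pdf_vonmises(pi/3, mu=pi/10, kappa=2)
dvonmises(circular(pi/3), mu=circular(pi/10), kappa=2)   # should give the same value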

The next code creates a random von Mises sample with mean direction \(\pi/4\):

set.seed(101)
mu_real    <- pi/4
kappa_real <- 15
my_data    <- as.numeric(rvonmises(100, circular(mu_real), kappa_real))

Assuming the sample was generated by a von Mises distribution, we can easily compute the maximum-likelihood estimates with function mle.vonmises:

estimates <- mle.vonmises(as.circular(my_data))
estimates$mu[[1]]
## [1] 0.8028
estimates$kappa[[1]]
## [1] 13.8538

Let’s plot the estimate against the true density:

# plot two von Mises distributions
plot_circular_compare <- function(mu_real, kappa_real, mu_hat, kappa_hat, 
                                  shrink=1.75, col="red", add=FALSE) {
  ff.real     <- function(x) dvonmises(x, mu=circular(mu_real), kappa=kappa_real)
  ff.inferred <- function(x) dvonmises(x, mu=circular(mu_hat),  kappa=kappa_hat)
  curve.circular(ff.real,     col="blue", shrink=shrink, ylim=c(-0.75,1.25), lwd=2, add=add)
  curve.circular(ff.inferred, col=col,    lwd=2, add=T)
  legend("topleft",c("real","estimate"), col=c("blue",col), lwd=2, bty = "n")
}

plot_circular_compare(mu_real, kappa_real, estimates$mu[[1]], estimates$kappa[[1]])

The next sections show ways to solve this problem in a Bayesian setting using BUGS.

source("run_model.R") # import 'run.model()' to run bugs

BUGS model I: estimate mean direction with known concentration

The easiest variant is when we know \(\kappa\). The specification for this model in BUGS is:

modelString = "
  model {

      for (i in 1:n) {

        ## phi[i] <- -log(exp(kappa * cos(x[i]-mu)) / (2*pi*I0)) + C   pdf simplifies to:
        phi[i] <- - kappa * cos(x[i]-mu) + log(2*pi*I0) + C
        
        dummy[i] <- 0
        dummy[i] ~ dpois( phi[i] )
      }

      # Priors 

      mu    ~ dunif(0,pi2)

      # other values

      C   <- 1000000   # for the zero's trick
      pi  <- 3.14159
      pi2 <- 2*pi
  }
"

Since the von Mises is not a distribution available in BUGS, we implement its likelihood with the zeros trick: each observation gets a dummy datum equal to 0, modeled as Poisson with rate phi[i]; a Poisson assigns probability \(e^{-\phi}\) to the value 0, so setting phi[i] to the negative log-likelihood (plus a large constant C that keeps the rate positive) injects exactly the likelihood term we want.
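As a small sanity check in R (illustrative only), the Poisson rate used above, minus the constant C, should equal the negative log-density of the von Mises:

x_test <- pi/3; mu_test <- pi/4; kappa_test <- 15
-kappa_test * cos(x_test - mu_test) + log(2*pi*I0(kappa_test))              # phi[i] - C
-log(dvonmises(circular(x_test), mu=circular(mu_test), kappa=kappa_test))   # -log f(x|mu,kappa)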

With this model, we initialize parameter \(\mu\) at the sample’s empirical mean (to help BUGS start from a good value) and run the model:
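(A hedged aside, not part of the original workflow: for samples that wrap around the 0/\(2\pi\) boundary the arithmetic mean can be misleading; the circular mean is a safer starting value. A minimal sketch:)

# circular mean direction of a sample of angles (illustrative; the init below keeps mean())
circ_mean <- function(x) atan2(sum(sin(x)), sum(cos(x))) %% (2*pi)
circ_mean(my_data)   # close to mean(my_data) here, since this sample does not wrap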

# initializations
genInitFactory <- function()  {
  i <- 0
  function() {
    i <<- i + 1
    list( 
      mu = mean(my_data) # start with the mean estimate
    ) 
  }
}

# Everything is ready. Run the model!
run.model(modelString, samples=c("mu"), 
          data=list(I0    = I0(kappa_real),
                    x     = my_data,
                    kappa = kappa_real,
                    n     = length(my_data)), 
          chainLength=5e3, init.func=genInitFactory())
## model is syntactically correct
## data loaded
## model compiled
## Initializing chain 1:
## model is initialized
## 500 updates took 0 s
## monitor set for variable 'mu'
## 5000 updates took 1 s
mu_hat <- samplesStats(c("mu"))[[1]]  # get point estimate of distribution's mean

# plot results
plot_circular_compare(mu_real, kappa_real, mu_hat, kappa_real)

BUGS model II: estimate mean direction using an MLE concentration estimate

Assume we don’t know \(\kappa\), but approach the problem in a mixed way: feed the previous model with an empirical estimate of \(\kappa\) computed from the available sample. One way to estimate the concentration parameter \(\kappa\) is to use function A1inv to find its maximum-likelihood estimate:

estimate_kappa <- function(data) {
  sum_sins <- sum(sin(data))
  sum_coss <- sum(cos(data))
  A1inv(mean(cos(data - atan2(sum_sins, sum_coss))))
}
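As a quick illustrative check (not in the original), this should closely match the \(\kappa\) returned by mle.vonmises earlier:

estimate_kappa(my_data)   # compare with estimates$kappa[[1]] from mle.vonmises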

So, a simple extension of the previous model is to run it using this estimate of \(\kappa\):

kappa_hat <- estimate_kappa(my_data)

run.model(modelString, samples=c("mu"), 
          data=list(I0    = I0(kappa_hat),
                    x     = my_data,
                    kappa = kappa_hat,
                    n     = length(my_data)), 
          chainLength=5e3, init.func=genInitFactory())
## model is syntactically correct
## data loaded
## model compiled
## Initializing chain 1:
## model is initialized
## 500 updates took 0 s
## monitor set for variable 'mu'
## 5000 updates took 0 s
mu_hat <- samplesStats(c("mu"))[[1]]  # get point estimate of distribution's mean

# plot results
plot_circular_compare(mu_real, kappa_real, mu_hat, kappa_hat)

BUGS model III: estimate mean direction and concentration

Here we want to estimate both parameters with BUGS.

The following solution is based on the fact that it is not easy to compute the Bessel function \(I_0\) inside BUGS (it should be possible, but computationally expensive). So we give BUGS a set of candidate values of \(\kappa\) whose corresponding values \(I_0(\kappa)\) were precomputed in R.

The new model specification changes the log-likelihood. Instead of a fixed \(\kappa\), we use a variable kappa_hat selected via the random index k, which follows a categorical distribution over the supplied pairs \((\kappa, I_0(\kappa))\). All pairs start with equal probability.
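Outside BUGS, we can get a rough feel for what the categorical variable does (an illustrative sketch only): evaluate the log-likelihood of the sample over the same \(\kappa\) grid, holding \(\mu\) at its MLE, and look at the maximizer, which is roughly where k should concentrate.

grid_kappas <- 1:40
loglik <- sapply(grid_kappas, function(k)
  sum(log(dvonmises(circular(my_data), mu=circular(estimates$mu[[1]]), kappa=k))))
grid_kappas[which.max(loglik)]   # grid value with the highest likelihood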

modelString = "
  model {

      for (i in 1:n) {

        phi[i] <- - kappa_hat * cos(x[i]-mu) + log(2*pi*IO_hat) + C
        
        dummy[i] <- 0
        dummy[i] ~ dpois( phi[i] )
      }

      k  ~ dcat(p[])

      for(j in 1:n_kappas) {
        p[j] <- 1/n_kappas         # creating vector p = {1/n_kappas,...}
      }

      kappa_hat <- kappas[k]
      IO_hat    <- I0s[k]

      # Priors 

      mu ~ dunif(0,pi2)

      # other values

      C   <- 1000000   # for the zero's trick
      pi  <- 3.14159
      pi2 <- 2*pi
  }
"

Let’s give BUGS all values of \(\kappa\) from 1 to 40:

n_kappas <- 40
kappas <- 1:n_kappas
I0_s   <- sapply(kappas, I0)

# Everything is ready. Run the model!
run.model(modelString, samples=c("mu","kappa_hat"), 
          data=list(x        = my_data,
                    n        = length(my_data),
                    kappas   = kappas, 
                    I0s      = I0_s, 
                    n_kappas = n_kappas), 
          chainLength=5e3)
## model is syntactically correct
## data loaded
## model compiled
## initial values generated, model initialized
## 500 updates took 0 s
## monitor set for variable 'mu'
## monitor set for variable 'kappa_hat'
## 5000 updates took 2 s
mu_hat    <- samplesStats(c("mu"))[[1]]  # point estimate of distribution's mean
kappa_hat <- samplesStats(c("kappa_hat"))[[1]]  # point estimate of distribution's concentration

# plot results
plot_circular_compare(mu_real, kappa_real, mu_hat, kappa_hat)

Since we are doing Bayesian inference, we can also plot the uncertainty of these posterior estimates. First get the samples from the MCMC run:

mus    <- samplesSample("mu")
kappas <- samplesSample("kappa_hat")

Then plot a bunch of them:

ff.inferred <- function(x) dvonmises(x, mu=circular(mu_hat), kappa=kappa_hat)
curve.circular(ff.inferred, col="red", shrink=2.25, lwd=2) 
  
for (i in 1:250) {                                            # draw a number of samples
  ff.sample <- function(x) dvonmises(x, mu=circular(mus[i]), kappa=kappas[i])
  curve.circular(ff.sample, col="grey", lwd=2, add=T)
}

curve.circular(ff.inferred, col="red", lwd=2, add=T) # draw mean estimation
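Besides the plot, the posterior samples can be summarized numerically, for instance with 95% credible intervals (a small sketch):

quantile(mus,    probs=c(0.025, 0.975))   # credible interval for mu
quantile(kappas, probs=c(0.025, 0.975))   # credible interval for kappa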

BUGS model IV: mixture of von Mises with known concentrations

The Bayesian approach can handle mixtures within the same framework. Let’s see a possible solution for this type of problem. We present an example with two components, but it can be generalized to more complex mixtures. This solution assumes we know both concentration parameters \(\kappa_1, \kappa_2\).

Let’s create some random mixture data:

set.seed(101)

mu1_real    <- pi/4  # not known
kappa1_real <- 10    # known

mu2_real    <- pi    # not known
kappa2_real <- 5     # known

my_data1 <- as.numeric(rvonmises(100, circular(mu1_real), kappa1_real))
my_data2 <- as.numeric(rvonmises(100, circular(mu2_real), kappa2_real))

my_mixdata <- c(my_data1, my_data2)

par(mfrow=c(1,2))
rose.diag(my_mixdata, bins=50, prop=2.5, col="lightgreen")
points(as.circular(my_data1), pch=20, col="darkgreen")
points(as.circular(my_data2), pch=20, col="lightgreen")

ff.mix1 <- function(x) dvonmises(x, mu=circular(mu1_real), kappa=kappa1_real)
ff.mix2 <- function(x) dvonmises(x, mu=circular(mu2_real), kappa=kappa2_real)
curve.circular(ff.mix1, xlim=c(-1.5,1.5), col="darkgreen", shrink=1.1, lwd=2)
curve.circular(ff.mix2, col="lightgreen", lwd=2, add=T)

par(mfrow=c(1,1))

This model is based on the standard BUGS code for a mixture of Gaussians:

modelString = "
  model {

      for (i in 1:n) {

        phi[i] <- - kappa[i] * cos(x[i]-mu[i]) + log(2*pi*I0[i]) + C

        mu[i]    <- lambdaMu[G[i]]
        kappa[i] <- lambdaKappa[G[i]]
        I0[i]    <- lambdaI0[G[i]]
        
        G[i]   ~  dcat(P[])     # the cluster attributions for each x[i]

        dummy[i] <- 0
        dummy[i] ~ dpois( phi[i] )
      }

      P[1:2] ~ ddirch(alpha[])  # dirichlet distribution (here, just 2 clusters)
      alpha[1] <- 0.5           
      alpha[2] <- 0.5              

      lambdaKappa[1] <- 10
      lambdaKappa[2] <- 5

      lambdaI0[1] <- 2815.717   # I_0(10)
      lambdaI0[2] <- 27.23987   # I_0(5)

      lambdaMu[1] ~ dunif(0,pi2)
      lambdaMu[2] ~ dunif(0,pi2)

      # other values

      C   <- 1000000   # for the zero's trick
      pi  <- 3.14159
      pi2 <- 2*pi
  }
"

The vector G assigns each data point to a cluster: if a data point \(x\) is assigned to cluster \(i\), its likelihood is \(f(x|\mu_i, \kappa_i)\). Each G[i] follows a categorical distribution with probability vector \(P\), which in turn is given a Dirichlet prior with concentration parameters \(\alpha\) (a vector of positive values; here both are set to \(0.5\)).
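To make this generative structure concrete, here is an illustrative simulation of the label prior in plain R (rdirichlet1 is a throwaway helper, not used by the model):

set.seed(1)
rdirichlet1 <- function(alpha) { g <- rgamma(length(alpha), shape=alpha); g/sum(g) }
P <- rdirichlet1(c(0.5, 0.5))                     # mixture weights ~ Dirichlet(0.5, 0.5)
G <- sample(1:2, size=10, replace=TRUE, prob=P)   # cluster labels ~ Categorical(P)
P; G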

Let’s run the model and extract the posterior means:

run.model(modelString, samples=c("lambdaMu","G"), 
          data=list(x = my_mixdata,
                    n = length(my_mixdata),
                    G = c(1, rep(NA,length(my_mixdata)-1))), # fix one label to anchor the clusters (avoids label switching)
          chainLength=1e4)
## model is syntactically correct
## data loaded
## model compiled
## initial values generated, model initialized
## 1000 updates took 0 s
## monitor set for variable 'lambdaMu'
## monitor set for variable 'G'
## 10000 updates took 7 s
mu_clusters <- samplesStats("lambdaMu")$mean
mu_clusters <- mu_clusters %% (2*pi)  # place values inside the [0,2*pi] interval

The next code compares the true mix with the estimated mix:

ff.real1 <- function(x) dvonmises(x, mu=circular(mu1_real),  kappa=kappa1_real)
ff.real2 <- function(x) dvonmises(x, mu=circular(mu2_real),  kappa=kappa2_real)
ff.real  <- function(x) ifelse(ff.real1(x)>ff.real2(x),ff.real1(x),ff.real2(x))
curve.circular(ff.real, col="blue", shrink=2.25, lwd=2) 

ff.inferred1 <- function(x) dvonmises(x, mu=circular(mu_clusters[1]),  kappa=kappa1_real)
curve.circular(ff.inferred1, col="red", lwd=2, add=T) 
ff.inferred2 <- function(x) dvonmises(x, mu=circular(mu_clusters[2]),  kappa=kappa2_real)
ff.inferred  <- function(x) ifelse(ff.inferred1(x)>ff.inferred2(x),ff.inferred1(x),ff.inferred2(x))
curve.circular(ff.inferred, col="red", lwd=2, add=T) 

legend("topleft",c("real","estimate"), col=c("blue","red"), lwd=2, bty = "n")

The MCMC also gives an attribution of each data point to a cluster. We can check how precise this clustering was. In the next code snippet, each point has a posterior mean between 1 and 2 that indicates how strongly it is assigned to cluster \(1\) or \(2\). This value can be interpreted (in a logistic-regression fashion) as a probability of belonging to either cluster. Here, we simply apply a threshold at the middle value, \(1.5\), to classify each data point.

We denote the two real distributions as ‘a’ and ‘b’, and compare them with the estimated cluster id of each sample:

cluster_id <- samplesStats("G")[,1]
cluster_id
##   [1] 1.000 1.003 1.002 1.002 1.000 1.000 1.000 1.000 1.000 1.000 1.529
##  [12] 1.000 1.000 1.000 1.000 1.000 1.002 1.000 1.000 1.000 1.000 1.000
##  [23] 1.000 1.000 1.000 1.000 1.002 1.000 1.002 1.001 1.000 1.000 1.002
##  [34] 1.001 1.001 1.000 1.000 1.000 1.002 1.000 1.002 1.000 1.000 1.000
##  [45] 1.000 1.000 1.000 1.001 1.000 1.001 1.001 1.000 1.000 1.000 1.000
##  [56] 1.000 1.001 1.000 1.000 1.000 1.000 1.000 1.002 1.001 1.002 1.000
##  [67] 1.000 1.000 1.059 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.002
##  [78] 1.000 1.000 1.001 1.001 1.000 1.000 1.000 1.000 1.000 1.001 1.001
##  [89] 1.002 1.000 1.000 1.001 1.002 1.000 1.002 1.000 1.000 1.015 1.000
## [100] 2.000 1.999 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000
## [111] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000
## [122] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000
## [133] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000
## [144] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 1.984
## [155] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000
## [166] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000
## [177] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 1.976 2.000 2.000 2.000
## [188] 2.000 2.000 2.000 2.000 2.000 1.998 2.000 2.000 2.000 2.000 2.000
## [199] 2.000
estimated_id <- 0+(cluster_id > 1.5) # values are between 1 and 2, we split at the middle

real_id <- c(rep('a',100), rep('b',100))[-1] # remove the first value (was fixed in BUGS)
table(estimated_id, real_id)
##             real_id
## estimated_id   a   b
##            0  98   0
##            1   1 100

The classification error(s) were due to ‘outliers’ from the second distribution, which has the smaller \(\kappa\) parameter and so spreads its values more widely around the unit circle.

Let’s visualize where the error(s) are (the colored square gives the real cluster id, and the colored cross the estimated id):

mixdata_plot <- my_mixdata[-1]
rose.diag(my_mixdata, bins=50, prop=2.5, col="lightgreen",
          control.circle=circle.control(lty=0))
points(as.circular(mixdata_plot[real_id=='a']), col="orange", pch=0, lwd=1.5)
points(as.circular(mixdata_plot[real_id=='b']), col="purple", pch=0, lwd=1.5)

points(as.circular(mixdata_plot[estimated_id==0]), col="orange", pch=3, lwd=1.5)
points(as.circular(mixdata_plot[estimated_id==1]), col="purple", pch=3, lwd=1.5)

We see that the error lies at one of the ‘frontiers’ between the regions of influence of the two von Mises distributions.

The Wrapped Cauchy Distribution

The Wrapped Cauchy distribution is the circular analogue of the Cauchy distribution, which is quite useful for heavy-tail modeling. It has two parameters: a location parameter, \(\mu\), and a concentration parameter, \(\rho\) (the higher, the more concentrated the probability mass is around \(\mu\)).

Its pdf is given by

\[f(\theta | \mu, \rho) = \frac{1}{2 \pi} \frac{1-\rho^2}{1+\rho^2-2\rho\cos(\theta-\mu)}, \quad -\pi \le \theta < \pi\]

where \(\mu \in [-\pi,\pi)\) and \(\rho \in [0,1)\)

The next code samples 100 data points from \(f(\cdot | 0, 0.7)\):

mu  <- 0
rho <- 0.7

set.seed(101)
rdata <- rwrappedcauchy(100, mu=circular(mu), rho=rho)
plot(rdata, xlim=c(-1,2), col="darkgreen", pch=20)
ff <- function(x) dwrappedcauchy(x, mu=circular(mu), rho=rho)
curve.circular(ff, col="blue", lwd=2, add=T)

Notice how the number of ‘outliers’ increases with this distribution.

The function pdf_cauchy computes the density just like the circular package’s dwrappedcauchy:

# pre: theta in [-pi, pi)
# location mu, -pi <= mu < pi
# concentration rho, rho in [0,1)
pdf_cauchy <- function(theta, mu, rho) {
  1/(2*pi) * (1-rho^2) / (1+rho^2-2*rho*cos(theta-mu))
}

mu  <- as.circular(pi/4)
rho <- 0.3
xs  <- as.circular(seq(0,pi/2,len=7))

dwrappedcauchy(xs, mu, rho) 
## [1] 0.2175502 0.2539181 0.2837351 0.2955735 0.2837351 0.2539181 0.2175502
sapply(xs,function (x) pdf_cauchy(x,mu,rho))
## [1] 0.2175502 0.2539181 0.2837351 0.2955735 0.2837351 0.2539181 0.2175502

Now let’s see how to model this function in BUGS.

BUGS model I: estimate mean and concentration

This pdf is easier to represent in BUGS; we can directly estimate both parameters:

modelString = "
  model {

      for (i in 1:n) {

        phi[i] <- -log( (1-pow(rho,2)) / (2*pi*(1+pow(rho,2)-2*rho*cos(x[i]-mu))) ) + C

        dummy[i] <- 0
        dummy[i] ~ dpois( phi[i] )
      }

      # Priors 

      mu  ~ dunif(pi_1,pi)
      rho ~ dunif(0,1)

      # other values

      C    <- 1000000   # for the zero's trick
      pi   <- 3.14159
      pi_1 <- -pi
  }
"

Notice the weak priors over \(\mu\) and \(\rho\). As usual, if there is prior information that would help us concentrate the prior mass, the results could be improved.
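For instance (just a sketch, not used in the model below): if we believed \(\rho\) to be around 0.7, a Beta(7,3) prior, which has mean 0.7, would concentrate the mass accordingly compared with the flat dunif(0,1):

curve(dunif(x, 0, 1), 0, 1, col="grey", lwd=2, ylim=c(0,3),
      xlab="rho", ylab="prior density")
curve(dbeta(x, 7, 3), 0, 1, col="red", lwd=2, add=TRUE)
legend("topleft", c("dunif(0,1)", "dbeta(7,3)"), col=c("grey","red"), lwd=2, bty="n")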

Now, let’s create a random dataset:

mu  <- as.circular(pi/4)
rho <- 0.7

set.seed(121)
n <- 100
my_data <- rwrappedcauchy(n, mu, rho)

rose.diag(my_data, bins=50, prop=1.5, shrink=1)
points(my_data, pch=20, col="darkgreen")

And apply the model to the dataset. Here, we use the MLE estimates to initialize the parameters and help BUGS (they are easy to compute with function mle.wrappedcauchy, but we could initialize to any reasonable value). We will also use these MLE values for comparison with the MCMC results.

# pre: x >= 0
# returns the equivalent angle in [-pi, pi)
cauchy_process_val <- function(x) {
  while(x>pi)
    x = x-2*pi
  x
}
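
# (aside, illustrative) an equivalent vectorized form of the wrap above;
# it maps exactly pi to -pi, which is still inside [-pi, pi)
wrap_to_pi <- function(x) ((x + pi) %% (2*pi)) - pi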

# the pdf requires values in [-pi, pi)
pp_my_data <- sapply(as.numeric(my_data), cauchy_process_val)

# MLE estimates
mle <- mle.wrappedcauchy(my_data)
mu_mle  <- as.numeric(mle$mu)
rho_mle <- as.numeric(mle$rho)

# initializations
genInitFactory <- function()  {
  i <- 0
  function() {
    i <<- i + 1
    list( 
      mu  = mu_mle,  # MLE estimates
      rho = rho_mle
    ) 
  }
}

# Everything is ready. Run the model!
run.model(modelString, samples=c("mu", "rho"), 
          data=list(x     = pp_my_data,
                    n     = length(pp_my_data)), 
          chainLength=2e4, init.func=genInitFactory())
## model is syntactically correct
## data loaded
## model compiled
## Initializing chain 1:
## model is initialized
## 2000 updates took 0 s
## monitor set for variable 'mu'
## monitor set for variable 'rho'
## 20000 updates took 3 s
samplesStats(c("mu"))
##      mean      sd  MC_error val2.5pc median val97.5pc start sample
## mu 0.7324 0.05387 0.0009973   0.6238 0.7334    0.8348  2001  20000
samplesStats(c("rho"))
##       mean      sd  MC_error val2.5pc median val97.5pc start sample
## rho 0.6986 0.03523 0.0006057   0.6238 0.7003    0.7624  2001  20000
# keep point estimates
mu_hat  <- samplesStats(c("mu"))[[1]]  # get point estimate of distribution's mean
rho_hat <- samplesStats(c("rho"))[[1]]  # get point estimate of distribution's concentration

We can now plot the results:

# plot results
plot(my_data, shrink=1.5, col="darkgreen", pch=20)

ff.real <- function(x) dwrappedcauchy(x, mu=circular(mu), rho=rho)
curve.circular(ff.real, col="blue", lwd=2, add=T)

ff.mle <- function(x) dwrappedcauchy(x, mu=circular(mu_mle), rho=rho_mle)
curve.circular(ff.mle, col="orange", lwd=2, add=T)

ff.inferred <- function(x) dwrappedcauchy(x, mu=circular(mu_hat), rho=rho_hat)
curve.circular(ff.inferred, col="red", lwd=2, add=T)
legend("topleft",c("real", "MLE", "MCMC"), col=c("blue","orange", "red"), lwd=2, bty = "n")

Again, we can also check the uncertainty over these estimates:

mus  <- samplesSample("mu")
rhos <- samplesSample("rho")

ff.inferred <- function(x) dwrappedcauchy(x, mu=circular(mu_hat), rho=rho_hat)
curve.circular(ff.inferred, col="red", shrink=1.5, lwd=2) 
  
for (i in 1:500) {                                         # draw a number of samples
  ff.sample <- function(x) dwrappedcauchy(x, mu=circular(mus[i]), rho=rhos[i])
  curve.circular(ff.sample, col="grey", lwd=2, add=T)
}

curve.circular(ff.inferred, col="red", lwd=2, add=T) # draw mean estimation

BUGS model II: mixture of wrapped Cauchys

Let’s make a mixture of two wrapped Cauchy distributions:

set.seed(101)

mu1_real  <- pi/4   # not known
rho1_real <- 0.65   # not known

mu2_real  <- -pi/2  # not known
rho2_real <- 0.75   # not known

my_data1 <- as.numeric(rwrappedcauchy(100, circular(mu1_real), rho1_real))
my_data2 <- as.numeric(rwrappedcauchy(100, circular(mu2_real), rho2_real))

my_mixdata <- c(my_data1, my_data2)

par(mfrow=c(1,2))
rose.diag(my_mixdata, bins=50, prop=2.5, col="lightgreen")
points(as.circular(my_data1), pch=20, col="darkgreen")
points(as.circular(my_data2), pch=20, col="lightgreen")

ff.mix1 <- function(x) dwrappedcauchy(x, mu=circular(mu1_real), rho=rho1_real)
ff.mix2 <- function(x) dwrappedcauchy(x, mu=circular(mu2_real), rho=rho2_real)
curve.circular(ff.mix1, xlim=c(-1.9,1.1), col="darkgreen", shrink=1.1, lwd=2)
curve.circular(ff.mix2, col="lightgreen", lwd=2, add=T)

The model is very similar to the previous mixture for two von Mises distributions. We just added the concentration parameter \(\rho\) as another parameter for the MCMC to estimate:

modelString = "
  model {

      # Log-Likelihood function 

      for (i in 1:n) {

        phi[i] <- -log( (1-pow(rho[i],2)) /
                        (2*pi*(1+pow(rho[i],2)-2*rho[i]*cos(x[i]-mu[i]))) ) + C

        mu[i]  <- lambdaMu[G[i]]
        rho[i] <- lambdaRho[G[i]]

        G[i]   ~  dcat(P[])     # the cluster attributions for each x[i]

        dummy[i] <- 0
        dummy[i] ~ dpois( phi[i] )
      }

      P[1:2] ~ ddirch(alpha[])  # dirichlet distribution (here, just 2 clusters)
      alpha[1] <- 0.5           
      alpha[2] <- 0.5              

      lambdaMu[1] ~ dunif(pi_1,pi)
      lambdaMu[2] ~ dunif(pi_1,pi)

      lambdaRho[1] ~ dunif(0,1)
      lambdaRho[2] ~ dunif(0,1)

      # other values

      C   <- 1000000   # for the zero's trick
      pi  <- 3.14159
      pi_1 <- -pi
  }
"

Now let’s run the model:

pp_my_mixdata <- sapply(as.numeric(my_mixdata), cauchy_process_val)

run.model(modelString, samples=c("lambdaMu","lambdaRho","G"), 
          data=list(x = pp_my_mixdata,
                    n = length(my_mixdata),
                    G = c(1, rep(NA,length(my_mixdata)-1))),
          chainLength=1e4)
## model is syntactically correct
## data loaded
## model compiled
## initial values generated, model initialized
## 1000 updates took 0 s
## monitor set for variable 'lambdaMu'
## monitor set for variable 'lambdaRho'
## monitor set for variable 'G'
## 10000 updates took 10 s
mu_clusters  <- samplesStats("lambdaMu")$mean
rho_clusters <- samplesStats("lambdaRho")$mean

Let’s compare the true and estimated mixtures:

ff.real1 <- function(x) dwrappedcauchy(x, mu=circular(mu1_real),  rho=rho1_real)
ff.real2 <- function(x) dwrappedcauchy(x, mu=circular(mu2_real),  rho=rho2_real)
ff.real  <- function(x) ifelse(ff.real1(x)>ff.real2(x),ff.real1(x),ff.real2(x))
curve.circular(ff.real, col="blue", shrink=2.25, lwd=2) 

ff.inferred1 <- function(x) dwrappedcauchy(x, mu=circular(mu_clusters[1]),  rho=rho_clusters[1])
ff.inferred2 <- function(x) dwrappedcauchy(x, mu=circular(mu_clusters[2]),  rho=rho_clusters[2])
ff.inferred  <- function(x) ifelse(ff.inferred1(x)>ff.inferred2(x),ff.inferred1(x),ff.inferred2(x))
curve.circular(ff.inferred, col="red", lwd=2, add=T) 

legend("topleft",c("real","estimate"), col=c("blue","red"), lwd=2, bty = "n")

And, as we did in the von Mises case, let’s check the cluster attribution. Before doing so, note that the classification error will be higher here, since many more data points from one distribution fall within the high-density region of the other.
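We can roughly quantify this overlap (an illustrative calculation, not from the original): with equal mixing weights, the best achievable (Bayes) misclassification rate is half the integral of the pointwise minimum of the two true densities:

f1 <- function(t) sapply(t, function(v) dwrappedcauchy(circular(v), mu=circular(mu1_real), rho=rho1_real))
f2 <- function(t) sapply(t, function(v) dwrappedcauchy(circular(v), mu=circular(mu2_real), rho=rho2_real))
0.5 * integrate(function(t) pmin(f1(t), f2(t)), 0, 2*pi)$value   # rough Bayes error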

cluster_id <- samplesStats("G")[,1]
cluster_id
##   [1] 1.025 1.075 1.130 1.050 1.057 1.546 1.082 1.238 1.762 1.034 1.072
##  [12] 1.059 1.026 1.632 1.483 1.038 1.038 1.195 1.026 1.076 1.025 1.037
##  [23] 1.127 1.034 1.040 1.027 1.145 1.191 1.130 1.249 1.065 1.040 1.036
##  [34] 1.055 1.033 1.041 1.040 1.026 1.027 1.331 1.391 1.044 1.081 1.145
##  [45] 1.041 1.030 1.029 1.043 1.026 1.026 1.322 1.089 1.064 1.575 1.044
##  [56] 1.097 1.226 1.083 1.497 1.041 1.038 1.303 1.026 1.036 1.068 1.029
##  [67] 1.067 1.030 1.028 1.041 1.028 1.860 1.032 1.034 1.050 1.025 1.046
##  [78] 1.112 1.030 1.091 1.041 1.039 1.027 1.026 1.030 1.078 1.283 1.034
##  [89] 1.025 1.135 1.031 1.071 1.033 1.051 1.040 1.132 1.043 1.029 1.028
## [100] 1.947 1.962 1.526 1.942 1.887 1.744 1.951 1.956 1.918 1.959 1.932
## [111] 1.954 1.472 1.950 1.945 1.952 1.948 1.174 1.739 1.706 1.498 1.943
## [122] 1.951 1.347 1.870 1.937 1.281 1.148 1.809 1.946 1.793 1.637 1.888
## [133] 1.817 1.421 1.392 1.237 1.954 1.077 1.834 1.953 1.855 1.960 1.793
## [144] 1.949 1.922 1.069 1.790 1.958 1.181 1.938 1.030 1.717 1.953 1.946
## [155] 1.898 1.375 1.917 1.346 1.944 1.956 1.865 1.948 1.425 1.816 1.953
## [166] 1.836 1.953 1.955 1.652 1.830 1.959 1.205 1.903 1.950 1.959 1.917
## [177] 1.938 1.954 1.835 1.951 1.915 1.807 1.879 1.684 1.958 1.893 1.949
## [188] 1.690 1.915 1.907 1.963 1.766 1.864 1.949 1.338 1.854 1.948 1.914
## [199] 1.026
estimated_id <- 0+(cluster_id > 1.5) # values are between 1 and 2, we split at the middle

real_id <- c(rep('a',100), rep('b',100))[-1] # remove the first value (was fixed in BUGS)
table(estimated_id, real_id)
##             real_id
## estimated_id  a  b
##            0 94 19
##            1  5 81

Let’s visualize where the errors are (again, the colored square gives the real cluster id, and the colored cross the estimated id):

mixdata_plot <- my_mixdata[-1]
rose.diag(my_mixdata, bins=50, prop=2.5, col="lightgreen",
          control.circle=circle.control(lty=0))
points(as.circular(mixdata_plot[real_id=='a']), col="orange", pch=0, lwd=1.5)
points(as.circular(mixdata_plot[real_id=='b']), col="purple", pch=0, lwd=1.5)

points(as.circular(mixdata_plot[estimated_id==0]), col="orange", pch=3, lwd=1.5)
points(as.circular(mixdata_plot[estimated_id==1]), col="purple", pch=3, lwd=1.5)