library(circular) # circular stats, cran.r-project.org/web/packages/circular/circular.pdf
Generate some random temporal data:
library(lubridate) # deal with date values
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
N <- 500
events <- as.POSIXct("2011-01-01", tz="GMT") +
          days(floor(365*runif(N))) +
          hours(floor(24*rnorm(N))) +
          minutes(floor(60*runif(N))) +
          seconds(floor(60*runif(N)))
head(events)
## [1] "2011-11-18 17:47:02 GMT" "2011-08-05 23:22:30 GMT"
## [3] "2011-06-22 16:22:01 GMT" "2011-06-05 14:47:16 GMT"
## [5] "2011-10-18 12:21:06 GMT" "2011-07-26 16:40:32 GMT"
df <- data.frame(datetime = events, eventhour = hour(events))
df$work <- df$eventhour %in% seq(9, 17) # determine if event is in business hours (9:00 to 17:00)
head(df,4)
## datetime eventhour work
## 1 2011-11-18 17:47:02 17 TRUE
## 2 2011-08-05 23:22:30 23 FALSE
## 3 2011-06-22 16:22:01 16 TRUE
## 4 2011-06-05 14:47:16 14 TRUE
# make our data into circular class from package circular
df$eventhour <- circular(df$eventhour %% 24, # convert to 24 hrs
                         units="hours",
                         template="clock24")
The package includes a rose diagram plot:
par(mfrow=c(1,2))
rose.diag(df$eventhour, bins = 24, col = "lightblue", main = "Events by Hour (sqrt scale)",
          radii.scale = "sqrt", prop = 3)
rose.diag(df$eventhour, bins = 24, col = "lightblue", main = "Events by Hour (linear scale)",
          prop = 12, radii.scale = "linear", shrink = 1.25, cex = 0.8, ticks = TRUE)
par(mfrow=c(1,1))
We can use a regular (linear) plot to show a density:
angles <- seq(0,2*pi,len=100)
data <- dvonmises(circular(angles), mu=circular(pi/10), kappa=2)
plot(angles, data, type="l", col="blue")
And also draw a density in a circular plot:
ff <- function(x) dvonmises(x, mu=circular(pi/10), kappa=20)
curve.circular(ff, col="blue", shrink=1.5, lwd=2)
Or draw a histogram using the rose diagram together with the sample datapoints:
set.seed(221)
rdata <- rvonmises(75, circular(3*pi/2), 2.5)
rose.diag(rdata, bins=50, prop=1.5, shrink=1)
points(rdata, pch=20, col="red")
The von Mises distribution is a continuous probability distribution on the circle; it is the circular analogue of the normal distribution. It has two parameters: a centrality parameter, \(\mu\), and a concentration parameter, \(\kappa\) (the higher \(\kappa\), the more concentrated the probability mass is around \(\mu\)).
This is its pdf:
\[f(x|\mu,\kappa) = \frac{e^{\kappa \cos(x-\mu)}}{2\pi I_0(\kappa)}\]
where \[I_0(\kappa) = \frac{1}{\pi} \int_0^\pi e^{\kappa \cos(x)} dx\]
The Bessel function \(I_0(\kappa)\) can be computed with the R function besselI(kappa, 0), or approximated like this:
I0 <- function(kappa) besselI(kappa, 0)
# Approximates the modified Bessel function I(x, nu) via its power series
# increase iter for a better approximation
# cf. functions.wolfram.com/Bessel-TypeFunctions/BesselI/introductions/Bessels/05/
approx.I0 <- function(x, nu=0, iter=20) {
  result <- (x/2)^nu / gamma(nu+1)   # k = 0 term of the series (equals 1 when nu = 0)
  for(k in 1:iter)
    result <- result + (x/2)^(2*k+nu) / (gamma(k+nu+1)*factorial(k))
  result
}
kappa <- 10
approx.I0(kappa,0)
## [1] 2815.717
I0(kappa)
## [1] 2815.717
I.0(kappa) # package 'circular' also has this function
## [1] 2815.717
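As a quick sanity check (a sketch; dvonmises_manual is a hypothetical helper, not part of the package), the pdf above can be evaluated directly and compared with dvonmises:
dvonmises_manual <- function(x, mu, kappa) {
  exp(kappa * cos(x - mu)) / (2*pi * I0(kappa))   # von Mises pdf written out explicitly
}
xs <- seq(0, 2*pi, len=5)
dvonmises_manual(xs, mu=pi/10, kappa=2)
dvonmises(circular(xs), mu=circular(pi/10), kappa=2) # should give the same values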
The next code creates a random sample at mean \(\pi/4\):
set.seed(101)
mu_real <- pi/4
kappa_real <- 15
my_data <- as.numeric(rvonmises(100, circular(mu_real), kappa_real))
Assuming the sample was generated by a von Mises distribution, we can easily compute the maximum-likelihood estimates, which are given by the function mle.vonmises:
estimates <- mle.vonmises(as.circular(my_data))
estimates$mu[[1]]
## [1] 0.8028
estimates$kappa[[1]]
## [1] 13.8538
Let’s plot the estimation vs the real signal:
# plot two von Mises distributions
plot_circular_compare <- function(mu_real, kappa_real, mu_hat, kappa_hat,
                                  shrink=1.75, col="red", add=FALSE) {
  ff.real     <- function(x) dvonmises(x, mu=circular(mu_real), kappa=kappa_real)
  ff.inferred <- function(x) dvonmises(x, mu=circular(mu_hat),  kappa=kappa_hat)
  curve.circular(ff.real, col="blue", shrink=shrink, ylim=c(-0.75,1.25), lwd=2, add=add)
  curve.circular(ff.inferred, col=col, lwd=2, add=TRUE)
  legend("topleft", c("real","estimate"), col=c("blue",col), lwd=2, bty="n")
}
plot_circular_compare(mu_real, kappa_real, estimates$mu[[1]], estimates$kappa[[1]])
The next sections show ways to solve this problem in a Bayesian setting using BUGS.
source("run_model.R") # import 'run.model()' to run bugs
The simplest variant is when we know \(\kappa\). The specification for this model in BUGS is:
modelString = "
model {
  for (i in 1:n) {
    # phi[i] <- -log( exp(kappa * cos(x[i]-mu)) / (2*pi*I0) ) + C, which simplifies to:
    phi[i] <- - kappa * cos(x[i]-mu) + log(2*pi*I0) + C
    dummy[i] <- 0
    dummy[i] ~ dpois( phi[i] )
  }
  # Priors
  mu ~ dunif(0,pi2)
  # other values
  C <- 1000000 # for the zeros trick
  pi <- 3.14159
  pi2 <- 2*pi
}
"
Since the von Mises is not among the distributions available in BUGS, we implement its likelihood using the zeros trick.
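The trick works because each dummy observation equals zero and follows a Poisson distribution with rate \(\phi_i\), so it contributes
\[P(\text{dummy}_i = 0 \mid \phi_i) = e^{-\phi_i}\]
to the joint distribution. Setting \(\phi_i = -\log f(x_i|\mu,\kappa) + C\) therefore reproduces the target likelihood up to a constant factor (the large constant \(C\) only keeps the Poisson rate positive).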
With this model, we initialize parameter \(\mu\) to its empirical mean (to help BUGS start with a good value), and run the model:
# initializations
genInitFactory <- function() {
  i <- 0
  function() {
    i <<- i + 1
    list(
      mu = mean(my_data) # start with the mean estimate
    )
  }
}
# Everything is ready. Run the model!
run.model(modelString, samples=c("mu"),
          data=list(I0 = I0(kappa_real),
                    x = my_data,
                    kappa = kappa_real,
                    n = length(my_data)),
          chainLength=5e3, init.func=genInitFactory())
## model is syntactically correct
## data loaded
## model compiled
## Initializing chain 1:
## model is initialized
## 500 updates took 0 s
## monitor set for variable 'mu'
## 5000 updates took 1 s
mu_hat <- samplesStats(c("mu"))[[1]] # get point estimate of distribution's mean
# plot results
plot_circular_compare(mu_real, kappa_real, mu_hat, kappa_real)
Assume we don’t know \(\kappa\) but approach the problem in a mixed way, feeding the previous model with an empirical estimate of \(\kappa\) based on the available sample. Another way to estimate the concentration parameter \(\kappa\) is to use the function A1inv to find the maximum-likelihood estimate:
estimate_kappa <- function(data) {
  sum_sins <- sum(sin(data))
  sum_coss <- sum(cos(data))
  A1inv(mean(cos(data - atan2(sum_sins, sum_coss))))
}
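As a quick check (a sketch relying on the estimates object computed earlier), this closed-form estimator should closely agree with the mle.vonmises result:
estimate_kappa(my_data) # closed-form estimate via A1inv
estimates$kappa[[1]]    # mle.vonmises estimate, for comparison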
So, a simple extension of the previous model is to run it using this estimate of \(\kappa\):
kappa_hat <- estimate_kappa(my_data)
run.model(modelString, samples=c("mu"),
          data=list(I0 = I0(kappa_hat),
                    x = my_data,
                    kappa = kappa_hat,
                    n = length(my_data)),
          chainLength=5e3, init.func=genInitFactory())
## model is syntactically correct
## data loaded
## model compiled
## Initializing chain 1:
## model is initialized
## 500 updates took 0 s
## monitor set for variable 'mu'
## 5000 updates took 0 s
mu_hat <- samplesStats(c("mu"))[[1]] # get point estimate of distribution's mean
# plot results
plot_circular_compare(mu_real, kappa_real, mu_hat, kappa_hat)
Here we want to estimate both parameters within BUGS.
The following solution is based on the fact that it is not easy to compute the Bessel function \(I_0\) inside BUGS (it should be possible, but it is computationally expensive). So, we give BUGS a set of choices of \(\kappa\) for which the respective values \(I_0(\kappa)\) were already computed in R.
The new model specification changes the log-likelihood. Now, instead of using a fixed \(\kappa\) value, we use a variable kappa_hat, which is computed via a random variable k following a categorical distribution over all possible pairs \((\kappa, I_0(\kappa))\). These pairs start with equal probabilities.
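In effect, this places a discrete uniform prior over the grid of concentration values:
\[p(\kappa = \kappa_j) = \frac{1}{n_{\text{kappas}}}, \qquad j = 1, \dots, n_{\text{kappas}}\]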
modelString = "
model {
  for (i in 1:n) {
    phi[i] <- - kappa_hat * cos(x[i]-mu) + log(2*pi*I0_hat) + C
    dummy[i] <- 0
    dummy[i] ~ dpois( phi[i] )
  }
  k ~ dcat(p[])
  for(j in 1:n_kappas) {
    p[j] <- 1/n_kappas # creating vector p = {1/n_kappas,...}
  }
  kappa_hat <- kappas[k]
  I0_hat <- I0s[k]
  # Priors
  mu ~ dunif(0,pi2)
  # other values
  C <- 1000000 # for the zeros trick
  pi <- 3.14159
  pi2 <- 2*pi
}
"
Let’s give BUGS all values of \(\kappa\) from 1 to 40:
n_kappas <- 40
kappas <- 1:n_kappas
I0_s <- sapply(kappas, I0)
# Everything is ready. Run the model!
run.model(modelString, samples=c("mu","kappa_hat"),
          data=list(x = my_data,
                    n = length(my_data),
                    kappas = kappas,
                    I0s = I0_s,
                    n_kappas = n_kappas),
          chainLength=5e3)
## model is syntactically correct
## data loaded
## model compiled
## initial values generated, model initialized
## 500 updates took 0 s
## monitor set for variable 'mu'
## monitor set for variable 'kappa_hat'
## 5000 updates took 2 s
mu_hat <- samplesStats(c("mu"))[[1]] # point estimate of distribution's mean
kappa_hat <- samplesStats(c("kappa_hat"))[[1]] # point estimate of distribution's concentration
# plot results
plot_circular_compare(mu_real, kappa_real, mu_hat, kappa_hat)
Since we are making a Bayesian inference, we can also plot the uncertainty of these posterior values. First get the sample from the MCMC run:
mus <- samplesSample("mu")
kappas <- samplesSample("kappa_hat")
Then plot a bunch of them:
ff.inferred <- function(x) dvonmises(x, mu=circular(mu_hat), kappa=kappa_hat)
curve.circular(ff.inferred, col="red", shrink=2.25, lwd=2)
for (i in 1:250) { # draw a number of samples
  ff.sample <- function(x) dvonmises(x, mu=circular(mus[i]), kappa=kappas[i])
  curve.circular(ff.sample, col="grey", lwd=2, add=T)
}
curve.circular(ff.inferred, col="red", lwd=2, add=T) # draw mean estimation
The Bayesian approach can also handle mixtures within the same framework. Let’s see a possible solution for this type of problem. We present an example with two distributions, but it can be generalized to more complex mixtures. This solution assumes we know both concentration parameters \(\kappa_1, \kappa_2\).
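For reference, a two-component von Mises mixture has density
\[f(x) = p \, f(x \mid \mu_1, \kappa_1) + (1-p) \, f(x \mid \mu_2, \kappa_2)\]
where the mixing weight \(p\) and the means \(\mu_1, \mu_2\) are the quantities to infer.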
Let’s create some random mixed data:
set.seed(101)
mu1_real <- pi/4 # not known
kappa1_real <- 10 # known
mu2_real <- pi # not known
kappa2_real <- 5 # known
my_data1 <- as.numeric(rvonmises(100, circular(mu1_real), kappa1_real))
my_data2 <- as.numeric(rvonmises(100, circular(mu2_real), kappa2_real))
my_mixdata <- c(my_data1, my_data2)
par(mfrow=c(1,2))
rose.diag(my_mixdata, bins=50, prop=2.5, col="lightgreen")
points(as.circular(my_data1), pch=20, col="darkgreen")
points(as.circular(my_data2), pch=20, col="lightgreen")
ff.mix1 <- function(x) dvonmises(x, mu=circular(mu1_real), kappa=kappa1_real)
ff.mix2 <- function(x) dvonmises(x, mu=circular(mu2_real), kappa=kappa2_real)
curve.circular(ff.mix1, xlim=c(-1.5,1.5), col="darkgreen", shrink=1.1, lwd=2)
curve.circular(ff.mix2, col="lightgreen", lwd=2, add=T)
par(mfrow=c(1,1))
This model is based on the BUGS code for a mixture of Gaussians:
modelString = "
model {
  for (i in 1:n) {
    phi[i] <- - kappa[i] * cos(x[i]-mu[i]) + log(2*pi*I0[i]) + C
    mu[i]    <- lambdaMu[G[i]]
    kappa[i] <- lambdaKappa[G[i]]
    I0[i]    <- lambdaI0[G[i]]
    G[i] ~ dcat(P[]) # the cluster attribution for each x[i]
    dummy[i] <- 0
    dummy[i] ~ dpois( phi[i] )
  }
  P[1:2] ~ ddirch(alpha[]) # Dirichlet distribution (here, just 2 clusters)
  alpha[1] <- 0.5
  alpha[2] <- 0.5
  lambdaKappa[1] <- 10
  lambdaKappa[2] <- 5
  lambdaI0[1] <- 2815.717 # I_0(10)
  lambdaI0[2] <- 27.23987 # I_0(5)
  lambdaMu[1] ~ dunif(0,pi2)
  lambdaMu[2] ~ dunif(0,pi2)
  # other values
  C <- 1000000 # for the zeros trick
  pi <- 3.14159
  pi2 <- 2*pi
}
"
The vector G gives the cluster to which each data point belongs. If a datapoint \(x\) is assigned to cluster \(i\), its likelihood is \(f(x|\mu_i, \kappa_i)\). Each G[i] follows a categorical distribution with probabilities \(P\), and \(P\) is given a Dirichlet prior with parameter \(\alpha\), a vector of positive values (here both components are set to 0.5).
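In summary, the generative model coded above is
\[P \sim \text{Dirichlet}(\alpha), \qquad G_i \mid P \sim \text{Categorical}(P), \qquad x_i \mid G_i = g \sim \text{vonMises}(\mu_g, \kappa_g)\]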
Let’s run the model and extract the posterior means:
run.model(modelString, samples=c("lambdaMu","G"),
          data=list(x = my_mixdata,
                    n = length(my_mixdata),
                    G = c(1, rep(NA,length(my_mixdata)-1))), # need to give one value (?)
          chainLength=1e4)
## model is syntactically correct
## data loaded
## model compiled
## initial values generated, model initialized
## 1000 updates took 0 s
## monitor set for variable 'lambdaMu'
## monitor set for variable 'G'
## 10000 updates took 7 s
mu_clusters <- samplesStats("lambdaMu")$mean
mu_clusters <- mu_clusters %% (2*pi) # place values inside the [0,2*pi] interval
The next code compares the true mix with the estimated mix:
ff.real1 <- function(x) dvonmises(x, mu=circular(mu1_real), kappa=kappa1_real)
ff.real2 <- function(x) dvonmises(x, mu=circular(mu2_real), kappa=kappa2_real)
ff.real <- function(x) ifelse(ff.real1(x)>ff.real2(x),ff.real1(x),ff.real2(x))
curve.circular(ff.real, col="blue", shrink=2.25, lwd=2)
ff.inferred1 <- function(x) dvonmises(x, mu=circular(mu_clusters[1]), kappa=kappa1_real)
curve.circular(ff.inferred1, col="red", lwd=2, add=T)
ff.inferred2 <- function(x) dvonmises(x, mu=circular(mu_clusters[2]), kappa=kappa2_real)
ff.inferred <- function(x) ifelse(ff.inferred1(x)>ff.inferred2(x),ff.inferred1(x),ff.inferred2(x))
curve.circular(ff.inferred, col="red", lwd=2, add=T)
legend("topleft",c("real","estimate"), col=c("blue","red"), lwd=2, bty = "n")
The MCMC also gives an attribution of each datapoint to a cluster, so we can check how accurate this clustering was. In the next code snippet, each point has a value between 1 and 2 that indicates how close it is to cluster \(1\) or cluster \(2\). This value can be interpreted (in a logistic-regression fashion) as a probability of belonging to either cluster. Here we simply apply a threshold at the middle value, \(1.5\), to classify each datapoint.
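More precisely, since each \(G_i\) takes the value 1 or 2, its posterior mean is
\[E[G_i \mid \text{data}] = 1 + P(G_i = 2 \mid \text{data})\]
so thresholding at 1.5 assigns a point to cluster 2 exactly when its posterior probability of belonging to cluster 2 exceeds 0.5.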
We denote the two real distributions as ‘a’ and ‘b’, and compare them with the estimated cluster id of each sample:
cluster_id <- samplesStats("G")[,1]
cluster_id
## [1] 1.000 1.003 1.002 1.002 1.000 1.000 1.000 1.000 1.000 1.000 1.529
## [12] 1.000 1.000 1.000 1.000 1.000 1.002 1.000 1.000 1.000 1.000 1.000
## [23] 1.000 1.000 1.000 1.000 1.002 1.000 1.002 1.001 1.000 1.000 1.002
## [34] 1.001 1.001 1.000 1.000 1.000 1.002 1.000 1.002 1.000 1.000 1.000
## [45] 1.000 1.000 1.000 1.001 1.000 1.001 1.001 1.000 1.000 1.000 1.000
## [56] 1.000 1.001 1.000 1.000 1.000 1.000 1.000 1.002 1.001 1.002 1.000
## [67] 1.000 1.000 1.059 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.002
## [78] 1.000 1.000 1.001 1.001 1.000 1.000 1.000 1.000 1.000 1.001 1.001
## [89] 1.002 1.000 1.000 1.001 1.002 1.000 1.002 1.000 1.000 1.015 1.000
## [100] 2.000 1.999 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000
## [111] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000
## [122] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000
## [133] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000
## [144] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 1.984
## [155] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000
## [166] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000 2.000
## [177] 2.000 2.000 2.000 2.000 2.000 2.000 2.000 1.976 2.000 2.000 2.000
## [188] 2.000 2.000 2.000 2.000 2.000 1.998 2.000 2.000 2.000 2.000 2.000
## [199] 2.000
estimated_id <- 0+(cluster_id > 1.5) # values are between 1 and 2, we split at the middle
real_id <- c(rep('a',100), rep('b',100))[-1] # remove the first value (was fixed in BUGS)
table(estimated_id, real_id)
## real_id
## estimated_id a b
## 0 98 0
## 1 1 100
The classification error(s) were due to ‘outliers’ of the second distribution, which had a small \(\kappa\) parameter, and so its values are more spread over the unit circle.
Let’s visualize where the error(s) are (the colored square gives a point’s real cluster id, and the colored cross its estimated id):
mixdata_plot <- my_mixdata[-1]
rose.diag(my_mixdata, bins=50, prop=2.5, col="lightgreen",
control.circle=circle.control(lty=0))
points(as.circular(mixdata_plot[real_id=='a']), col="orange", pch=0, lwd=1.5)
points(as.circular(mixdata_plot[real_id=='b']), col="purple", pch=0, lwd=1.5)
points(as.circular(mixdata_plot[estimated_id==0]), col="orange", pch=3, lwd=1.5)
points(as.circular(mixdata_plot[estimated_id==1]), col="purple", pch=3, lwd=1.5)
We see that the error is at one of the ‘frontiers’ between the influence of both von Mises distributions.
The wrapped Cauchy distribution is the circular analogue of the Cauchy distribution, which is quite useful for heavy-tail modeling. It has two parameters: a centrality parameter, \(\mu\), and a concentration parameter, \(\rho\) (the higher \(\rho\), the more concentrated the probability mass is around \(\mu\)).
Its pdf is given by
\[f(\theta | \mu, \rho) = \frac{1}{2 \pi} \frac{1-\rho^2}{1+\rho^2-2\rho\cos(\theta-\mu)}, \quad -\pi \le \theta < \pi\]
where \(\mu \in [-\pi,\pi)\) and \(\rho \in [0,1)\).
The next code samples 100 data points from \(f(\cdot | 0, 0.7)\):
mu <- 0
rho <- 0.7
set.seed(101)
rdata <- rwrappedcauchy(100, mu=circular(mu), rho=rho)
plot(rdata, xlim=c(-1,2), col="darkgreen", pch=20)
ff <- function(x) dwrappedcauchy(x, mu=circular(mu), rho=rho)
curve.circular(ff, col="blue", lwd=2, add=T)
Notice how the number of ‘outliers’ increases with this distribution.
The function pdf_cauchy computes the density just like the circular package’s dwrappedcauchy:
# pre: theta in [-pi, pi)
# location mu, -pi <= mu < pi
# concentration rho, rho in [0,1)
pdf_cauchy <- function(theta, mu, rho) {
  1/(2*pi) * (1-rho^2) / (1+rho^2-2*rho*cos(theta-mu))
}
mu <- as.circular(pi/4)
rho <- 0.3
xs <- as.circular(seq(0,pi/2,len=7))
dwrappedcauchy(xs, mu, rho)
## [1] 0.2175502 0.2539181 0.2837351 0.2955735 0.2837351 0.2539181 0.2175502
sapply(xs,function (x) pdf_cauchy(x,mu,rho))
## [1] 0.2175502 0.2539181 0.2837351 0.2955735 0.2837351 0.2539181 0.2175502
Now let’s see how to model this function in BUGS.
This pdf is easier to represent in BUGS, so we can directly estimate both parameters:
modelString = "
model {
  for (i in 1:n) {
    phi[i] <- -log( (1-pow(rho,2)) / (2*pi*(1+pow(rho,2)-2*rho*cos(x[i]-mu))) ) + C
    dummy[i] <- 0
    dummy[i] ~ dpois( phi[i] )
  }
  # Priors
  mu ~ dunif(pi_1,pi)
  rho ~ dunif(0,1)
  # other values
  C <- 1000000 # for the zeros trick
  pi <- 3.14159
  pi_1 <- -pi
}
"
Notice the weak priors over \(\mu\) and \(\rho\). As usual, if there is some previous information that would help us concentrate the prior mass, the results could be improved.
Now, let’s create a random dataset:
mu <- as.circular(pi/4)
rho <- 0.7
set.seed(121)
n <- 100
my_data <- rwrappedcauchy(n, mu, rho)
rose.diag(my_data, bins=50, prop=1.5, shrink=1)
points(my_data, pch=20, col="darkgreen")
And apply the model to the dataset. Here we’ll use the MLE estimates to initialize the parameters and help BUGS (they are easy to compute with the function mle.wrappedcauchy, but we could initialize with any reasonable value). We will also use these MLE values to compare with the MCMC results.
# pre: x >= 0
# returns the equivalent angle in (-pi, pi]
cauchy_process_val <- function(x) {
  while(x > pi)
    x <- x - 2*pi
  x
}
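# A vectorized alternative (a sketch, not in the original) that wraps any
# angle into [-pi, pi) using modular arithmetic:
wrap_to_pi <- function(x) ((x + pi) %% (2*pi)) - pi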
# the pdf expects angles in [-pi, pi)
pp_my_data <- sapply(as.numeric(my_data), cauchy_process_val)
# MLE estimates
mle <- mle.wrappedcauchy(my_data)
mu_mle <- as.numeric(mle$mu)
rho_mle <- as.numeric(mle$rho)
# initializations
genInitFactory <- function() {
  i <- 0
  function() {
    i <<- i + 1
    list(
      mu  = mu_mle, # MLE estimates
      rho = rho_mle
    )
  }
}
# Everything is ready. Run the model!
run.model(modelString, samples=c("mu", "rho"),
          data=list(x = pp_my_data,
                    n = length(pp_my_data)),
          chainLength=2e4, init.func=genInitFactory())
## model is syntactically correct
## data loaded
## model compiled
## Initializing chain 1:
## model is initialized
## 2000 updates took 0 s
## monitor set for variable 'mu'
## monitor set for variable 'rho'
## 20000 updates took 3 s
samplesStats(c("mu"))
## mean sd MC_error val2.5pc median val97.5pc start sample
## mu 0.7324 0.05387 0.0009973 0.6238 0.7334 0.8348 2001 20000
samplesStats(c("rho"))
## mean sd MC_error val2.5pc median val97.5pc start sample
## rho 0.6986 0.03523 0.0006057 0.6238 0.7003 0.7624 2001 20000
# keep point estimates
mu_hat <- samplesStats(c("mu"))[[1]] # get point estimate of distribution's mean
rho_hat <- samplesStats(c("rho"))[[1]] # get point estimate of distribution's concentration
We can now plot the results:
# plot results
plot(my_data, shrink=1.5, col="darkgreen", pch=20)
ff.real <- function(x) dwrappedcauchy(x, mu=circular(mu), rho=rho)
curve.circular(ff.real, col="blue", lwd=2, add=T)
ff.mle <- function(x) dwrappedcauchy(x, mu=circular(mu_mle), rho=rho_mle)
curve.circular(ff.mle, col="orange", lwd=2, add=T)
ff.inferred <- function(x) dwrappedcauchy(x, mu=circular(mu_hat), rho=rho_hat)
curve.circular(ff.inferred, col="red", lwd=2, add=T)
legend("topleft",c("real", "MLE", "MCMC"), col=c("blue","orange", "red"), lwd=2, bty = "n")
Again, we could also check the uncertainty over the mean estimate:
mus <- samplesSample("mu")
rhos <- samplesSample("rho")
ff.inferred <- function(x) dwrappedcauchy(x, mu=circular(mu_hat), rho=rho_hat)
curve.circular(ff.inferred, col="red", shrink=1.5, lwd=2)
for (i in 1:500) { # draw a number of samples
  ff.sample <- function(x) dwrappedcauchy(x, mu=circular(mus[i]), rho=rhos[i])
  curve.circular(ff.sample, col="grey", lwd=2, add=T)
}
curve.circular(ff.inferred, col="red", lwd=2, add=T) # draw mean estimation
Make a mixture:
set.seed(101)
mu1_real <- pi/4 # not known
rho1_real <- 0.65 # not known
mu2_real <- -pi/2 # not known
rho2_real <- 0.75 # not known
my_data1 <- as.numeric(rwrappedcauchy(100, circular(mu1_real), rho1_real))
my_data2 <- as.numeric(rwrappedcauchy(100, circular(mu2_real), rho2_real))
my_mixdata <- c(my_data1, my_data2)
par(mfrow=c(1,2))
rose.diag(my_mixdata, bins=50, prop=2.5, col="lightgreen")
points(as.circular(my_data1), pch=20, col="darkgreen")
points(as.circular(my_data2), pch=20, col="lightgreen")
ff.mix1 <- function(x) dwrappedcauchy(x, mu=circular(mu1_real), rho=rho1_real)
ff.mix2 <- function(x) dwrappedcauchy(x, mu=circular(mu2_real), rho=rho2_real)
curve.circular(ff.mix1, xlim=c(-1.9,1.1), col="darkgreen", shrink=1.1, lwd=2)
curve.circular(ff.mix2, col="lightgreen", lwd=2, add=T)
The model is very similar to the previous mixture for two von Mises distributions. We just added the concentration parameter \(\rho\) as another parameter for the MCMC to estimate:
modelString = "
model {
  # Log-likelihood function
  for (i in 1:n) {
    phi[i] <- -log( (1-pow(rho[i],2)) /
                    (2*pi*(1+pow(rho[i],2)-2*rho[i]*cos(x[i]-mu[i]))) ) + C
    mu[i]  <- lambdaMu[G[i]]
    rho[i] <- lambdaRho[G[i]]
    G[i] ~ dcat(P[]) # the cluster attribution for each x[i]
    dummy[i] <- 0
    dummy[i] ~ dpois( phi[i] )
  }
  P[1:2] ~ ddirch(alpha[]) # Dirichlet distribution (here, just 2 clusters)
  alpha[1] <- 0.5
  alpha[2] <- 0.5
  lambdaMu[1] ~ dunif(pi_1,pi)
  lambdaMu[2] ~ dunif(pi_1,pi)
  lambdaRho[1] ~ dunif(0,1)
  lambdaRho[2] ~ dunif(0,1)
  # other values
  C <- 1000000 # for the zeros trick
  pi <- 3.14159
  pi_1 <- -pi
}
"
Now let’s run the model:
pp_my_mixdata <- sapply(as.numeric(my_mixdata), cauchy_process_val)
run.model(modelString, samples=c("lambdaMu","lambdaRho","G"),
          data=list(x = pp_my_mixdata,
                    n = length(pp_my_mixdata),
                    G = c(1, rep(NA,length(pp_my_mixdata)-1))),
          chainLength=1e4)
## model is syntactically correct
## data loaded
## model compiled
## initial values generated, model initialized
## 1000 updates took 0 s
## monitor set for variable 'lambdaMu'
## monitor set for variable 'lambdaRho'
## monitor set for variable 'G'
## 10000 updates took 10 s
mu_clusters <- samplesStats("lambdaMu")$mean
rho_clusters <- samplesStats("lambdaRho")$mean
Let’s compare the true and estimated mixtures:
ff.real1 <- function(x) dwrappedcauchy(x, mu=circular(mu1_real), rho=rho1_real)
ff.real2 <- function(x) dwrappedcauchy(x, mu=circular(mu2_real), rho=rho2_real)
ff.real <- function(x) ifelse(ff.real1(x)>ff.real2(x),ff.real1(x),ff.real2(x))
curve.circular(ff.real, col="blue", shrink=2.25, lwd=2)
ff.inferred1 <- function(x) dwrappedcauchy(x, mu=circular(mu_clusters[1]), rho=rho_clusters[1])
ff.inferred2 <- function(x) dwrappedcauchy(x, mu=circular(mu_clusters[2]), rho=rho_clusters[2])
ff.inferred <- function(x) ifelse(ff.inferred1(x)>ff.inferred2(x),ff.inferred1(x),ff.inferred2(x))
curve.circular(ff.inferred, col="red", lwd=2, add=T)
legend("topleft",c("real","estimate"), col=c("blue","red"), lwd=2, bty = "n")
And, as we did in the von Mises case, let’s check the cluster attribution. Before that, notice that the classification error will be higher here, since many more datapoints from one distribution fall in the density peak area of the other.
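To get a rough sense of how much the two components overlap, we can classify each point by whichever true component density is larger (a sketch using the true parameters, which would not be available in practice):
d1 <- pdf_cauchy(pp_my_mixdata, mu1_real, rho1_real)
d2 <- pdf_cauchy(pp_my_mixdata, mu2_real, rho2_real)
table(ifelse(d1 >= d2, 'a', 'b'), c(rep('a',100), rep('b',100)))
Even this ‘oracle’ classifier makes mistakes, so the MCMC clustering below cannot be expected to do better.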
cluster_id <- samplesStats("G")[,1]
cluster_id
## [1] 1.025 1.075 1.130 1.050 1.057 1.546 1.082 1.238 1.762 1.034 1.072
## [12] 1.059 1.026 1.632 1.483 1.038 1.038 1.195 1.026 1.076 1.025 1.037
## [23] 1.127 1.034 1.040 1.027 1.145 1.191 1.130 1.249 1.065 1.040 1.036
## [34] 1.055 1.033 1.041 1.040 1.026 1.027 1.331 1.391 1.044 1.081 1.145
## [45] 1.041 1.030 1.029 1.043 1.026 1.026 1.322 1.089 1.064 1.575 1.044
## [56] 1.097 1.226 1.083 1.497 1.041 1.038 1.303 1.026 1.036 1.068 1.029
## [67] 1.067 1.030 1.028 1.041 1.028 1.860 1.032 1.034 1.050 1.025 1.046
## [78] 1.112 1.030 1.091 1.041 1.039 1.027 1.026 1.030 1.078 1.283 1.034
## [89] 1.025 1.135 1.031 1.071 1.033 1.051 1.040 1.132 1.043 1.029 1.028
## [100] 1.947 1.962 1.526 1.942 1.887 1.744 1.951 1.956 1.918 1.959 1.932
## [111] 1.954 1.472 1.950 1.945 1.952 1.948 1.174 1.739 1.706 1.498 1.943
## [122] 1.951 1.347 1.870 1.937 1.281 1.148 1.809 1.946 1.793 1.637 1.888
## [133] 1.817 1.421 1.392 1.237 1.954 1.077 1.834 1.953 1.855 1.960 1.793
## [144] 1.949 1.922 1.069 1.790 1.958 1.181 1.938 1.030 1.717 1.953 1.946
## [155] 1.898 1.375 1.917 1.346 1.944 1.956 1.865 1.948 1.425 1.816 1.953
## [166] 1.836 1.953 1.955 1.652 1.830 1.959 1.205 1.903 1.950 1.959 1.917
## [177] 1.938 1.954 1.835 1.951 1.915 1.807 1.879 1.684 1.958 1.893 1.949
## [188] 1.690 1.915 1.907 1.963 1.766 1.864 1.949 1.338 1.854 1.948 1.914
## [199] 1.026
estimated_id <- 0+(cluster_id > 1.5) # values are between 1 and 2, we split at the middle
real_id <- c(rep('a',100), rep('b',100))[-1] # remove the first value (was fixed in BUGS)
table(estimated_id, real_id)
## real_id
## estimated_id a b
## 0 94 19
## 1 5 81
Let’s visualize where the errors are (again, the colored square gives a point’s real cluster id, and the colored cross its estimated id):
mixdata_plot <- my_mixdata[-1]
rose.diag(my_mixdata, bins=50, prop=2.5, col="lightgreen",
control.circle=circle.control(lty=0))
points(as.circular(mixdata_plot[real_id=='a']), col="orange", pch=0, lwd=1.5)
points(as.circular(mixdata_plot[real_id=='b']), col="purple", pch=0, lwd=1.5)
points(as.circular(mixdata_plot[estimated_id==0]), col="orange", pch=3, lwd=1.5)
points(as.circular(mixdata_plot[estimated_id==1]), col="purple", pch=3, lwd=1.5)