# Kalman Filter / Particle Filter

# 1 Libraries

knitr::opts_chunk$set(echo = TRUE) options(scipen=999) library("tidyverse") #ggplot and dplyr library("gridExtra") # combine plots library("knitr") # for pdf library("bnlearn") # ADM library("gRain") # ADM library("entropy") library("HMM") #Hidden Markov Models library("kableExtra") # For table formating library("kernlab") # Gaussian Regression library("mvtnorm") # multi dimensional normal distribution library("caret") # confusion matrix library("e1071") # confusion matrix and accuracy if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") #BiocManager::install("gRain") # The palette with black: cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") set.seed(12345) # 2 Graphical Models ### 2.0.1 D-seperation ## 2.1 1)Show that multiple runs of the hill-climbing algorithm can return non-equivalent Bayesian network (BN) structures. Explain why this happens. Use the Asia dataset which is included in the bnlearn package. To load the data, run data(“asia”). set.seed(12345) asia_data <- bnlearn::asia bayes_net = random.graph(colnames(asia)) hill_climbing_asia_1 <- hc(x=asia_data, start = bayes_net, restart = 20, score = "bde") hill_climbing_asia_1 = cpdag(hill_climbing_asia_1) plot(hill_climbing_asia_1, main="Network Structure with 20 restart") hill_climbing_asia_2 <- hc(x=asia_data, start = bayes_net, restart = 40, score = "bde") hill_climbing_asia_2 = cpdag(hill_climbing_asia_2) plot(hill_climbing_asia_2, main="Network Structure with 40 restart") all.equal(hill_climbing_asia_1,hill_climbing_asia_2) ## [1] "Different number of directed/undirected arcs" ## 2.2 2) Learn a BN from 80 percent of the Asia dataset. The dataset is included in the bnlearn package. To load the data, run data(“asia”). Learn both the structure and the parameters. Use any learning algorithm and settings that you consider appropriate. 
Use the BN learned to classify the remaining 20 percent of the Asia dataset in two classes: S = yes and S = no. In other words, compute the posterior probability distribution of S for each case and classify it in the most likely class. To do so, you have to use exact or approximate inference with the help of the bnlearn and gRain packages, i.e. you are not allowed to use functions such as predict. Report the confusion matrix, i.e. true/false positives/negatives. Compare your results with those of the true Asia BN, which can be obtained by running dag = model2network(“[A][S][T|A][L|S][B|S][D|B:E][E|T:L][X|E]”). ## Split the data into test and train # number of rows n <- nrow(asia) # sample train 80% id <- sample(1:n, floor(n*0.8)) asia_train <- asia[id, ] asia_test <- asia[-id, ] ## structure learning on the train set (si.hiton.pc see bnlearn doc) asia_st_learn <- mmhc(asia_train, whitelist = NULL, blacklist = NULL, restrict.args = list(cluster = NULL) ) asia_st_learn ## ## Bayesian network learned via Hybrid methods ## ## model: ## [A][S][T][X][L|S][B|S][E|T:L][D|B] ## nodes: 8 ## arcs: 5 ## undirected arcs: 0 ## directed arcs: 5 ## average markov blanket size: 1.50 ## average neighbourhood size: 1.25 ## average branching factor: 0.62 ## ## learning algorithm: Max-Min Hill-Climbing ## constraint-based method: Max-Min Parent Children ## conditional independence test: Mutual Information (disc.) ## score-based method: Hill-Climbing ## score: BIC (disc.) 
## alpha threshold: 0.05 ## penalization coefficient: 4.147025 ## tests used in the learning procedure: 254 ## optimized: TRUE # plot the learned structure graphviz.plot(asia_st_learn, layout = "dot", highlight = list("arcs")) # parameter learning asia_parameter_learn <- bn.fit(asia_st_learn, data = asia_train, method = "mle") asia_parameter_learn ## ## Bayesian network parameters ## ## Parameters of node A (multinomial distribution) ## ## Conditional probability table: ## no yes ## 0.991 0.009 ## ## Parameters of node S (multinomial distribution) ## ## Conditional probability table: ## no yes ## 0.49325 0.50675 ## ## Parameters of node T (multinomial distribution) ## ## Conditional probability table: ## no yes ## 0.9925 0.0075 ## ## Parameters of node L (multinomial distribution) ## ## Conditional probability table: ## ## S ## L no yes ## no 0.9868221 0.8865318 ## yes 0.0131779 0.1134682 ## ## Parameters of node B (multinomial distribution) ## ## Conditional probability table: ## ## S ## B no yes ## no 0.7110998 0.2782437 ## yes 0.2889002 0.7217563 ## ## Parameters of node E (multinomial distribution) ## ## Conditional probability table: ## ## , , L = no ## ## T ## E no yes ## no 1 0 ## yes 0 1 ## ## , , L = yes ## ## T ## E no yes ## no 0 0 ## yes 1 1 ## ## ## Parameters of node X (multinomial distribution) ## ## Conditional probability table: ## no yes ## 0.88825 0.11175 ## ## Parameters of node D (multinomial distribution) ## ## Conditional probability table: ## ## B ## D no yes ## no 0.8683274 0.2070831 ## yes 0.1316726 0.7929169 set.seed(12345) pred <- matrix(ncol = 2, nrow = nrow(asia_test)) for (i in 1:nrow(asia_test)) { # excluding S column evidence <- as.factor(asia_test[i, -2]) evA <- ifelse(evidence[1] == 1, "no", "yes") evT <- ifelse(evidence[2] == 1, "no", "yes") evL <- ifelse(evidence[3] == 1, "no", "yes") evB <- ifelse(evidence[4] == 1, "no", "yes") evE <- ifelse(evidence[5] == 1, "no", "yes") evX <- ifelse(evidence[6] == 1, "no", "yes") evD <- 
ifelse(evidence[7] == 1, "no", "yes") # Approximate inference p(A|X = TRUE,B = TRUE) # bayes_net <- bn.fit(bn_struct, data = data) # dist <- cpdist(bayes_net, nodes="A", evidence=(X=="yes") & (B=="yes")) # prop.table(table(dist)) ## Exact inference p(A|X = TRUE,B = TRUE) # junction_tree <- compile(as.grain(bayes_net)) # my_evid <-setEvidence(junction_tree, nodes=c("X","B"), states=c("yes","yes")) # querygrain(my_evid, nodes=c("A"), type="joint") cpd <- cpdist( fitted = asia_parameter_learn, nodes = "S", evidence = (A == evA) & (T == evT) & (L == evL) & (B == evB) & (E == evE) & (X == evX) & (D == evD) ) pred[i, 1] <- prop.table(table(cpd))[1] pred[i, 2] <- prop.table(table(cpd))[2] } # classify using 0.5 as threshold cls <- matrix(ncol = 1, nrow = nrow(asia_test)) for (i in 1:nrow(pred)) { cls[i,1] <- ifelse(pred[i, 1] > 0.5, "no", "yes") } ## confusion matrix cfm <- table(cls, asia_test$S)

# Recorded confusion matrix for the learned structure
# (rows = predicted class, columns = true class):
cfm
##
## cls    no yes
##   no  332 128
##   yes 175 354

## Accuracy: share of correctly classified test cases, i.e. the trace of
## the confusion matrix over the total count. sum(diag(...)) is the
## idiomatic form and matches the Markov-blanket accuracy computation
## used later in this script.
acc <- sum(diag(cfm)) / sum(cfm)

acc
## [1] 0.6936299
## True model
# Ground-truth Asia network structure (Lauritzen & Spiegelhalter),
# used as the reference against the learned structure above.
dag <- model2network("[A][S][T|A][L|S][B|S][D|B:E][E|T:L][X|E]")

# Visualize the true DAG with highlighted arcs.
graphviz.plot(dag, layout = "dot", highlight = list("arcs"))

Maximum-likelihood estimation is used to learn the parameters.

## learning parameters from the true model

## parameter learning
# Fit the conditional probability tables of the true structure by
# maximum likelihood on the 80% training split.
asia_pl_true <- bn.fit(dag, data = asia_train, method = "mle")
## Prediction using the true structure and learned parameters

set.seed(12345)

# One row per test case: column 1 = P(S = no), column 2 = P(S = yes).
pred_true <- matrix(ncol = 2, nrow = nrow(asia_test))

# loop through the rows of the test dataset
for (i in 1:nrow(asia_test)) {

# Drop column 2 (S, the class variable) and coerce the remaining row.
# NOTE(review): as.factor() on a one-row data.frame relies on the factor
# columns collapsing to their underlying integer codes (1 = "no",
# 2 = "yes"); the ifelse() mapping below depends on that coercion --
# verify it holds on the R version in use.
evidence <- as.factor(asia_test[i, -2])

# Map each observed variable's code back to its "no"/"yes" state.
evA <- ifelse(evidence[1] == 1, "no", "yes")

evT <- ifelse(evidence[2] == 1, "no", "yes")

evL <- ifelse(evidence[3] == 1, "no", "yes")

evB <- ifelse(evidence[4] == 1, "no", "yes")

evE <- ifelse(evidence[5] == 1, "no", "yes")

evX <- ifelse(evidence[6] == 1, "no", "yes")

evD <- ifelse(evidence[7] == 1, "no", "yes")

# Approximate inference: sample from P(S | all other variables) under
# the true-structure fit and estimate the posterior from the sample
# proportions.
cpd <- cpdist(
fitted = asia_pl_true, # learned parameters from dag
nodes = "S",
evidence = (A == evA) &
(T == evT) & (L == evL) & (B == evB) &
(E == evE) & (X == evX) & (D == evD)
)

pred_true[i, 1] <- prop.table(table(cpd))[1]

pred_true[i, 2] <- prop.table(table(cpd))[2]
}

# Classify each test case at the 0.5 threshold: predict "no" when
# P(S = no) > 0.5, otherwise "yes". pred_true[, 1] holds P(S = no);
# the vectorized ifelse() replaces the scalar row loop and produces the
# same n-by-1 character matrix.
cls_true <- matrix(ifelse(pred_true[, 1] > 0.5, "no", "yes"), ncol = 1)

## confusion matrix
cfm_true <- table(cls_true, asia_test$S) cfm_true ## ## cls_true no yes ## no 331 127 ## yes 180 360 ## accuracy acc_true <- (cfm_true[1,1] + cfm_true[2,2])/sum(cfm_true) acc_true ## [1] 0.6923848 ## 2.3 3) In the previous exercise, you classified the variable S given observations for all the rest of the variables. Now, you are asked to classify S given observations only for the so-called Markov blanket of S, i.e. its parents plus its children plus the parents of its children minus S itself. Report again the confusion matrix. markovblanket = mb(asia_parameter_learn, node = "S") cat("The markov blanket is:") ## The markov blanket is: print(markovblanket) ## [1] "L" "B" n <- markovblanket #change to gRain object bnToGrain = as.grain(asia_parameter_learn) junctions = compile(bnToGrain) test=asia_test predictedS = c() for(i in 1 :ncol(test)){ test[,i] = as.character(test[,i]) } for (i in 1: nrow(test)){ st=test[i,markovblanket] evd = setEvidence(junctions,nodes=n,states=st) query = querygrain(evd,nodes = c("S")) if(query$S[1]>query$S[2]) { predictedS[i]= 0 } else { predictedS[i]= 1 } } for(i in 1:length(predictedS) ){ predictedS[i] = ifelse(predictedS[i]== 0,"no","yes") } confusion_matrix = table("predicted"=predictedS,"True" =test$S)

# Print the Markov-blanket classifier's confusion matrix and accuracy.
# The `##` lines below are the recorded console output.
cat("Confusion Matrix:","\n")
## Confusion Matrix:
print(confusion_matrix)
##          True
## predicted  no yes
##       no  331 126
##       yes 181 362
# Accuracy = trace of the confusion matrix over the total count.
accuracy = sum(diag(confusion_matrix))/ sum(confusion_matrix)

cat("\n")
cat("Accuracy of prediction of network","\n")
## Accuracy of prediction of network
print(accuracy)
## [1] 0.693

## 2.4 4) Repeat the exercise (2) using a naive Bayes classifier, i.e. the predictive variables are independent given the class variable. See p. 380 in Bishop’s book or Wikipedia for more information on the naive Bayes classifier. Model the naive Bayes classifier as a BN. You have to create the BN by hand, i.e. you are not allowed to use the function naive.bayes from the bnlearn package.

# Classify `pred_variable` for every row of `data` from a compiled
# junction tree, using the observed columns in `obs_variables` as
# evidence. Returns a vector of "yes"/"no" labels, thresholding
# P(S = yes) at 0.5.
# NOTE(review): querygrain(...)$S hard-codes node S, so despite taking
# pred_variable as an argument the function only works for S -- confirm.
predict_from_network <- function(junc_tree, data, obs_variables, pred_variable) {
  labels <- rep(0, NROW(data))
  for (row in 1:NROW(data)) {
    # Collect the observed "yes"/"no" states for this row, named by node.
    states <- NULL
    for (v in obs_variables) {
      states[v] <- if (data[row, v] == "yes") "yes" else "no"
    }

    # Enter the evidence for observation `row` into the junction tree.
    # Everything is observed except S (whether the person smokes).
    with_evidence <- setEvidence(object = junc_tree,
                                 nodes = obs_variables,
                                 states = states)

    # Query the posterior of S and classify at the 0.5 threshold.
    posterior <- querygrain(object = with_evidence, nodes = pred_variable)$S
    labels[row] <- if (posterior["yes"] >= 0.5) "yes" else "no"
  }
  return(labels)
}

# Convert the fitted networks to gRain objects and compile them into
# junction trees (Lauritzen-Spiegelhalter) with clique potentials.
BN.fit_gRain <- as.grain(asia_parameter_learn)
BN.fit_true_gRain <- as.grain(asia_pl_true)
junc_tree <- compile(BN.fit_gRain)
junc_tree_true <- compile(BN.fit_true_gRain)

# Naive Bayes structure built by hand: S is the sole parent of every
# other variable.
naive_bayes_structure <- model2network("[S][A|S][T|S][L|S][B|S][E|S][X|S][D|S]")

# Fit the parameters of the naive Bayes network.
# NOTE(review): this fits on asia_test, but exercise (2) learned from the
# 80% training split -- asia_train is probably intended here; confirm.
BN.fit_naive_bayes <- bn.fit(x = naive_bayes_structure, data = asia_test)

plot(naive_bayes_structure, main="Naives Bayes Network Structure")
score(naive_bayes_structure, asia_test)
## [1] -2998.545

# Convert the naive Bayes fit to a gRain object and compile its
# junction tree and clique potentials.
BN.fit_naive_bayes_grain <- as.grain(BN.fit_naive_bayes)
junc_tree_naive_bayes <- compile(BN.fit_naive_bayes_grain)
junc_tree_true <- compile(BN.fit_true_gRain)

# Predict S for the test set from both networks.
prediction_fit_naive_bayes <- predict_from_network(junc_tree = junc_tree_naive_bayes,
                                                   data = asia_test,
                                                   obs_variables = c("A", "T", "L", "B", "E", "X", "D"),
                                                   pred_variable = c("S"))
prediction_fit_true <- predict_from_network(junc_tree = junc_tree_true,
                                            data = asia_test,
                                            obs_variables = c("A", "T", "L", "B", "E", "X", "D"),
                                            pred_variable = c("S"))

# Confusion matrix for the naive Bayes classifier.
confusion_matrix_naive_bayes <- table(prediction_fit_naive_bayes, asia_test$S)
confusion_matrix_naive_bayes
##
## prediction_fit_naive_bayes  no yes
##                        no  366 183
##                        yes 146 305
confusion_matrix_fit_true <- table(prediction_fit_true, asia_test$S) confusion_matrix_fit_true ## ## prediction_fit_true no yes ## no 331 126 ## yes 181 362 ## 2.5 5) Explain why you obtain the same or different results in the exercises (2-4). Answer: We can see that results of 2 and 3 are the same from the confusion table, this is expected since essentially its the same model. 2 and 4 are different clearly because of the network structure. # 3 Hidden Markov Models The purpose of the lab is to put in practice some of the concepts covered in the lectures. To do so, you are asked to model the behavior of a robot that walks around a ring. The ring is divided into 10 sectors. At any given time point, the robot is in one of the sectors and decides with equal probability to stay in that sector or move to the next sector. You do not have direct observation of the robot.However, the robot is equipped with a tracking device that you can access. The device is not very accurate though, If the robot is in the sector ‘i’, then the device will report that the robot is in the sectors i-2 to i+2 with equal probability ## 3.1 1) Build a hidden Markov model (HMM) for the scenario described above. 
set.seed(12345) transition_mat <- matrix(data = c(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0.5), nrow = 10, ncol = 10) sensor_mat <- matrix(data = c(0.2, 0.2, 0.2, 0, 0, 0, 0, 0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0, 0, 0, 0, 0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0, 0, 0, 0, 0, 0, 0.2, 0.2, 0.2, 0.2, 0.2, 0, 0, 0, 0, 0, 0, 0.2, 0.2, 0.2, 0.2, 0.2, 0, 0, 0, 0, 0, 0, 0.2, 0.2, 0.2, 0.2, 0.2, 0, 0, 0, 0, 0, 0, 0.2, 0.2, 0.2, 0.2, 0.2, 0, 0, 0, 0, 0, 0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0, 0, 0, 0, 0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0, 0, 0, 0, 0, 0.2, 0.2, 0.2), nrow = 10, ncol = 10) #States<-1:100 #Symbols<-1:2 # 1=door #transProbs<-matrix(rep(0,length(States)*length(States)), nrow=length(States), ncol=length(States), byrow = TRUE) # for(i in 1:99){ # transProbs[i,i]<-.1 # transProbs[i,i+1]<-.9 # } #emissionProbs<-matrix(rep(0,length(States)*length(Symbols)), nrow=length(States), ncol=length(Symbols), byrow = TRUE) sector_10_model <- initHMM(States = c("1","2","3","4","5","6","7","8","9","10"), Symbols = c("1","2","3","4","5","6","7","8","9","10"), startProbs = rep(0.1, 10), transProbs = transition_mat, emissionProbs = sensor_mat) ## 3.2 2) Simulate the HMM for 100 time steps. set.seed(12345) hmm_100 <- simHMM(sector_10_model, length=100) hmm_100 ##$states
##   [1] "9"  "9"  "9"  "9"  "8"  "7"  "6"  "5"  "5"  "5"  "4"  "3"  "2"  "2"  "2"
##  [16] "2"  "2"  "2"  "2"  "1"  "1"  "1"  "10" "9"  "9"  "8"  "8"  "8"  "7"  "6"
##  [31] "5"  "4"  "3"  "2"  "2"  "2"  "1"  "10" "9"  "8"  "8"  "7"  "7"  "7"  "6"
##  [46] "6"  "6"  "6"  "6"  "5"  "5"  "5"  "4"  "3"  "2"  "2"  "1"  "1"  "1"  "1"
##  [61] "10" "10" "9"  "9"  "9"  "8"  "8"  "7"  "7"  "7"  "7"  "7"  "6"  "6"  "6"
##  [76] "5"  "5"  "5"  "5"  "4"  "4"  "4"  "3"  "3"  "3"  "3"  "2"  "1"  "10" "10"
##  [91] "9"  "9"  "8"  "8"  "8"  "8"  "8"  "8"  "7"  "6"
##
## $observation ## [1] "7" "10" "8" "10" "10" "6" "4" "6" "3" "3" "2" "4" "10" "4" "10" ## [16] "3" "3" "1" "2" "2" "1" "1" "10" "10" "10" "6" "6" "8" "7" "4" ## [31] "7" "2" "3" "10" "3" "3" "10" "10" "9" "6" "10" "9" "7" "9" "8" ## [46] "6" "6" "5" "6" "3" "6" "5" "2" "4" "4" "10" "10" "1" "3" "10" ## [61] "1" "2" "8" "8" "7" "10" "6" "8" "6" "7" "6" "8" "4" "7" "7" ## [76] "4" "4" "6" "4" "6" "4" "5" "2" "4" "3" "3" "4" "3" "10" "10" ## [91] "10" "8" "7" "7" "6" "8" "7" "9" "7" "5" ## 3.3 3) Discard the hidden states from the sample obtained above. Use the remaining observations to compute the filtered and smoothed probability distributions for each of the 100 time points. Compute also the most probable path. ### 3.3.1 Forward Algorithm Goal: to compute $p(z_k, x_{1:k})$ Let $\begin{split} \alpha_k(z_k) = p(z_k, x_{1:k}) = \sum^M_{z_{k-1}=1} p(x_k| z_k, z_{k-1}, x_{1:k-1}) p(z_k|z_{k-1},x_{1:k-1}) p(z_{k-1},x_{1:k-1}) \\ = \sum^M_{z_{k-1}=1} p(x_k| z_k, \cancel{z_{k-1}, x_{1:k-1}}) p(z_k|z_{k-1}, \cancel{x_{1:k-1}}) p(z_{k-1},x_{1:k-1}) ~~ Due~to~Markov~property \\ \alpha_k(z_k) = \sum^M_{z_{k-1}=1} p(x_k| z_k) p(z_k|z_{k-1}) p(z_{k-1},x_{1:k-1}) \\ \alpha_k(z_k) = \sum^M_{z_{k-1}=1} p(x_k| z_k) p(z_k|z_{k-1}) \alpha_{k-1}(z_{k-1}) \\ \alpha_k(z_k) = \sum^M_{z_{k-1}=1} (Emission~probability) (Transition~probability) \alpha_{k-1}(z_{k-1}) \\ \end{split}$ ### 3.3.2 Backward Algorithm Goal: to compute $p(x_{k+1:n}|z_k)$ Let $\begin{split} \beta_k(z_k) = p(x_{k+1:n}|z_k) = \sum^M_{z_{k+1}=1} p(x_{k+2:n}|z_{k+1},z_{k},x_{k+1}) p(x_{k+1}|z_{k+1},z_k) p(z_{k+1},z_{k}) \\ = \sum^M_{z_{k+1}=1} p(x_{k+2:n}|z_{k+1}, \cancel{z_{k},x_{k+1}}) p(x_{k+1}|z_{k+1}, \cancel{z_k}) p(z_{k+1},z_{k}) \\ ~~ Due~to~Markov~property \\ \beta_k(z_k) = \sum^M_{z_{k+1}=1} p(x_{k+2:n}|z_{k+1}) p(x_{k+1}|z_{k+1}) p(z_{k+1},z_{k}) \\ \beta_k(z_k) = \sum^M_{z_{k+1}=1} \beta_{k+1}(z_{k+1}) p(x_{k+1}|z_{k+1}) p(z_{k+1},z_{k}) \\ \beta_k(z_k) = \sum^M_{z_{k+1}=1} \beta_{k+1}(z_{k+1}) 
(Emission~probability) (Transition~probability)\\ \end{split}$ set.seed(12345) # The library retruns the probabilities logged, we we have to de-log alpha = exp(forward(sector_10_model, hmm_100$observation))
beta = exp(backward(sector_10_model, hmm_100$observation)) # Filtering which is defined as alpha column, divided by its col sum filtered = sweep(alpha, 2, colSums(alpha), FUN="/") # Smoothing smoothing = alpha * beta smoothing = sweep(smoothing, 2, colSums(smoothing), FUN="/") # Path hmm_viterbi = viterbi(sector_10_model, hmm_100$observation)
as.numeric(hmm_viterbi)
##   [1]  8  8  8  8  8  7  6  5  4  3  2  2  2  2  1  1  1  1  1 10  9  9  8  8  8
##  [26]  7  6  6  5  5  5  4  3  2  1  1 10  9  8  8  8  7  7  7  6  5  4  4  4  4
##  [51]  4  3  2  2  2  1  1  1  1  1  1 10  9  8  8  8  7  6  6  6  6  6  5  5  5
##  [76]  4  4  4  4  4  3  3  2  2  2  2  2  1 10  9  8  7  7  7  7  7  7  7  6  5

## 3.4 4) Compute the accuracy of the filtered and smoothed probability distributions, and of the most probable path. That is, compute the percentage of the true hidden states that are guessed by each method.

set.seed(12345)

# Filtered point estimate: transpose so rows are time steps, then take
# the state with the highest filtered probability at each step
# (max.col with "first" breaks ties by the first maximum).
filtered_path <- t(filtered)
filtered_path <- max.col(filtered_path, "first")

# Smoothed point estimate: same procedure on the smoothed distributions.
smoothing_path <- t(smoothing)
smoothing_path <- max.col(smoothing_path, "first")

# Viterbi most-probable path, converted from state labels to integers.
viterbi_path <- as.numeric(hmm_viterbi)

# actual (true hidden) path
actual_state <- as.numeric(hmm_100$states) # Accuracy of filtered path acc_filered <- sum(actual_state == filtered_path)/length(filtered_path) * 100 # Accuracy of smoothened path acc_smooth <- sum(actual_state == smoothing_path)/length(smoothing_path) * 100 # Accuracy of viterbi path viterbi_smooth <- sum(actual_state == viterbi_path)/length(viterbi_path) * 100 cat("Accuracy of filtered, smoothing and viterbi are as follows:", acc_filered, "%,", acc_smooth, "% and ", viterbi_smooth, "%") ## Accuracy of filtered, smoothing and viterbi are as follows: 49 %, 63 % and 28 % ## 3.5 5) Repeat the previous exercise with different simulated samples. In general, the smoothed distributions should be more accurate than the filtered distributions. Why? In general, the smoothed distributions should be more accurate than the most probable paths, too. Why? set.seed(12345) get_hmm_accuracy <- function(N, model) { hmm_N <- simHMM(model, length=N) alpha = exp(forward(model, hmm_N$observation))
beta = exp(backward(model, hmm_N$observation)) filtered = sweep(alpha, 2, colSums(alpha), FUN="/") smoothing = alpha * beta smoothing = sweep(smoothing, 2, colSums(smoothing), FUN="/") hmm_viterbi = viterbi(model, hmm_N$observation)

# Most probable state per time step from the filtered distributions
# (rows = time steps after the transpose; ties broken by first maximum).
filtered_path <- t(filtered)
filtered_path <- max.col(filtered_path, "first")

# Most probable state per time step from the smoothed distributions.
smoothing_path <- t(smoothing)
smoothing_path <- max.col(smoothing_path, "first")

# Viterbi path as integer state labels.
viterbi_path <- as.numeric(hmm_viterbi)
actual_state <- as.numeric(hmm_N$states) acc_filered <- sum(actual_state == filtered_path)/length(filtered_path) * 100 acc_smooth <- sum(actual_state == smoothing_path)/length(smoothing_path) * 100 viterbi_smooth <- sum(actual_state == viterbi_path)/length(viterbi_path) * 100 df <- data.frame(N=N, accuracy_of_filtered = acc_filered, accuracy_of_smoothing = acc_smooth, accuracy_of_viterbi = viterbi_smooth) return(df) } final <- NULL for (i in seq(10, 300, 10)) { temp <- get_hmm_accuracy(i, sector_10_model) final <- rbind(temp, final) } ggplot(final, aes(x = N)) + geom_line(aes(y=accuracy_of_smoothing, color="accuracy_of_smoothing")) + geom_line(aes(y=accuracy_of_filtered, color="accuracy_of_filtered")) + geom_line(aes(y=accuracy_of_viterbi, color="accuracy_of_viterbi")) + ggtitle("Accuracy of Filtering, Smoothing and Viterbi vs. Simulation") + ylab("Accuracy in %") + scale_colour_manual(values = c("#CC79A7", "#000000", "#D55E00")) ## 3.6 6) Is it true that the more observations you have the better you know where the robot is? set.seed(12345) empirical_entropy = apply(filtered, 2, entropy.empirical) plot(empirical_entropy, type= 'l', main = "Entropy of filtered path") ## 3.7 7) Consider any of the samples above of length 100. Compute the probabilities of the hidden states for the time step 101. set.seed(12345) position = transition_mat %*% filtered[,100] rownames(position) = names(filtered[,100]) position ## [,1] ## 1 0.0000000 ## 2 0.0000000 ## 3 0.0000000 ## 4 0.0000000 ## 5 0.1051418 ## 6 0.3419030 ## 7 0.3948582 ## 8 0.1580970 ## 9 0.0000000 ## 10 0.0000000 # 4 State-space models The purpose of the lab is to put in practice some of the concepts covered in the lectures. To do so, you are asked to implement the particle filter for robot localization. For the particle filter algorithm, please check Section 13.3.4 of Bishop’s book and/or the slides for the last lecture on state space models (SSMs). 
The robot moves along the horizontal axis according to the following SSM: Transition model: $p(z_t|z_{t-1}) = (N(z_t|z_{t-1},1) + N(z_t|z_{t-1} + 1,1) + N(z_t|z_{t-1} + 2,1))/3$ Emission model: $p(x_t|z_t) = (N(x_t|z_t,1) + N(x_t|z_t-1),1) + + N(x_t|z_t+1),1))/3$ Initial model: $p(z_1) = Uniform(0,100)$ ## 4.1 1) Implement the SSM above. Simulate it for T = 100 time steps to obtain $z_{1:100}$ (i.e., states) and $x_{1:100}$ (i.e., observations). Use the observations (i.e., sensor readings) to identify the state (i.e., robot location) via particle filtering. Use 100 particles. Show the particles, the expected location and the true location for the first and last time steps, as well as for two intermediate time steps of your choice. set.seed(12345) gen_data <- function(N, sd_emi){ x_t <- vector(length = N) z_t = vector(length = N) # Simulate initial state z_t[1] = runif(1, min = 0, max = 100) # Simulate remaining states for(i in 2:N){ mean_z_t = sample(c(z_t[i-1], z_t[i-1]+1, z_t[i-1]+2), size = 1, prob = rep(1/3, 3)) z_t[i] = rnorm(1, mean_z_t, sd_emi) } # Simulate observations from states for(i in 1:N){ mean_x_t = sample(c(z_t[i], z_t[i]+1, z_t[i]-1), size = 1, prob = rep(1/3, 3)) x_t[i] = rnorm(1, mean_x_t, sd_emi) } sample_data <- data.frame(observation = x_t, states = z_t, index=1:N) # plot of samples p1 <- ggplot(data=sample_data, aes(x=index)) + geom_line(aes(y=observation, color="Observations")) + geom_line(aes(y=states, color="True location")) + scale_colour_manual(values = c("#000000", "#E69F00", "#56B4E9", "#009E73")) + xlab("Index") + ylab("Location") + ggtitle("Observed and True location") print(p1) # density plots p2 <- ggplot(data=sample_data) + geom_density(aes(x=observation, color="Observations")) + geom_density(aes(x=states, color="True location")) + scale_colour_manual("", breaks = c("Observations", "True location"), values = c("#000000", "#E69F00", "#56B4E9", "#009E73")) + xlab("Index") + ylab("Density") + ggtitle("Density of Observed and True 
location") print(p2) return(list(z_t=z_t, x_t=x_t)) } #method 1 #$p(x_t|x_{t-1}) = N(x_t|x_{t-1} + 1,1)$//transition distribution #$p(z_t|x_t) = N(z_t|x_t,5)$//emission distribution #$p(x_0) = N(x_0|50,100)$//inital distribution T<-10000 n_par<-100 tra_sd<-1 #R emi_sd<-5 #Q mu_0<-50 Sigma_0<-10 ini_dis<-function(n){ return (rnorm(n,mu_0,Sigma_0)) } tra_dis<-function(zt){ return (rnorm(1,mean=zt+1,sd=tra_sd)) } emi_dis<-function(zt){ return (rnorm(1,mean=zt,sd=emi_sd)) } den_emi_dis<-function(xt,zt){ return (dnorm(xt,mean=zt,sd=emi_sd)) } z<-vector(length=T) x<-vector(length=T) for(t in 1:T){ z[t]<-ifelse(t==1,ini_dis(1),tra_dis(z[t-1])) x[t]<-emi_dis(z[t]) } err<-vector(length=T) bel<-ini_dis(n_par) w<-rep(1/n_par,n_par) for(t in 2:T){ com<-sample(1:n_par,n_par,replace=TRUE,prob=w) bel<-sapply(bel[com],tra_dis) for(i in 1:n_par){ w[i]<-den_emi_dis(x[t],bel[i]) } w<-w/sum(w) Ezt<-sum(w * bel) err[t]<-abs(z[t]-Ezt) } mean(err[2:T]) sd(err[2:T]) ## didnt work # x<-vector(length=T) # z<-vector(length=T) # err<-vector(length=T) # # for(t in 1:T){ # x[t]<-ifelse(t==1,rnorm(1,mu_0,Sigma_0),x[t-1]+1+rnorm(1,0,R)) # z[t]<-x[t]+rnorm(1,0,Q) # } # # mu<-mu_0 # Sigma<-Sigma_0*Sigma_0 # for(t in 2:T){ # pre_mu<-mu+1 # pre_Sigma<-Sigma+R*R # K<-pre_Sigma/(pre_Sigma+Q*Q) # mu<-pre_mu+K*(z[t]-pre_mu) # Sigma<-(1-K)*pre_Sigma # # err[t]<-abs(x[t]-mu) # } # # mean(err[2:T]) # sd(err[2:T]) # method 2 my_particle_filter <- function(M, Times, obs, sd_tra, sd_emi, ignore_weight) { temp_particles <- NULL weights <- NULL # Create matrix that will contain all particles for each time step particles <- matrix(NA, nrow = Times, ncol = M) # Generate initial M particles, Uniform(0, 100) particles[1, ] <- runif(n = M, min = 0, max = 100) for (t in 2:Times) { for (m in 1:M) { # transiton matrix selection <- sample(1:3, prob = c(1 / 3, 1 / 3, 1 / 3), size = 1) mean_tra <- c(particles[t - 1, m], particles[t - 1, m] + 1, particles[t - 1, m] + 2) temp_particles[m] <- rnorm(n=1, mean = 
mean_tra[selection], sd = sd_tra) # emission matrix weights[m] <- mean(dnorm(x=obs[t], mean = temp_particles[m]-1, sd = sd_emi), dnorm(x=obs[t], mean = temp_particles[m], sd = sd_emi), dnorm(x=obs[t], mean = temp_particles[m]+1, sd = sd_emi)) } weights = weights/sum(weights) if(ignore_weight == TRUE){ particles[t, ] <- sample(x = temp_particles, size = M, replace = TRUE) }else{ particles[t, ] <- sample(x = temp_particles, size = M, replace = TRUE, prob = weights)} } return(particles) } set.seed(12345) # generate data sample_data <- gen_data(N=100, sd_emi=1) # particle filter particles <- my_particle_filter(M=100, Times = 100, obs=sample_data$x_t,
sd_tra=1, sd_emi=1, ignore_weight = FALSE)

# Combine true, observed and particle-filter-estimated locations per
# time step; the estimate is the mean of the particle cloud at each step.
location <- data.frame(true_location=sample_data$z_t, observed_location=sample_data$x_t,
estimated_location = rowMeans(particles, na.rm = TRUE),
index = 1:NROW(particles)) %>% as.data.frame()

# plot function
# Plot observed vs estimated location over time, mark time point `step`
# with a vertical line, and summarize the three locations at that step
# in a table (the table/kable code continues on the collapsed line below).
plot_location <- function(step, df){
plot1 <- ggplot(data=df, aes(x=index)) +
geom_line(aes(y=observed_location, color="Observed Location")) +
geom_line(aes(y=estimated_location, color="Estimated Location")) +
geom_vline(aes(xintercept=step, color="step")) +
xlab("Index") +
ylab("Location") +
scale_colour_manual(values = c("#000000", "#E69F00", "#56B4E9", "#009E73")) +
ggtitle(paste0("Observed and Estimated Location for its ", step, " step"))

# Keep only the requested time step for the summary table.
df <- df[step,]
result <- data.frame(step = step,
true_location = df$true_location, observed_location = df$observed_location,
estimated_location = df$estimated_location) print(plot1) kable(result, "latex", booktabs = T) %>% kable_styling(latex_options = "striped") } plot_location(step=1, df=location) plot_location(step=100, df=location) plot_location(step=50, df=location) plot_location(step=75, df=location) ## 4.2 2) Repeat the exercise above replacing the standard deviation of the emission model with 5 and then with 50. Comment on how this affects the results. set.seed(12345) # generate data sample_data <- gen_data(N=100, sd_emi=5) # with standard deviation of the emission model of 5 particles_sd_emis_5 <- my_particle_filter(M=100, Times = 100, obs=sample_data$x_t,
sd_tra=1, sd_emi=5, ignore_weight = FALSE)

# Combine true, observed and estimated locations for the run with
# emission standard deviation 5.
location_sd_5 <- data.frame(true_location=sample_data$z_t, observed_location=sample_data$x_t,
estimated_location = rowMeans(particles_sd_emis_5,
na.rm = TRUE),
index = 1:NROW(particles_sd_emis_5)) %>% as.data.frame()

# Inspect the final and an intermediate time step.
plot_location(step=100, df=location_sd_5)
plot_location(step=50, df=location_sd_5)

## 4.3 3) Finally, show and explain what happens when the weights in the particle filter are always equal to 1, i.e. there is no correction.

set.seed(12345)
# generate data (emission sd = 1, as in exercise 4.1)
sample_data <- gen_data(N=100, sd_emi=1)

# Particle filter with the correction step disabled: all weights are
# treated as equal (ignore_weight = TRUE in the collapsed call below).
particles_no_weight <- my_particle_filter(M=100, Times = 100, obs=sample_data$x_t, sd_tra=1, sd_emi=1, ignore_weight = TRUE) df_no_weight <- data.frame(true_location=sample_data$z_t,
sd <- sqrt(res$variance) df <- data.frame(XStar=XStar, mu = mu, upper_band = mu + 1.96 * sd, lower_band = mu - 1.96 * sd) df2 <- data.frame(x=x,y=y) plot1 <- ggplot(data=df, aes(x=XStar)) + geom_line(aes(y=mu, color="mean")) + geom_line(aes(y=upper_band, color="upper_band")) + geom_line(aes(y=lower_band, color="lower_band")) + geom_point(data=df2, aes(x=x, y=y, color="Observation")) + ggtitle("Posterior mean with bands and observations") + ylab("Posterior/Observation") + xlab("Xstar") + scale_colour_manual(values = c("#E69F00", "#56B4E9", "#000000", "#E69F00")) print(plot1) } ## 5.2 2) Now, let the prior hyperparameters be $\sigma_f=1$ and $l=0.3$. Update this prior with a single observation: $(x,y)=(0.4,0.719)$. Assume that $\sigma_n=0.1$. Plot the posterior mean of $f$ over the interval $x \in [-1,1]$. Plot also 95 percent probability (pointwise) bands for $f$. set.seed(111) XStar = seq(-1, 1, 0.05) # Set hyperparameters sigma_f <- 1 l <- 0.3 # Set standard deviation of measurement sigma_n <- 0.1 # Measurements observations <- data.frame(x = c(0.4), y = c(0.719)) # Prior nsim <- 6 func_val <- t(rmvnorm(nsim, rep(0, length(XStar)), squared_exponential(XStar,XStar,1,0.3))) func_val <- as.data.frame(func_val) func_val$x <- XStar
# Reshape the simulated prior draws to long format (one row per grid
# point and draw) for plotting.
func_val <- reshape2::melt(func_val,id="x")
# Plot the six sample paths drawn from the GP prior.
ggplot(func_val,aes(x=x,y=value)) + geom_line(aes(group=variable),
colour="#696969",alpha = 1) +
ylab("Function values") + xlab("Input vector") +
ggtitle("6 sample from GP prior")

# Get posterior mean and variance of f:
# condition the GP on the single observation (0.4, 0.719) with noise
# standard deviation sigma_n and evaluate on the grid XStar.
res <- posterior_GP(x = observations$x, y = observations$y, x_star = XStar,
sigma_f = sigma_f, l = l, kernel = squared_exponential,
sigma_n = sigma_n)

# Posterior mean with 95% pointwise bands and the observation.
plot_gaussian(x=observations$x, y=observations$y, XStar, res)

## 5.3 3) Compute the posterior distribution of $f$ using all the five data points in the table below (note that the two previous observations are included in the table). Plot the posterior mean of $f$ over the interval $x \in [-1,1]$. Plot also 95 % probability (pointwise) bands for $f$.

x y
-1.0 0.768
-0.6 -0.044
-0.2 -0.940
0.4 0.719
0.8 -0.664
set.seed(111)

# The five observations from the table above (includes the two points
# used in the previous exercise).
x4 <- c(-1, -0.6, -0.2, 0.4, 0.8)
y4 <- c(0.768, -0.044, -0.940, 0.719, -0.664)

# Measurements collected into one data frame.
observations <- data.frame(x = x4, y = y4)

# Hyperparameters of the squared-exponential kernel and the
# measurement noise standard deviation.
sigma_f <- 1
l <- 0.3
sigma_n <- 0.1

# Posterior mean and variance of f over the grid XStar.
res <- posterior_GP(x = observations$x, y = observations$y, x_star = XStar,
                    sigma_f = sigma_f, l = l, kernel = squared_exponential,
                    sigma_n = sigma_n)

# Posterior mean with 95% pointwise bands and the five observations.
plot_gaussian(x = observations$x, y = observations$y, XStar, res)

## 5.4 4) Repeat (3), this time with hyperparameters $\sigma_f = 1$ and $l=1$. Compare the results.

set.seed(111)

# Same five observations as in (3); only the length-scale changes
# from 0.3 to 1, giving a smoother posterior.
sigma_f <- 1
l <- 1
sigma_n <- 0.1

# Posterior mean and variance of f over the grid XStar with l = 1.
res <- posterior_GP(x = observations$x, y = observations$y, x_star = XStar,
                    sigma_f = sigma_f, l = l, kernel = squared_exponential,
                    sigma_n = sigma_n)

# Posterior mean with 95% pointwise bands and the observations.
plot_gaussian(x = observations$x, y = observations$y, XStar, res)

# 6 GP Regression with kernlab

In this exercise, you will work with the daily mean temperature in Stockholm (Tullinge) during the period January 1, 2010 - December 31, 2015. We have removed the leap year day February 29, 2012 to make things simpler.

Create the variable $time$ which records the day number since the start of the dataset (i.e., time= 1, 2,…., 365