Q6.1

<aside> 💡 0,406

</aside>

# fit PCA on `training`, centering and scaling every variable
# (prcomp's scaling argument is spelled `scale.`, with a trailing dot)
pca_model <- prcomp(training, center = TRUE, scale. = TRUE)

# eigenvalues = variances of the principal components
eigenvalues <- pca_model$sdev^2

# proportion of the total variance explained by PC1
eigenvalues[1] / sum(eigenvalues)

Q6.2

<aside> 💡 0,128

</aside>

# share of the total variance carried by components 5 through 9
tail_variance <- sum(eigenvalues[5:9])
total_variance <- sum(eigenvalues)
tail_variance / total_variance

Q6.3

<aside> 💡 0,755

</aside>

# required libraries
library(class)      # provides knn()
library(MLmetrics)  # provides Accuracy()

# classify `testing` with 3-nearest-neighbours fitted on `training`
ypred <- knn(train = training, test = testing, cl = trainingType, k = 3)

# fraction of predictions that agree with `testingType`
Accuracy(y_pred = ypred, y_true = testingType)

Q6.4

<aside> 💡 0,800

</aside>

# required libraries
library(MLmetrics)

# recall of `ypred` against `testingType`, with "2" as the positive label
Recall(y_true = testingType, y_pred = ypred, positive = "2")

Q6.5

<aside> 💡 (b) 6

</aside>

# required libraries
library(class)      # provides knn()
library(MLmetrics)  # provides Accuracy()
### code source: thmmy.gr/smf/index.php?topic=62451.msg1284089#msg1284089

# candidate numbers of principal components to keep
pValues <- 1:9

# project both sets onto the principal components once; the projection does
# not depend on p, only the number of columns kept does
training_scores <- predict(pca_model, training)
testing_scores <- predict(pca_model, testing)

# preallocate one accuracy slot per candidate p
accuracies <- numeric(length(pValues))

# run prediction for different values of p
for (i in seq_along(pValues)) {
  p <- pValues[i]
  # drop = FALSE keeps a one-column matrix when p == 1
  training_pc <- as.data.frame(training_scores[, seq_len(p), drop = FALSE])
  testing_pc <- as.data.frame(testing_scores[, seq_len(p), drop = FALSE])
  ypred <- knn(training_pc, testing_pc, trainingType, k = 3)
  accuracies[i] <- Accuracy(ypred, testingType)
}

# return number of Principal Components that maximizes Accuracy
# (which.max picks the first maximum, so ties resolve to the smallest p)
pValues[which.max(accuracies)]