# Cluster Analysis assessment in proposing a surgical technique for benign
# prostatic enlargement_ICIMTH 2022
#
# Workflow of this script:
#   1. Read a CSV chosen interactively and keep the surgical method plus eight
#      pre-operative variables.
#   2. Standardize the numeric variables and run k-means with 3 clusters.
#   3. Cross-tabulate the clusters against the surgical method.
#   4. Visualize the clusters directly and on the first two principal
#      components.

# Packages ----

# Install the required packages only when they are missing, instead of
# re-installing them on every run of the script.
for (pkg in c("ggpubr", "factoextra", "dplyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(ggpubr)
library(factoextra)
library(dplyr)

# Citation information for R and the packages used
citation()
citation("ggpubr")
citation("factoextra")
citation("dplyr")

# Data import ----

# Import csv from a location picked through the interactive file dialog
data <- read.csv(file.choose())

# Preview file
summary(data)

# Identify working directory
getwd()

# Rename the first column
# NOTE(review): presumably the first CSV column holds the surgical method;
# confirm against the input file.
colnames(data)[1] <- "SurgMethod"
summary(data)

# Convert to factor
data$SurgMethod <- factor(data$SurgMethod)

# Keep the class label and the variables used for clustering
data1 <- data[, c(
  "SurgMethod", "Age", "PSA", "ProstateVolume", "Hbbefore", "Nabefore",
  "PVRbefore", "Qmaxbefore", "IPSSbefore"
)]
summary(data1)

# Drop the class label (first column) so that only the clustering variables
# remain
data.noclass <- data1[, -1, drop = FALSE]
summary(data.noclass)

# Scaling ----

# Scale the variables. as.numeric() drops the matrix shape and the
# "scaled:center"/"scaled:scale" attributes returned by scale(), so every
# column stays a plain numeric vector with the same standardized values.
data.noclass_Scale <- data.noclass %>%
  mutate(across(where(is.numeric), function(x) as.numeric(scale(x))))
summary(data.noclass_Scale)

# kmeans() fails on missing or non-finite values; stop early with a readable
# message instead of the low-level error it would raise.
if (!all(vapply(data.noclass_Scale, is.numeric, logical(1))) ||
    !all(is.finite(as.matrix(data.noclass_Scale)))) {
  stop(
    "The scaled data contain non-numeric, missing or non-finite values; ",
    "k-means cannot be run on them.",
    call. = FALSE
  )
}

# K-means ----

# Clustering with K-means in 3 clusters (fixed seed for reproducibility)
set.seed(100)
data.noclass_Scale.cl <- kmeans(data.noclass_Scale, centers = 3)

# Create a column with the clustering results in our original dataframe
data$cluster <- data.noclass_Scale.cl$cluster

# Comparison results: surgical method (rows) versus k-means cluster (columns)
table(data$SurgMethod, data$cluster)

# Visualize k-means clusters
fviz_cluster(
  data.noclass_Scale.cl,
  data = data.noclass_Scale,
  palette = c("#2E9FDF", "#00AFBB", "#E7B800"),
  geom = "point",
  ellipse.type = "convex",
  ggtheme = theme_bw()
)

# PCA ----

# Compute principal component analysis (PCA) to reduce the data into small
# dimensions for visualization. The argument is spelled `scale.` in full
# rather than relying on partial matching; the columns are already
# standardized, so the rescaling leaves them effectively unchanged and is kept
# to match the original call.
res.pca <- prcomp(data.noclass_Scale, scale. = TRUE)

# Coordinates of individuals
ind.coord <- as.data.frame(get_pca_ind(res.pca)$coord)

# Add clusters obtained using the K-means algorithm
ind.coord$cluster <- factor(data$cluster)
ind.coord$SurgMethod <- data$SurgMethod

# Data inspection
head(ind.coord$SurgMethod)

# Percentage of variance explained by dimensions
eigenvalue <- round(get_eigenvalue(res.pca), 1)
variance.percent <- eigenvalue$variance.percent
head(eigenvalue)

# Visualize k-means clusters
# Color individuals according to the cluster groups; the point shape encodes
# the surgical method and stat_mean() adds the mean point of each cluster.
ggscatter(
  ind.coord,
  x = "Dim.1",
  y = "Dim.2",
  color = "cluster",
  palette = "npg",
  ellipse = TRUE,
  ellipse.type = "convex",
  shape = "SurgMethod",
  size = 1.5,
  legend = "right",
  ggtheme = theme_bw(),
  xlab = paste0("Dim 1 (", variance.percent[1], "% )"),
  ylab = paste0("Dim 2 (", variance.percent[2], "% )")
) +
  stat_mean(aes(color = cluster), size = 4)