Published

30 Mar 2026

Code
##################
# LOAD LIBRARIES #
##################
library(tidyverse)
library(mixOmics)
library(readxl)
library(cowplot)
library(RColorBrewer)
library(scales)

select <- dplyr::select
map <- purrr::map

# setwd("/Users/vilkal/work/Brolidens_work/Projects/Candida-omics/src")

feat <- c("#e39ddaff", "#6dded3ff", "#a68cd5ff") #,"#6095d7","#755fa3","#c44999")
Code
pal <- list(
  CST = c(
    "I" = "#148F77",
    "II" = "#6d9f06",
    "III" = "#A3E4D7",
    "IV-B" = "#E794C1",
    "IV-C" = "#b5cb8c",
    "V" = "#43BA8F"
  ),
  subCST = c(
    "#a5c97b",
    "#cc93cf",
    "#de9548",
    "#63d3b4",
    "#9e8dec",
    "#902267",
    '#FB5273',
    "#91c5de"
  ),
  RVVC = c("#EAEAEA", "#fd814e"),
  BV = c("#EAEAEA", "#fd814e"),
  NAC = c("#EAEAEA", "#fd814e"),
  ST = c("#EAEAEA", "#fd814e"),
  missing_page = c("#EAEAEA", "#fd814e"),
  Clin = c("#a5c97b", "#cc93cf", "#de9548", "#63d3b4", "#9e8dec", "#902267"),
  Clin_gr = c("#ffa998", '#FB5273', "#902267"),
  RVVC_AS = c("#ffa998", '#FB5273', "#902267"),
  RVVC_pos = c(
    "#A6CEE3",
    "#1F78B4",
    #"#EAEAEA",
    #"gray",
    "#e490c4ff",
    "#c12d88ff"
  ), # "#FB9A99", "#E31A1C", "#B2DF8A", "#609c5bff", "#FDBF6F", "#FF7F00",, "#CAB2D6", "#6a3d9a" "#e490c4ff", "#c12d88ff"
  trx_louvain_7 = c(
    "#de9548",
    "#cc93cf",
    "#a5c97b",
    "#9e8dec",
    "#902267",
    "#63d3b4",
    '#FB5273'
  ),
  metab_louvain = c(
    "#cc93cf",
    "#a5c97b",
    "#de9548",
    "#902267",
    "#9e8dec",
    "#63d3b4"
    #'#FB5273'
  ),
  microb_louvain = c(
    "#cc93cf",
    "#de9548",
    "#a5c97b",
    "#9e8dec",
    "#902267",
    "#63d3b4",
    '#FB5273'
  ),
  fun_louvain = c(
    "#a5c97b",
    "#cc93cf",
    "#de9548",
    "#63d3b4",
    "#9e8dec",
    "#902267",
    '#FB5273'
    #"#984EA3"
    #"#B3CDE3"
  ),
  integ_louvain = c(
    "#a5c97b",
    "#cc93cf",
    "#de9548",
    "#63d3b4",
    "#9e8dec",
    "#902267",
    '#FB5273',
    "#984EA3"
    #"#B3CDE3"
  ),
  "Wet_smear:_clue_cells_(y/n)" = c("#2266ac", "#91c5de"),
  hyphae = c("#f26386", "#fd814e", "#a4d984", "#fbbc52"),
  pos = c("#91c5de", "#2266ac"),
  "Clinical_score_(0-5)" = brewer.pal(6, name = "Reds")
)
X <- readRDS("../Results/MixOmic/X_object.RDS")
Y <- readRDS("../Results/MixOmic/Y_object.RDS")

Variables to change

1. Specify design matrix

The design matrix describes how strongly you want the different input datasets to be connected in the model. It is a matrix of numbers between 0 and 1, where each value shows how much the model should try to link two datasets.

  • Values between 0.5 and 1 tell the model to focus more on the correlations between those datasets.
  • Values below 0.5 tell the model to focus more on prediction rather than on linking the datasets.

A matrix full of ones is the default matrix that is used, and can be specified by setting design = "full". Similarly, setting design = "null" will produce a matrix full of zeroes.

weight <- c(0.1, 1) %>% set_names(., c("weighted", "full"))

design <- map(
  weight,
  ~ matrix(
    .x,
    ncol = length(X[[1]]),
    nrow = length(X[[1]]),
    dimnames = list(names(X[[1]]), names(X[[1]]))
  )
)
design <- map(
  design,
  ~ {
    diag(.x) <- 0
    .x
  }
)

design$full
   TR MB MG FU
TR  0  1  1  1
MB  1  0  1  1
MG  1  1  0  1
FU  1  1  1  0

Number of features to test when tuning

test.KeepX <- list(
  "test.High" = list(
    TR = c(seq(300, 500, 50)),
    MG = c(50, 25, 5),
    FU = c(50, 25, 5),
    MB = c(50, 25, 5)
  ),
  "test.Median" = list(
    TR = c(seq(100, 250, 50)),
    MG = c(50, 25, 5),
    FU = c(50, 25, 5),
    MB = c(50, 25, 5)
  ),
  "test.Low" = list(
    TR = c(seq(10, 50, 10)),
    MG = c(50, 25, 5),
    FU = c(50, 25, 5),
    MB = c(50, 25, 5)
  ),
  "test.High" = list(
    TR = c(seq(300, 500, 50)),
    MG = c(50, 25, 5),
    FU = c(50, 25, 5),
    MB = c(50, 25, 5)
  )
)

Tuning Table

The tune tibble defines the combinations of parameters for multiple DIABLO tuning runs. Each row corresponds to a unique model configuration:

  • Taxa – selects a feature set from the X named list (e.g., "strain"X[["strain"]]).
  • Y – selects the response variable from the Y named list (e.g., "Clin"Y[["Clin"]]).
  • design – specifies the design matrix for the model (e.g., "full" or "null").
  • test.KeepX – selects a variable selection parameter from the test.KeepX named list (e.g., "test.High"test.KeepX[["test.High"]]).
  • Model – a unique integer ID used to name output files and track results.

This structure allows each row to be passed programmatically to run_diablo_tuning(), which looks up the corresponding list elements and runs the model without manually specifying each combination.

# tune <- tibble(
#   "Taxa" = c("strain", "species", "strain", "species"),
#   "gr" = c("RVVC_AS", "RVVC_AS", "Clin_bin", "Clin_bin"),
#   "design" = c("full", "null", "full", "null"),
#   "KeepX" = c("test.High", "test.Median", "test.Low", "test.High")
# ) %>%
#   mutate(Model = 1:ncol(.))

tune <- tibble(
  "Taxa" = c("strain", "strain", "strain", "strain", "strain"),
  "gr" = c("RVVC_pos", "RVVC_pos", "Clin_gr", "pos", "RVVC_pos"),
  "design" = c("full", "null", "full", "null", "null"),
  "KeepX" = c("test.High", "test.Median", "test.Low", "test.High", "test.Low")
) %>%
  mutate(Model = 1:nrow(.))

knitr::kable(tune)
Taxa gr design KeepX Model
strain RVVC_pos full test.High 1
strain RVVC_pos null test.Median 2
strain Clin_gr full test.Low 3
strain pos null test.High 4
strain RVVC_pos null test.Low 5
run_diablo_tuning(Taxa, gr, design, KeepX, Model, overwrite = F)
# A tibble: 1 × 6
  Model design success runtime_sec output_file              skipped
  <int> <chr>  <lgl>         <dbl> <chr>                    <lgl>  
1     1 full   TRUE             NA tune_batch_model_FU1.rds TRUE   
run_diablo_tuning(Taxa, gr, design, KeepX, Model, overwrite = F)
# A tibble: 1 × 6
  Model design success runtime_sec output_file              skipped
  <int> <chr>  <lgl>         <dbl> <chr>                    <lgl>  
1     2 null   TRUE             NA tune_batch_model_FU2.rds TRUE   
run_diablo_tuning(Taxa, gr, design, KeepX, Model, overwrite = F)
# A tibble: 1 × 6
  Model design success runtime_sec output_file              skipped
  <int> <chr>  <lgl>         <dbl> <chr>                    <lgl>  
1     3 full   TRUE             NA tune_batch_model_FU3.rds TRUE   
run_diablo_tuning(Taxa, gr, design, KeepX, Model, overwrite = F)
# A tibble: 1 × 6
  Model design success runtime_sec output_file              skipped
  <int> <chr>  <lgl>         <dbl> <chr>                    <lgl>  
1     4 null   TRUE             NA tune_batch_model_FU4.rds TRUE   
run_diablo_tuning(Taxa, gr, design, KeepX, Model, overwrite = F)
# A tibble: 1 × 6
  Model design success runtime_sec output_file              skipped
  <int> <chr>  <lgl>         <dbl> <chr>                    <lgl>  
1     5 null   TRUE             NA tune_batch_model_FU5.rds TRUE   

Run Models

Code
tune <- tune %>%
  mutate(
    .,
    diablo.mod = pmap(
      .,
      ~ block.splsda(
        X = X[[..1]],
        Y = Y[[..2]],
        design = ..3,
        keepX = diablo_fits[[..5]]$choice.keepX,
        ncomp = 2
      )
    )
  ) %>%
  mutate(nr_feat = pmap(., ~ diablo_fits[[..5]]$choice.keepX)) %>%
  mutate(diablo.mod = set_names(.$diablo.mod, paste0("Model ", .$Model)))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `diablo.mod = pmap(...)`.
Caused by warning:
! The SGCCA algorithm did not converge
Code
saveRDS(tune, "../Results/MixOmic/Diablo_object_FU.RDS")
# tune <- readRDS("../Results/MixOmic/Diablo_object.RDS")
# tune <- readRDS("../Results/MixOmic/Diablo_object_FU.RDS")

Visualizations

Taxa: strain, Design: full
KeepX: TR: 450 MG: 25 MB: 5
KeepX: TR: 300 MG: 50 MB: 25
plotIndiv(
   diablo.mod,
   ind.names = FALSE,
   legend = TRUE,
   col = pal[[gr]],
   title = "Comp 1 - 2")

Taxa: strain, Design: null
KeepX: TR: 100 MG: 5 MB: 50
KeepX: TR: 100 MG: 50 MB: 5
plotIndiv(
   diablo.mod,
   ind.names = FALSE,
   legend = TRUE,
   col = pal[[gr]],
   title = "Comp 1 - 2")

Taxa: strain, Design: full
KeepX: TR: 30 MG: 25 MB: 50
KeepX: TR: 10 MG: 25 MB: 5
plotIndiv(
   diablo.mod,
   ind.names = FALSE,
   legend = TRUE,
   col = pal[[gr]],
   title = "Comp 1 - 2")

Taxa: strain, Design: null
KeepX: TR: 300 MG: 5 MB: 50
KeepX: TR: 350 MG: 5 MB: 25
plotIndiv(
   diablo.mod,
   ind.names = FALSE,
   legend = TRUE,
   col = pal[[gr]],
   title = "Comp 1 - 2")

Taxa: strain, Design: null
KeepX: TR: 20 MG: 25 MB: 5
KeepX: TR: 30 MG: 25 MB: 5
plotIndiv(
   diablo.mod,
   ind.names = FALSE,
   legend = TRUE,
   col = pal[[gr]],
   title = "Comp 1 - 2")

Taxa: strain, Design: full
KeepX: TR: 450 MG: 25 MB: 5
KeepX: TR: 300 MG: 50 MB: 25
plotArrow(
  diablo.mod,
  ind.names = FALSE,
  legend = TRUE,
  col = pal[[gr]],
  title = "comp 1 - 2")

Taxa: strain, Design: null
KeepX: TR: 100 MG: 5 MB: 50
KeepX: TR: 100 MG: 50 MB: 5
plotArrow(
  diablo.mod,
  ind.names = FALSE,
  legend = TRUE,
  col = pal[[gr]],
  title = "comp 1 - 2")

Taxa: strain, Design: full
KeepX: TR: 30 MG: 25 MB: 50
KeepX: TR: 10 MG: 25 MB: 5
plotArrow(
  diablo.mod,
  ind.names = FALSE,
  legend = TRUE,
  col = pal[[gr]],
  title = "comp 1 - 2")

Taxa: strain, Design: null
KeepX: TR: 300 MG: 5 MB: 50
KeepX: TR: 350 MG: 5 MB: 25
plotArrow(
  diablo.mod,
  ind.names = FALSE,
  legend = TRUE,
  col = pal[[gr]],
  title = "comp 1 - 2")

Taxa: strain, Design: null
KeepX: TR: 20 MG: 25 MB: 5
KeepX: TR: 30 MG: 25 MB: 5
plotArrow(
  diablo.mod,
  ind.names = FALSE,
  legend = TRUE,
  col = pal[[gr]],
  title = "comp 1 - 2")

Taxa: strain, Design: full
KeepX: TR: 450 MG: 25 MB: 5
KeepX: TR: 300 MG: 50 MB: 25
plotDiablo(diablo.mod, ncomp = 1, col.per.group = pal[[gr]])

Taxa: strain, Design: null
KeepX: TR: 100 MG: 5 MB: 50
KeepX: TR: 100 MG: 50 MB: 5
plotDiablo(diablo.mod, ncomp = 1, col.per.group = pal[[gr]])

Taxa: strain, Design: full
KeepX: TR: 30 MG: 25 MB: 50
KeepX: TR: 10 MG: 25 MB: 5
plotDiablo(diablo.mod, ncomp = 1, col.per.group = pal[[gr]])

Taxa: strain, Design: null
KeepX: TR: 300 MG: 5 MB: 50
KeepX: TR: 350 MG: 5 MB: 25
plotDiablo(diablo.mod, ncomp = 1, col.per.group = pal[[gr]])

Taxa: strain, Design: null
KeepX: TR: 20 MG: 25 MB: 5
KeepX: TR: 30 MG: 25 MB: 5
plotDiablo(diablo.mod, ncomp = 1, col.per.group = pal[[gr]])

Taxa: strain, Design: full
KeepX: TR: 450 MG: 25 MB: 5
KeepX: TR: 300 MG: 50 MB: 25
plotVar(
  diablo.mod,
  var.names = FALSE,
  style = "graphics",
  legend = TRUE,
  pch = c(16, 17, 15, 18),
  cex = c(2, 2, 2, 2),
  col = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
  title = "DIABLO comp 1 - 2")

The correlation circle plot highlights the contribution of each selected variable to each component. Important variables should be close to the large circle. Here, only the variables selected on components 1 and 2 are depicted (across all blocks). Clusters of points indicate a strong correlation between variables.

Taxa: strain, Design: null
KeepX: TR: 100 MG: 5 MB: 50
KeepX: TR: 100 MG: 50 MB: 5
plotVar(
  diablo.mod,
  var.names = FALSE,
  style = "graphics",
  legend = TRUE,
  pch = c(16, 17, 15, 18),
  cex = c(2, 2, 2, 2),
  col = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
  title = "DIABLO comp 1 - 2")

The correlation circle plot highlights the contribution of each selected variable to each component. Important variables should be close to the large circle. Here, only the variables selected on components 1 and 2 are depicted (across all blocks). Clusters of points indicate a strong correlation between variables.

Taxa: strain, Design: full
KeepX: TR: 30 MG: 25 MB: 50
KeepX: TR: 10 MG: 25 MB: 5
plotVar(
  diablo.mod,
  var.names = FALSE,
  style = "graphics",
  legend = TRUE,
  pch = c(16, 17, 15, 18),
  cex = c(2, 2, 2, 2),
  col = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
  title = "DIABLO comp 1 - 2")

The correlation circle plot highlights the contribution of each selected variable to each component. Important variables should be close to the large circle. Here, only the variables selected on components 1 and 2 are depicted (across all blocks). Clusters of points indicate a strong correlation between variables.

Taxa: strain, Design: null
KeepX: TR: 300 MG: 5 MB: 50
KeepX: TR: 350 MG: 5 MB: 25
plotVar(
  diablo.mod,
  var.names = FALSE,
  style = "graphics",
  legend = TRUE,
  pch = c(16, 17, 15, 18),
  cex = c(2, 2, 2, 2),
  col = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
  title = "DIABLO comp 1 - 2")

The correlation circle plot highlights the contribution of each selected variable to each component. Important variables should be close to the large circle. Here, only the variables selected on components 1 and 2 are depicted (across all blocks). Clusters of points indicate a strong correlation between variables.

Taxa: strain, Design: null
KeepX: TR: 20 MG: 25 MB: 5
KeepX: TR: 30 MG: 25 MB: 5
plotVar(
  diablo.mod,
  var.names = FALSE,
  style = "graphics",
  legend = TRUE,
  pch = c(16, 17, 15, 18),
  cex = c(2, 2, 2, 2),
  col = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
  title = "DIABLO comp 1 - 2")
Warning in plotVar(diablo.mod, var.names = FALSE, style = "graphics", legend =
TRUE, : We detected negative correlation between the variates of some blocks,
which means that some clusters of variables observed on the correlation circle
plot are not necessarily positively correlated.

The correlation circle plot highlights the contribution of each selected variable to each component. Important variables should be close to the large circle. Here, only the variables selected on components 1 and 2 are depicted (across all blocks). Clusters of points indicate a strong correlation between variables.

Taxa: strain, Design: full
KeepX: TR: 450 MG: 25 MB: 5
KeepX: TR: 300 MG: 50 MB: 25
png("../Results/MixOmic/loadings_comp1_1.png", width = 10, height = 6, res = 300, units = "in")
par(mar = c(5.1, 4.1, 4.1, 6.1))  # b,l,t,r
try(plotLoadings(diablo.mod, comp = 1, ndisplay = 20, size.name = 0.7,
    size.title = 0.9, contrib = "max", method = "median", legend.color = pal[[gr]]),
    silent = TRUE)
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MB
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: null
KeepX: TR: 100 MG: 5 MB: 50
KeepX: TR: 100 MG: 50 MB: 5
png("../Results/MixOmic/loadings_comp1_2.png", width = 10, height = 6, res = 300, units = "in")
par(mar = c(5.1, 4.1, 4.1, 6.1))  # b,l,t,r
try(plotLoadings(diablo.mod, comp = 1, ndisplay = 20, size.name = 0.7,
    size.title = 0.9, contrib = "max", method = "median", legend.color = pal[[gr]]),
    silent = TRUE)
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MG
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block FU
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: full
KeepX: TR: 30 MG: 25 MB: 50
KeepX: TR: 10 MG: 25 MB: 5
png("../Results/MixOmic/loadings_comp1_3.png", width = 10, height = 6, res = 300, units = "in")
par(mar = c(5.1, 4.1, 4.1, 6.1))  # b,l,t,r
try(plotLoadings(diablo.mod, comp = 1, ndisplay = 20, size.name = 0.7,
    size.title = 0.9, contrib = "max", method = "median", legend.color = pal[[gr]]),
    silent = TRUE)
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block FU
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: null
KeepX: TR: 300 MG: 5 MB: 50
KeepX: TR: 350 MG: 5 MB: 25
png("../Results/MixOmic/loadings_comp1_4.png", width = 10, height = 6, res = 300, units = "in")
par(mar = c(5.1, 4.1, 4.1, 6.1))  # b,l,t,r
try(plotLoadings(diablo.mod, comp = 1, ndisplay = 20, size.name = 0.7,
    size.title = 0.9, contrib = "max", method = "median", legend.color = pal[[gr]]),
    silent = TRUE)
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MG
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block FU
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: null
KeepX: TR: 20 MG: 25 MB: 5
KeepX: TR: 30 MG: 25 MB: 5
png("../Results/MixOmic/loadings_comp1_5.png", width = 10, height = 6, res = 300, units = "in")
par(mar = c(5.1, 4.1, 4.1, 6.1))  # b,l,t,r
try(plotLoadings(diablo.mod, comp = 1, ndisplay = 20, size.name = 0.7,
    size.title = 0.9, contrib = "max", method = "median", legend.color = pal[[gr]]),
    silent = TRUE)
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MB
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block FU
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: full
KeepX: TR: 450 MG: 25 MB: 5
KeepX: TR: 300 MG: 50 MB: 25
png("../Results/MixOmic/loadings_comp2_1.png", width = 14, height = 6, res = 300, units = "in")
try(plotLoadings(diablo.mod, comp = 2, ndisplay = 20, size.name = 0.7,
    size.title = 1, contrib = "max", method = "median", legend.color = pal[[gr]]), 
    silent = TRUE)
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block FU
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: null
KeepX: TR: 100 MG: 5 MB: 50
KeepX: TR: 100 MG: 50 MB: 5
png("../Results/MixOmic/loadings_comp2_2.png", width = 14, height = 6, res = 300, units = "in")
try(plotLoadings(diablo.mod, comp = 2, ndisplay = 20, size.name = 0.7,
    size.title = 1, contrib = "max", method = "median", legend.color = pal[[gr]]), 
    silent = TRUE)
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MB
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: full
KeepX: TR: 30 MG: 25 MB: 50
KeepX: TR: 10 MG: 25 MB: 5
png("../Results/MixOmic/loadings_comp2_3.png", width = 14, height = 6, res = 300, units = "in")
try(plotLoadings(diablo.mod, comp = 2, ndisplay = 20, size.name = 0.7,
    size.title = 1, contrib = "max", method = "median", legend.color = pal[[gr]]), 
    silent = TRUE)
'ndisplay' value is larger than the number of selected variables! It has been reseted to 10 for block TR
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MB
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block FU
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: null
KeepX: TR: 300 MG: 5 MB: 50
KeepX: TR: 350 MG: 5 MB: 25
png("../Results/MixOmic/loadings_comp2_4.png", width = 14, height = 6, res = 300, units = "in")
try(plotLoadings(diablo.mod, comp = 2, ndisplay = 20, size.name = 0.7,
    size.title = 1, contrib = "max", method = "median", legend.color = pal[[gr]]), 
    silent = TRUE)
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MG
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: null
KeepX: TR: 20 MG: 25 MB: 5
KeepX: TR: 30 MG: 25 MB: 5
png("../Results/MixOmic/loadings_comp2_5.png", width = 14, height = 6, res = 300, units = "in")
try(plotLoadings(diablo.mod, comp = 2, ndisplay = 20, size.name = 0.7,
    size.title = 1, contrib = "max", method = "median", legend.color = pal[[gr]]), 
    silent = TRUE)
'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MB
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: full
KeepX: TR: 450 MG: 25 MB: 5
KeepX: TR: 300 MG: 50 MB: 25
png("../Results/MixOmic/Heatmap_Model_1.png", width = 20, height = 15, res = 300, units = "in")
cim_obj <- cimDiablo(
    diablo.mod,
    comp = 1,
    color.Y = pal[[gr]],
    size.legend = 1.5,
    margins = c(10, 4),
    color.blocks = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
    legend.position = "bottomleft",
    title = "Clustered heatmap of integrated data")

trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: null
KeepX: TR: 100 MG: 5 MB: 50
KeepX: TR: 100 MG: 50 MB: 5
png("../Results/MixOmic/Heatmap_Model_2.png", width = 20, height = 15, res = 300, units = "in")
cim_obj <- cimDiablo(
    diablo.mod,
    comp = 1,
    color.Y = pal[[gr]],
    size.legend = 1.5,
    margins = c(10, 4),
    color.blocks = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
    legend.position = "bottomleft",
    title = "Clustered heatmap of integrated data")

trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: full
KeepX: TR: 30 MG: 25 MB: 50
KeepX: TR: 10 MG: 25 MB: 5
png("../Results/MixOmic/Heatmap_Model_3.png", width = 20, height = 15, res = 300, units = "in")
cim_obj <- cimDiablo(
    diablo.mod,
    comp = 1,
    color.Y = pal[[gr]],
    size.legend = 1.5,
    margins = c(10, 4),
    color.blocks = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
    legend.position = "bottomleft",
    title = "Clustered heatmap of integrated data")

trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: null
KeepX: TR: 300 MG: 5 MB: 50
KeepX: TR: 350 MG: 5 MB: 25
png("../Results/MixOmic/Heatmap_Model_4.png", width = 20, height = 15, res = 300, units = "in")
cim_obj <- cimDiablo(
    diablo.mod,
    comp = 1,
    color.Y = pal[[gr]],
    size.legend = 1.5,
    margins = c(10, 4),
    color.blocks = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
    legend.position = "bottomleft",
    title = "Clustered heatmap of integrated data")

trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo
dev.off()
quartz_off_screen 
                2 

Taxa: strain, Design: null
KeepX: TR: 20 MG: 25 MB: 5
KeepX: TR: 30 MG: 25 MB: 5
png("../Results/MixOmic/Heatmap_Model_5.png", width = 20, height = 15, res = 300, units = "in")
cim_obj <- cimDiablo(
    diablo.mod,
    comp = 1,
    color.Y = pal[[gr]],
    size.legend = 1.5,
    margins = c(10, 4),
    color.blocks = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
    legend.position = "bottomleft",
    title = "Clustered heatmap of integrated data")

trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo
dev.off()
quartz_off_screen 
                2 

Code
sim_to_edges <- function(sim_matrix, threshold = NULL) {
  #colnames(sim_matrix) <- make.names(colnames(sim_matrix), unique = TRUE)
  #rownames(sim_matrix) <- make.names(rownames(sim_matrix), unique = TRUE)

  # removes spaces from feature names and replace with "_"
  colnames(sim_matrix) <- gsub("\\s+", "_", colnames(sim_matrix))
  rownames(sim_matrix) <- gsub("\\s+", "_", rownames(sim_matrix))

  TR <- colnames(X$TR) %>% gsub("\\s+", "_", .)
  MB <- colnames(X$MB) %>% gsub("\\s+", "_", .)
  MG <- colnames(X$MG) %>% gsub("\\s+", "_", .)

  # ---- edges between features ----
  feature_edges <- sim_matrix %>%
    as_tibble(rownames = "from") %>%
    #as.data.frame() %>%
    #rownames_to_column("from") %>%
    pivot_longer(-from, names_to = "to", values_to = "weight") %>%
    mutate(
      type_from = case_when(
        from %in% TR ~ "TR",
        from %in% MB ~ "MB",
        from %in% MG ~ "MG",
        from %in% FU ~ "FU"
      ),
      type_to = case_when(
        to %in% TR ~ "TR",
        to %in% MB ~ "MB",
        to %in% MG ~ "MG",
        from %in% FU ~ "FU"
      )
    ) %>%
    filter(from != to) %>%
    filter(type_from != type_to) %>% # only cross-block edges

    # remove duplicates in symmetric matrix:
    rowwise() %>%
    mutate(pair = paste(sort(c(from, to)), collapse = "_")) %>%
    distinct(pair, .keep_all = TRUE) %>%
    ungroup() %>%
    select(-pair)

  # Filter weights
  if (!is.null(threshold)) {
    feature_edges <- feature_edges %>% filter(weight >= threshold)
  }

  # ---- vertex table ----
  vertex_df <- tibble(name = colnames(sim_matrix)) %>%
    mutate(
      block = case_when(
        name %in% TR ~ "TR",
        name %in% MG ~ "MG",
        name %in% MB ~ "MB",
        from %in% FU ~ "FU"
      )
    )

  # ---- block hierarchy edges ----
  block_edges <- rbind(
    data.frame(from = "origin", to = unique(vertex_df$block)),
    vertex_df %>% rename(from = block, to = name)
  )

  return(list(
    edges = block_edges,
    feature_edges = feature_edges,
    vertices = vertex_df
  ))
}


# choose to display corelation values or similarity values
# the similarity values are the matrix returned from the circosplot() function
n <- f %>%
  map(., ~ head(.x, 10)) %>%
  unlist(.) %>%
  intersect(colnames(sim_m), .)

n <- f %>%
  map(., ~ head(.x, 10)) %>%
  unlist(.) %>%
  intersect(colnames(big_cor), .)
# sim_matrix <- sim_m

# sim_matrix <- sim_m[n,n]
sim_matrix <- big_cor[n, n]
glimpse(sim_matrix)
edges <- sim_to_edges(sim_matrix[n, n], 0.4)


# expand vertex table to include meta nodes: origin + block names
vertices <- edges$vertices %>%
  bind_rows(
    tibble(
      name = c("origin", unique(edges$vertices$block)),
      block = NA
    )
  )


#Let's add information concerning the label we are going to add: angle, horizontal adjustement and potential flip
#calculate the ANGLE of the labels
vertices$id <- NA
myleaves <- which(is.na(match(vertices$name, edges$from)))
nleaves <- length(myleaves)
vertices$id[myleaves] <- seq(1:nleaves)
vertices$angle <- 90 - 360 * vertices$id / nleaves

# calculate the alignment of labels: right or left
# If I am on the left part of the plot, my labels have currently an angle < -90
vertices$hjust <- ifelse(vertices$angle < -90, 1, 0)

# flip angle BY to make them readable
vertices$angle <- ifelse(
  vertices$angle < -90,
  vertices$angle + 180,
  vertices$angle
)


mygraph <- igraph::graph_from_data_frame(
  d = edges$edges, # hierarchical edges only
  vertices = vertices
)

# recalc leaf because adding meta nodes broke original leaf detection
#V(mygraph)$leaf <- degree(mygraph, mode = "out") == 0

# optional: add feature–feature similarity edges
# mygraph <- igraph::add_edges(
#   mygraph,
#   t(as.matrix(edges$feature_edges[, c("from", "to")]))
# )

# Convert feature_edges into index form
from <- match(edges$feature_edges$from, V(mygraph)$name)
to <- match(edges$feature_edges$to, V(mygraph)$name)
connect <- edges$feature_edges

p <- ggraph(mygraph, layout = "dendrogram", circular = TRUE) +

  # similarity bundles
  geom_conn_bundle(
    data = get_con(from = from, to = to, value = connect$weight),
    aes(alpha = value, color = value),
    tension = 0.6, # adjust bundling strength
    width = 0.8
  ) +

  # features at the periphery
  geom_node_point(
    aes(filter = leaf, x = x * 1.05, y = y * 1.05, color = block),
    size = 2
  ) +

  # optional text labels
  geom_node_text(aes(x = x * 1.25, y = y * 1.05, filter = leaf, label = name)) +
  # geom_node_text(aes(x = x, y=y, filter = leaf, label=name,
  #               angle = angle,
  #               hjust=hjust,
  #               colour=block), size=2, alpha=1) +
  coord_cartesian(clip = "off") +

  #scale_color_viridis_c(option = "B") +
  scale_alpha(range = c(0.1, 0.8)) +
  theme_void() +
  theme(
    #legend.position = "none",
    plot.margin = unit(c(0, 0, 0, 0), "cm")
  ) +
  expand_limits(x = c(-1.5, 1.5), y = c(-1.25, 1.25))

p