DIABLO Models

Published

30 Mar 2026

Code

##################
# LOAD LIBRARIES #
##################
library(tidyverse)
library(mixOmics)
library(readxl)
library(cowplot)
library(RColorBrewer)
library(scales)

select <- dplyr::select
map <- purrr::map

# setwd("/Users/vilkal/work/Brolidens_work/Projects/Candida-omics/src")

feat <- c("#e39ddaff", "#6dded3ff", "#a68cd5ff") #,"#6095d7","#755fa3","#c44999")

Code

pal <- list(
  CST = c(
    "I" = "#148F77",
    "II" = "#6d9f06",
    "III" = "#A3E4D7",
    "IV-B" = "#E794C1",
    "IV-C" = "#b5cb8c",
    "V" = "#43BA8F"
  ),
  subCST = c(
    "#a5c97b",
    "#cc93cf",
    "#de9548",
    "#63d3b4",
    "#9e8dec",
    "#902267",
    '#FB5273',
    "#91c5de"
  ),
  RVVC = c("#EAEAEA", "#fd814e"),
  BV = c("#EAEAEA", "#fd814e"),
  NAC = c("#EAEAEA", "#fd814e"),
  ST = c("#EAEAEA", "#fd814e"),
  missing_page = c("#EAEAEA", "#fd814e"),
  Clin = c("#a5c97b", "#cc93cf", "#de9548", "#63d3b4", "#9e8dec", "#902267"),
  Clin_gr = c("#ffa998", '#FB5273', "#902267"),
  RVVC_AS = c("#ffa998", '#FB5273', "#902267"),
  RVVC_pos = c(
    "#A6CEE3",
    "#1F78B4",
    #"#EAEAEA",
    #"gray",
    "#e490c4ff",
    "#c12d88ff"
  ), # "#FB9A99", "#E31A1C", "#B2DF8A", "#609c5bff", "#FDBF6F", "#FF7F00",, "#CAB2D6", "#6a3d9a" "#e490c4ff", "#c12d88ff"
  trx_louvain_7 = c(
    "#de9548",
    "#cc93cf",
    "#a5c97b",
    "#9e8dec",
    "#902267",
    "#63d3b4",
    '#FB5273'
  ),
  metab_louvain = c(
    "#cc93cf",
    "#a5c97b",
    "#de9548",
    "#902267",
    "#9e8dec",
    "#63d3b4"
    #'#FB5273'
  ),
  microb_louvain = c(
    "#cc93cf",
    "#de9548",
    "#a5c97b",
    "#9e8dec",
    "#902267",
    "#63d3b4",
    '#FB5273'
  ),
  fun_louvain = c(
    "#a5c97b",
    "#cc93cf",
    "#de9548",
    "#63d3b4",
    "#9e8dec",
    "#902267",
    '#FB5273'
    #"#984EA3"
    #"#B3CDE3"
  ),
  integ_louvain = c(
    "#a5c97b",
    "#cc93cf",
    "#de9548",
    "#63d3b4",
    "#9e8dec",
    "#902267",
    '#FB5273',
    "#984EA3"
    #"#B3CDE3"
  ),
  "Wet_smear:_clue_cells_(y/n)" = c("#2266ac", "#91c5de"),
  hyphae = c("#f26386", "#fd814e", "#a4d984", "#fbbc52"),
  pos = c("#91c5de", "#2266ac"),
  "Clinical_score_(0-5)" = brewer.pal(6, name = "Reds")
)

X <- readRDS("../Results/MixOmic/X_object.RDS")
Y <- readRDS("../Results/MixOmic/Y_object.RDS")

Variables to change

Species vs strain level for MG data
full weighted vs full design matrix
different number of trx genes

1. Specify design matrix

The design matrix describes how strongly you want the different input datasets to be connected in the model. It is a matrix of numbers between 0 and 1, where each value shows how much the model should try to link two datasets.

Values between 0.5 and 1 tell the model to focus more on the correlations between those datasets.
Values below 0.5 tell the model to focus more on prediction rather than on linking the datasets.

A matrix full of ones is the default matrix that is used, and can be specified by setting design = "full". Similarly, setting design = "null" will produce a matrix full of zeroes.

weight <- c(0.1, 1) %>% set_names(., c("weighted", "full"))

design <- map(
  weight,
  ~ matrix(
    .x,
    ncol = length(X[[1]]),
    nrow = length(X[[1]]),
    dimnames = list(names(X[[1]]), names(X[[1]]))
  )
)
design <- map(
  design,
  ~ {
    diag(.x) <- 0
    .x
  }
)

design$full

   TR MB MG FU
TR  0  1  1  1
MB  1  0  1  1
MG  1  1  0  1
FU  1  1  1  0

Number of features to test when tuning

test.KeepX <- list(
  "test.High" = list(
    TR = c(seq(300, 500, 50)),
    MG = c(50, 25, 5),
    FU = c(50, 25, 5),
    MB = c(50, 25, 5)
  ),
  "test.Median" = list(
    TR = c(seq(100, 250, 50)),
    MG = c(50, 25, 5),
    FU = c(50, 25, 5),
    MB = c(50, 25, 5)
  ),
  "test.Low" = list(
    TR = c(seq(10, 50, 10)),
    MG = c(50, 25, 5),
    FU = c(50, 25, 5),
    MB = c(50, 25, 5)
  ),
  "test.High" = list(
    TR = c(seq(300, 500, 50)),
    MG = c(50, 25, 5),
    FU = c(50, 25, 5),
    MB = c(50, 25, 5)
  )
)

Tuning Table

The tune tibble defines the combinations of parameters for multiple DIABLO tuning runs. Each row corresponds to a unique model configuration:

Taxa – selects a feature set from the X named list (e.g., "strain" → X[["strain"]]).
Y – selects the response variable from the Y named list (e.g., "Clin" → Y[["Clin"]]).
design – specifies the design matrix for the model (e.g., "full" or "null").
test.KeepX – selects a variable selection parameter from the test.KeepX named list (e.g., "test.High" → test.KeepX[["test.High"]]).
Model – a unique integer ID used to name output files and track results.

This structure allows each row to be passed programmatically to run_diablo_tuning(), which looks up the corresponding list elements and runs the model without manually specifying each combination.

# tune <- tibble(
#   "Taxa" = c("strain", "species", "strain", "species"),
#   "gr" = c("RVVC_AS", "RVVC_AS", "Clin_bin", "Clin_bin"),
#   "design" = c("full", "null", "full", "null"),
#   "KeepX" = c("test.High", "test.Median", "test.Low", "test.High")
# ) %>%
#   mutate(Model = 1:ncol(.))

tune <- tibble(
  "Taxa" = c("strain", "strain", "strain", "strain", "strain"),
  "gr" = c("RVVC_pos", "RVVC_pos", "Clin_gr", "pos", "RVVC_pos"),
  "design" = c("full", "null", "full", "null", "null"),
  "KeepX" = c("test.High", "test.Median", "test.Low", "test.High", "test.Low")
) %>%
  mutate(Model = 1:nrow(.))

knitr::kable(tune)

Taxa	gr	design	KeepX	Model
strain	RVVC_pos	full	test.High	1
strain	RVVC_pos	null	test.Median	2
strain	Clin_gr	full	test.Low	3
strain	pos	null	test.High	4
strain	RVVC_pos	null	test.Low	5

run_diablo_tuning(Taxa, gr, design, KeepX, Model, overwrite = F)

# A tibble: 1 × 6
  Model design success runtime_sec output_file              skipped
  <int> <chr>  <lgl>         <dbl> <chr>                    <lgl>  
1     1 full   TRUE             NA tune_batch_model_FU1.rds TRUE

run_diablo_tuning(Taxa, gr, design, KeepX, Model, overwrite = F)

# A tibble: 1 × 6
  Model design success runtime_sec output_file              skipped
  <int> <chr>  <lgl>         <dbl> <chr>                    <lgl>  
1     2 null   TRUE             NA tune_batch_model_FU2.rds TRUE

run_diablo_tuning(Taxa, gr, design, KeepX, Model, overwrite = F)

# A tibble: 1 × 6
  Model design success runtime_sec output_file              skipped
  <int> <chr>  <lgl>         <dbl> <chr>                    <lgl>  
1     3 full   TRUE             NA tune_batch_model_FU3.rds TRUE

run_diablo_tuning(Taxa, gr, design, KeepX, Model, overwrite = F)

# A tibble: 1 × 6
  Model design success runtime_sec output_file              skipped
  <int> <chr>  <lgl>         <dbl> <chr>                    <lgl>  
1     4 null   TRUE             NA tune_batch_model_FU4.rds TRUE

run_diablo_tuning(Taxa, gr, design, KeepX, Model, overwrite = F)

# A tibble: 1 × 6
  Model design success runtime_sec output_file              skipped
  <int> <chr>  <lgl>         <dbl> <chr>                    <lgl>  
1     5 null   TRUE             NA tune_batch_model_FU5.rds TRUE

Run Models

Code

tune <- tune %>%
  mutate(
    .,
    diablo.mod = pmap(
      .,
      ~ block.splsda(
        X = X[[..1]],
        Y = Y[[..2]],
        design = ..3,
        keepX = diablo_fits[[..5]]$choice.keepX,
        ncomp = 2
      )
    )
  ) %>%
  mutate(nr_feat = pmap(., ~ diablo_fits[[..5]]$choice.keepX)) %>%
  mutate(diablo.mod = set_names(.$diablo.mod, paste0("Model ", .$Model)))

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `diablo.mod = pmap(...)`.
Caused by warning:
! The SGCCA algorithm did not converge

Code

saveRDS(tune, "../Results/MixOmic/Diablo_object_FU.RDS")
# tune <- readRDS("../Results/MixOmic/Diablo_object.RDS")
# tune <- readRDS("../Results/MixOmic/Diablo_object_FU.RDS")

Visualizations

Taxa: strain, Design: full

KeepX: TR: 450 MG: 25 MB: 5

KeepX: TR: 300 MG: 50 MB: 25

plotIndiv(
   diablo.mod,
   ind.names = FALSE,
   legend = TRUE,
   col = pal[[gr]],
   title = "Comp 1 - 2")

Taxa: strain, Design: null

KeepX: TR: 100 MG: 5 MB: 50

KeepX: TR: 100 MG: 50 MB: 5

plotIndiv(
   diablo.mod,
   ind.names = FALSE,
   legend = TRUE,
   col = pal[[gr]],
   title = "Comp 1 - 2")

Taxa: strain, Design: full

KeepX: TR: 30 MG: 25 MB: 50

KeepX: TR: 10 MG: 25 MB: 5

plotIndiv(
   diablo.mod,
   ind.names = FALSE,
   legend = TRUE,
   col = pal[[gr]],
   title = "Comp 1 - 2")

Taxa: strain, Design: null

KeepX: TR: 300 MG: 5 MB: 50

KeepX: TR: 350 MG: 5 MB: 25

plotIndiv(
   diablo.mod,
   ind.names = FALSE,
   legend = TRUE,
   col = pal[[gr]],
   title = "Comp 1 - 2")

Taxa: strain, Design: null

KeepX: TR: 20 MG: 25 MB: 5

KeepX: TR: 30 MG: 25 MB: 5

plotIndiv(
   diablo.mod,
   ind.names = FALSE,
   legend = TRUE,
   col = pal[[gr]],
   title = "Comp 1 - 2")

Taxa: strain, Design: full

KeepX: TR: 450 MG: 25 MB: 5

KeepX: TR: 300 MG: 50 MB: 25

plotArrow(
  diablo.mod,
  ind.names = FALSE,
  legend = TRUE,
  col = pal[[gr]],
  title = "comp 1 - 2")

Taxa: strain, Design: null

KeepX: TR: 100 MG: 5 MB: 50

KeepX: TR: 100 MG: 50 MB: 5

plotArrow(
  diablo.mod,
  ind.names = FALSE,
  legend = TRUE,
  col = pal[[gr]],
  title = "comp 1 - 2")

Taxa: strain, Design: full

KeepX: TR: 30 MG: 25 MB: 50

KeepX: TR: 10 MG: 25 MB: 5

plotArrow(
  diablo.mod,
  ind.names = FALSE,
  legend = TRUE,
  col = pal[[gr]],
  title = "comp 1 - 2")

Taxa: strain, Design: null

KeepX: TR: 300 MG: 5 MB: 50

KeepX: TR: 350 MG: 5 MB: 25

plotArrow(
  diablo.mod,
  ind.names = FALSE,
  legend = TRUE,
  col = pal[[gr]],
  title = "comp 1 - 2")

Taxa: strain, Design: null

KeepX: TR: 20 MG: 25 MB: 5

KeepX: TR: 30 MG: 25 MB: 5

plotArrow(
  diablo.mod,
  ind.names = FALSE,
  legend = TRUE,
  col = pal[[gr]],
  title = "comp 1 - 2")

Taxa: strain, Design: full

KeepX: TR: 450 MG: 25 MB: 5

KeepX: TR: 300 MG: 50 MB: 25

plotDiablo(diablo.mod, ncomp = 1, col.per.group = pal[[gr]])

Taxa: strain, Design: null

KeepX: TR: 100 MG: 5 MB: 50

KeepX: TR: 100 MG: 50 MB: 5

plotDiablo(diablo.mod, ncomp = 1, col.per.group = pal[[gr]])

Taxa: strain, Design: full

KeepX: TR: 30 MG: 25 MB: 50

KeepX: TR: 10 MG: 25 MB: 5

plotDiablo(diablo.mod, ncomp = 1, col.per.group = pal[[gr]])

Taxa: strain, Design: null

KeepX: TR: 300 MG: 5 MB: 50

KeepX: TR: 350 MG: 5 MB: 25

plotDiablo(diablo.mod, ncomp = 1, col.per.group = pal[[gr]])

Taxa: strain, Design: null

KeepX: TR: 20 MG: 25 MB: 5

KeepX: TR: 30 MG: 25 MB: 5

plotDiablo(diablo.mod, ncomp = 1, col.per.group = pal[[gr]])

Taxa: strain, Design: full

KeepX: TR: 450 MG: 25 MB: 5

KeepX: TR: 300 MG: 50 MB: 25

plotVar(
  diablo.mod,
  var.names = FALSE,
  style = "graphics",
  legend = TRUE,
  pch = c(16, 17, 15, 18),
  cex = c(2, 2, 2, 2),
  col = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
  title = "DIABLO comp 1 - 2")

The correlation circle plot highlights the contribution of each selected variable to each component. Important variables should be close to the large circle. Here, only the variables selected on components 1 and 2 are depicted (across all blocks). Clusters of points indicate a strong correlation between variables.

Taxa: strain, Design: null

KeepX: TR: 100 MG: 5 MB: 50

KeepX: TR: 100 MG: 50 MB: 5

plotVar(
  diablo.mod,
  var.names = FALSE,
  style = "graphics",
  legend = TRUE,
  pch = c(16, 17, 15, 18),
  cex = c(2, 2, 2, 2),
  col = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
  title = "DIABLO comp 1 - 2")

Taxa: strain, Design: full

KeepX: TR: 30 MG: 25 MB: 50

KeepX: TR: 10 MG: 25 MB: 5

plotVar(
  diablo.mod,
  var.names = FALSE,
  style = "graphics",
  legend = TRUE,
  pch = c(16, 17, 15, 18),
  cex = c(2, 2, 2, 2),
  col = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
  title = "DIABLO comp 1 - 2")

Taxa: strain, Design: null

KeepX: TR: 300 MG: 5 MB: 50

KeepX: TR: 350 MG: 5 MB: 25

plotVar(
  diablo.mod,
  var.names = FALSE,
  style = "graphics",
  legend = TRUE,
  pch = c(16, 17, 15, 18),
  cex = c(2, 2, 2, 2),
  col = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
  title = "DIABLO comp 1 - 2")

Taxa: strain, Design: null

KeepX: TR: 20 MG: 25 MB: 5

KeepX: TR: 30 MG: 25 MB: 5

plotVar(
  diablo.mod,
  var.names = FALSE,
  style = "graphics",
  legend = TRUE,
  pch = c(16, 17, 15, 18),
  cex = c(2, 2, 2, 2),
  col = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
  title = "DIABLO comp 1 - 2")

Warning in plotVar(diablo.mod, var.names = FALSE, style = "graphics", legend =
TRUE, : We detected negative correlation between the variates of some blocks,
which means that some clusters of variables observed on the correlation circle
plot are not necessarily positively correlated.

Taxa: strain, Design: full

KeepX: TR: 450 MG: 25 MB: 5

KeepX: TR: 300 MG: 50 MB: 25

png("../Results/MixOmic/loadings_comp1_1.png", width = 10, height = 6, res = 300, units = "in")
par(mar = c(5.1, 4.1, 4.1, 6.1))  # b,l,t,r
try(plotLoadings(diablo.mod, comp = 1, ndisplay = 20, size.name = 0.7,
    size.title = 0.9, contrib = "max", method = "median", legend.color = pal[[gr]]),
    silent = TRUE)

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MB

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: null

KeepX: TR: 100 MG: 5 MB: 50

KeepX: TR: 100 MG: 50 MB: 5

png("../Results/MixOmic/loadings_comp1_2.png", width = 10, height = 6, res = 300, units = "in")
par(mar = c(5.1, 4.1, 4.1, 6.1))  # b,l,t,r
try(plotLoadings(diablo.mod, comp = 1, ndisplay = 20, size.name = 0.7,
    size.title = 0.9, contrib = "max", method = "median", legend.color = pal[[gr]]),
    silent = TRUE)

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MG

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block FU

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: full

KeepX: TR: 30 MG: 25 MB: 50

KeepX: TR: 10 MG: 25 MB: 5

png("../Results/MixOmic/loadings_comp1_3.png", width = 10, height = 6, res = 300, units = "in")
par(mar = c(5.1, 4.1, 4.1, 6.1))  # b,l,t,r
try(plotLoadings(diablo.mod, comp = 1, ndisplay = 20, size.name = 0.7,
    size.title = 0.9, contrib = "max", method = "median", legend.color = pal[[gr]]),
    silent = TRUE)

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block FU

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: null

KeepX: TR: 300 MG: 5 MB: 50

KeepX: TR: 350 MG: 5 MB: 25

png("../Results/MixOmic/loadings_comp1_4.png", width = 10, height = 6, res = 300, units = "in")
par(mar = c(5.1, 4.1, 4.1, 6.1))  # b,l,t,r
try(plotLoadings(diablo.mod, comp = 1, ndisplay = 20, size.name = 0.7,
    size.title = 0.9, contrib = "max", method = "median", legend.color = pal[[gr]]),
    silent = TRUE)

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MG

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block FU

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: null

KeepX: TR: 20 MG: 25 MB: 5

KeepX: TR: 30 MG: 25 MB: 5

png("../Results/MixOmic/loadings_comp1_5.png", width = 10, height = 6, res = 300, units = "in")
par(mar = c(5.1, 4.1, 4.1, 6.1))  # b,l,t,r
try(plotLoadings(diablo.mod, comp = 1, ndisplay = 20, size.name = 0.7,
    size.title = 0.9, contrib = "max", method = "median", legend.color = pal[[gr]]),
    silent = TRUE)

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MB

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block FU

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: full

KeepX: TR: 450 MG: 25 MB: 5

KeepX: TR: 300 MG: 50 MB: 25

png("../Results/MixOmic/loadings_comp2_1.png", width = 14, height = 6, res = 300, units = "in")
try(plotLoadings(diablo.mod, comp = 2, ndisplay = 20, size.name = 0.7,
    size.title = 1, contrib = "max", method = "median", legend.color = pal[[gr]]), 
    silent = TRUE)

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block FU

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: null

KeepX: TR: 100 MG: 5 MB: 50

KeepX: TR: 100 MG: 50 MB: 5

png("../Results/MixOmic/loadings_comp2_2.png", width = 14, height = 6, res = 300, units = "in")
try(plotLoadings(diablo.mod, comp = 2, ndisplay = 20, size.name = 0.7,
    size.title = 1, contrib = "max", method = "median", legend.color = pal[[gr]]), 
    silent = TRUE)

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MB

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: full

KeepX: TR: 30 MG: 25 MB: 50

KeepX: TR: 10 MG: 25 MB: 5

png("../Results/MixOmic/loadings_comp2_3.png", width = 14, height = 6, res = 300, units = "in")
try(plotLoadings(diablo.mod, comp = 2, ndisplay = 20, size.name = 0.7,
    size.title = 1, contrib = "max", method = "median", legend.color = pal[[gr]]), 
    silent = TRUE)

'ndisplay' value is larger than the number of selected variables! It has been reseted to 10 for block TR

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MB

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block FU

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: null

KeepX: TR: 300 MG: 5 MB: 50

KeepX: TR: 350 MG: 5 MB: 25

png("../Results/MixOmic/loadings_comp2_4.png", width = 14, height = 6, res = 300, units = "in")
try(plotLoadings(diablo.mod, comp = 2, ndisplay = 20, size.name = 0.7,
    size.title = 1, contrib = "max", method = "median", legend.color = pal[[gr]]), 
    silent = TRUE)

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MG

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: null

KeepX: TR: 20 MG: 25 MB: 5

KeepX: TR: 30 MG: 25 MB: 5

png("../Results/MixOmic/loadings_comp2_5.png", width = 14, height = 6, res = 300, units = "in")
try(plotLoadings(diablo.mod, comp = 2, ndisplay = 20, size.name = 0.7,
    size.title = 1, contrib = "max", method = "median", legend.color = pal[[gr]]), 
    silent = TRUE)

'ndisplay' value is larger than the number of selected variables! It has been reseted to 5 for block MB

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: full

KeepX: TR: 450 MG: 25 MB: 5

KeepX: TR: 300 MG: 50 MB: 25

png("../Results/MixOmic/Heatmap_Model_1.png", width = 20, height = 15, res = 300, units = "in")
cim_obj <- cimDiablo(
    diablo.mod,
    comp = 1,
    color.Y = pal[[gr]],
    size.legend = 1.5,
    margins = c(10, 4),
    color.blocks = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
    legend.position = "bottomleft",
    title = "Clustered heatmap of integrated data")


trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: null

KeepX: TR: 100 MG: 5 MB: 50

KeepX: TR: 100 MG: 50 MB: 5

png("../Results/MixOmic/Heatmap_Model_2.png", width = 20, height = 15, res = 300, units = "in")
cim_obj <- cimDiablo(
    diablo.mod,
    comp = 1,
    color.Y = pal[[gr]],
    size.legend = 1.5,
    margins = c(10, 4),
    color.blocks = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
    legend.position = "bottomleft",
    title = "Clustered heatmap of integrated data")


trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: full

KeepX: TR: 30 MG: 25 MB: 50

KeepX: TR: 10 MG: 25 MB: 5

png("../Results/MixOmic/Heatmap_Model_3.png", width = 20, height = 15, res = 300, units = "in")
cim_obj <- cimDiablo(
    diablo.mod,
    comp = 1,
    color.Y = pal[[gr]],
    size.legend = 1.5,
    margins = c(10, 4),
    color.blocks = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
    legend.position = "bottomleft",
    title = "Clustered heatmap of integrated data")


trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: null

KeepX: TR: 300 MG: 5 MB: 50

KeepX: TR: 350 MG: 5 MB: 25

png("../Results/MixOmic/Heatmap_Model_4.png", width = 20, height = 15, res = 300, units = "in")
cim_obj <- cimDiablo(
    diablo.mod,
    comp = 1,
    color.Y = pal[[gr]],
    size.legend = 1.5,
    margins = c(10, 4),
    color.blocks = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
    legend.position = "bottomleft",
    title = "Clustered heatmap of integrated data")


trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo

dev.off()

quartz_off_screen 
                2

Taxa: strain, Design: null

KeepX: TR: 20 MG: 25 MB: 5

KeepX: TR: 30 MG: 25 MB: 5

png("../Results/MixOmic/Heatmap_Model_5.png", width = 20, height = 15, res = 300, units = "in")
cim_obj <- cimDiablo(
    diablo.mod,
    comp = 1,
    color.Y = pal[[gr]],
    size.legend = 1.5,
    margins = c(10, 4),
    color.blocks = c("#E69F00", "#CD3B8E", "#D55E00", "#CC79A7"),
    legend.position = "bottomleft",
    title = "Clustered heatmap of integrated data")


trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo

dev.off()

quartz_off_screen 
                2

Code

sim_to_edges <- function(sim_matrix, threshold = NULL) {
  #colnames(sim_matrix) <- make.names(colnames(sim_matrix), unique = TRUE)
  #rownames(sim_matrix) <- make.names(rownames(sim_matrix), unique = TRUE)

  # removes spaces from feature names and replace with "_"
  colnames(sim_matrix) <- gsub("\\s+", "_", colnames(sim_matrix))
  rownames(sim_matrix) <- gsub("\\s+", "_", rownames(sim_matrix))

  TR <- colnames(X$TR) %>% gsub("\\s+", "_", .)
  MB <- colnames(X$MB) %>% gsub("\\s+", "_", .)
  MG <- colnames(X$MG) %>% gsub("\\s+", "_", .)

  # ---- edges between features ----
  feature_edges <- sim_matrix %>%
    as_tibble(rownames = "from") %>%
    #as.data.frame() %>%
    #rownames_to_column("from") %>%
    pivot_longer(-from, names_to = "to", values_to = "weight") %>%
    mutate(
      type_from = case_when(
        from %in% TR ~ "TR",
        from %in% MB ~ "MB",
        from %in% MG ~ "MG",
        from %in% FU ~ "FU"
      ),
      type_to = case_when(
        to %in% TR ~ "TR",
        to %in% MB ~ "MB",
        to %in% MG ~ "MG",
        from %in% FU ~ "FU"
      )
    ) %>%
    filter(from != to) %>%
    filter(type_from != type_to) %>% # only cross-block edges

    # remove duplicates in symmetric matrix:
    rowwise() %>%
    mutate(pair = paste(sort(c(from, to)), collapse = "_")) %>%
    distinct(pair, .keep_all = TRUE) %>%
    ungroup() %>%
    select(-pair)

  # Filter weights
  if (!is.null(threshold)) {
    feature_edges <- feature_edges %>% filter(weight >= threshold)
  }

  # ---- vertex table ----
  vertex_df <- tibble(name = colnames(sim_matrix)) %>%
    mutate(
      block = case_when(
        name %in% TR ~ "TR",
        name %in% MG ~ "MG",
        name %in% MB ~ "MB",
        from %in% FU ~ "FU"
      )
    )

  # ---- block hierarchy edges ----
  block_edges <- rbind(
    data.frame(from = "origin", to = unique(vertex_df$block)),
    vertex_df %>% rename(from = block, to = name)
  )

  return(list(
    edges = block_edges,
    feature_edges = feature_edges,
    vertices = vertex_df
  ))
}


# choose to display corelation values or similarity values
# the similarity values are the matrix returned from the circosplot() function
n <- f %>%
  map(., ~ head(.x, 10)) %>%
  unlist(.) %>%
  intersect(colnames(sim_m), .)

n <- f %>%
  map(., ~ head(.x, 10)) %>%
  unlist(.) %>%
  intersect(colnames(big_cor), .)
# sim_matrix <- sim_m

# sim_matrix <- sim_m[n,n]
sim_matrix <- big_cor[n, n]
glimpse(sim_matrix)
edges <- sim_to_edges(sim_matrix[n, n], 0.4)


# expand vertex table to include meta nodes: origin + block names
vertices <- edges$vertices %>%
  bind_rows(
    tibble(
      name = c("origin", unique(edges$vertices$block)),
      block = NA
    )
  )


#Let's add information concerning the label we are going to add: angle, horizontal adjustement and potential flip
#calculate the ANGLE of the labels
vertices$id <- NA
myleaves <- which(is.na(match(vertices$name, edges$from)))
nleaves <- length(myleaves)
vertices$id[myleaves] <- seq(1:nleaves)
vertices$angle <- 90 - 360 * vertices$id / nleaves

# calculate the alignment of labels: right or left
# If I am on the left part of the plot, my labels have currently an angle < -90
vertices$hjust <- ifelse(vertices$angle < -90, 1, 0)

# flip angle BY to make them readable
vertices$angle <- ifelse(
  vertices$angle < -90,
  vertices$angle + 180,
  vertices$angle
)


mygraph <- igraph::graph_from_data_frame(
  d = edges$edges, # hierarchical edges only
  vertices = vertices
)

# recalc leaf because adding meta nodes broke original leaf detection
#V(mygraph)$leaf <- degree(mygraph, mode = "out") == 0

# optional: add feature–feature similarity edges
# mygraph <- igraph::add_edges(
#   mygraph,
#   t(as.matrix(edges$feature_edges[, c("from", "to")]))
# )

# Convert feature_edges into index form
from <- match(edges$feature_edges$from, V(mygraph)$name)
to <- match(edges$feature_edges$to, V(mygraph)$name)
connect <- edges$feature_edges

p <- ggraph(mygraph, layout = "dendrogram", circular = TRUE) +

  # similarity bundles
  geom_conn_bundle(
    data = get_con(from = from, to = to, value = connect$weight),
    aes(alpha = value, color = value),
    tension = 0.6, # adjust bundling strength
    width = 0.8
  ) +

  # features at the periphery
  geom_node_point(
    aes(filter = leaf, x = x * 1.05, y = y * 1.05, color = block),
    size = 2
  ) +

  # optional text labels
  geom_node_text(aes(x = x * 1.25, y = y * 1.05, filter = leaf, label = name)) +
  # geom_node_text(aes(x = x, y=y, filter = leaf, label=name,
  #               angle = angle,
  #               hjust=hjust,
  #               colour=block), size=2, alpha=1) +
  coord_cartesian(clip = "off") +

  #scale_color_viridis_c(option = "B") +
  scale_alpha(range = c(0.1, 0.8)) +
  theme_void() +
  theme(
    #legend.position = "none",
    plot.margin = unit(c(0, 0, 0, 0), "cm")
  ) +
  expand_limits(x = c(-1.5, 1.5), y = c(-1.25, 1.25))

p