Multi-omics integration with MOFA2

Published

2 Apr 2026

Overview

This report presents a structured MOFA2 analysis integrating transcriptomics, metagenomics, and metabolomics data. The goal is to identify latent factors capturing shared and view-specific sources of variation, and to generate a set of publication-ready figures that can be combined into a multi‑panel figure.

Main outcomes: - Latent factor structure across multi‑omics layers - Variance explained per factor and per view - Associations between factors and clinical / technical covariates - Feature loadings driving key factors


Analysis workflow (step‑by‑step)

  1. Load required libraries and define plotting palettes
  2. Load omics data and sample metadata
  3. Format MOFA input
  4. Select samples for inclusion
  5. Unsupervised UMAPs
  6. Configure and run the MOFA model
  7. Load trained MOFA model
  8. Perform model diagnostics and quality checks
  9. Covariates- Factor correlation
  10. Annotate latent factors using group information
  11. Feature-level figures
  12. Unified UMAP (all views)

1. Libraries and global settings

Code
library(MOFA2)
library(tidyverse)
library(ggpubr)
library(GGally)
library(psych)
library(pheatmap)
library(cowplot)
library(RColorBrewer)
library(readxl)
library(ComplexHeatmap)
library(circlize)
library(patchwork)

# setwd("/Users/vilkal/work/Brolidens_work/Projects/Candida-omics/src")

Define consistent colour palettes to ensure visual coherence across all figures.

Code
select <- dplyr::select
map <- purrr::map
rename <- dplyr::rename

pal <- list(
  CST = c(
    "I" = "#148F77",
    "II" = "#6d9f06",
    "III" = "#A3E4D7",
    "IV-B" = "#E794C1",
    "IV-C" = "#b5cb8c",
    "V" = "#43BA8F"
  ),
  subCST = c(
    "#a5c97b",
    "#cc93cf",
    "#de9548",
    "#63d3b4",
    "#9e8dec",
    "#902267",
    '#FB5273',
    "#91c5de"
  ),
  RVVC = c("#EAEAEA", "#fd814e"),
  BV = c("#EAEAEA", "#fd814e"),
  NAC = c("#EAEAEA", "#fd814e"),
  ST = c("#EAEAEA", "#fd814e"),
  missing_page = c("#EAEAEA", "#fd814e"),
  Clin = c("#a5c97b", "#cc93cf", "#de9548", "#63d3b4", "#9e8dec", "#902267"),
  Clin_gr = c("#ffa998", '#FB5273', "#902267"),
  RVVC_AS = c("#ffa998", '#FB5273', "#902267"),
  RVVC_pos = c(
    "#A6CEE3",
    "#1F78B4",
    #"#EAEAEA",
    #"gray",
    "#e490c4ff",
    "#c12d88ff"
  ), # "#FB9A99", "#E31A1C", "#B2DF8A", "#609c5bff", "#FDBF6F", "#FF7F00",, "#CAB2D6", "#6a3d9a" "#e490c4ff", "#c12d88ff"
  trx_louvain_7 = c(
    "#de9548",
    "#cc93cf",
    "#a5c97b",
    "#9e8dec",
    "#902267",
    "#63d3b4",
    '#FB5273'
  ),
  metab_louvain = c(
    "#cc93cf",
    "#a5c97b",
    "#de9548",
    "#902267",
    "#9e8dec",
    "#63d3b4"
    #'#FB5273'
  ),
  microb_louvain = c(
    "#cc93cf",
    "#de9548",
    "#a5c97b",
    "#9e8dec",
    "#902267",
    "#63d3b4",
    '#FB5273'
  ),
  fun_louvain = c(
    "#a5c97b",
    "#cc93cf",
    "#de9548",
    "#63d3b4",
    "#9e8dec",
    "#902267",
    '#FB5273'
    #"#984EA3"
    #"#B3CDE3"
  ),
  integ_louvain = c(
    "#a5c97b",
    "#cc93cf",
    "#de9548",
    "#63d3b4",
    "#9e8dec",
    "#902267",
    '#FB5273',
    "#984EA3"
    #"#B3CDE3"
  ),
  "Wet_smear:_clue_cells_(y/n)" = c("#2266ac", "#91c5de"),
  hyphae = c("#f26386", "#fd814e", "#a4d984", "#fbbc52"),
  pos = c("#91c5de", "#2266ac"),
  fct1_clus = c("#33A02C", "#B2DF8A", "#FF7F00", "#FDBF6F"), #"#FDBF6F" "#FF7F00", "#CAB2D6", "#6A3D9A"
  "Clinical_score_(0-5)" = brewer.pal(6, name = "Reds")
)
# default ggplot2 pallet:
gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}
gg_color_hue(4)
[1] "#F8766D" "#7CAE00" "#00BFC4" "#C77CFF"

2. Data loading

Multi‑omics matrices and sample metadata are loaded from pre‑processed RDS/CSV files. These data have already undergone quality control and normalisation upstream.

Clin_path <- "/Users/vilkal/work/Brolidens_work/Projects/Candida-omics/data/Metadata_svamp_FINAL.xlsx"
comb_meta_path <- "../Results/Combined_gr_meta_data.csv"

# meta data
Clin_data <- read_xlsx(Clin_path, sheet = "Metadata", na = "na")
meta <- read_csv(comb_meta_path) %>%
  #rename(sample = "svamp_ID") %>%
  mutate(across(2:27, ~ factor(.x)))
top_hvg <- read_csv("../Results/Tissue_QC/HVG.csv")


TRX_path <- "../Results/MixOmic/Transcriptomics_batch_filt.csv"
MG_path <- "../Results/MixOmic/Metageonome_vagswab_CLR_transformed.RDS"
MB_path <- "../Results/MixOmic/Metabolom_Normalized.csv"
FM_path <- "../Tidy_data/Metagenomics/Functional_annotation_CLR_filtered.RDS"

# expression data
TRX_batch <- read.csv(TRX_path, row.names = 1)
MG_matrix <- readRDS(MG_path)
MB_matrix <- read.csv(MB_path, row.names = 1)
FM_matrix <- readRDS(FM_path)

3. Prepare MOFA input

4.1 Assemble omics views

MOFA describes each omic dataset as a “view”. One of the possible inputs to mofa is a list of matrixes for each view. Each omics layer is formatted as a features × samples matrix. Sample ordering needs to be the same. The top 5000 most higly variable genes identified in the QC_Transcriptomics.qmd script is used to filter the transcriptomics data.

# remove long strain id, replace with int
MG_matrix <- MG_matrix[["clr_data"]][["strain"]]
new_names <- sub("\\s+\\S+$", "", colnames(MG_matrix)) # remove strain id
new_names <- make.unique(new_names, sep = " ") # Make names unique
colnames(MG_matrix) <- new_names # Assign new names

# select 5000 most higly variable genes
TRX_HVG <- TRX_batch[top_hvg$gene[1:5000], ]

views <- list(
  Transcriptomics = as.matrix(TRX_HVG),
  Metabolomics = as.matrix(MB_matrix),
  Metagenomics = unclass(t(MG_matrix)),
  Functional = unclass(FM_matrix$path_lvl)
)

4.2 Convert matrices to long format

The matrix list input is only valid if you have matching samples for all “views”. If you wish to include samples that are not “complete” in this sense the long format version is another input alternative to create a MOFA model

omics_to_long <- function(mat, view_name) {
  mat %>%
    as.data.frame() %>%
    rownames_to_column("feature") %>%
    pivot_longer(
      cols = -feature,
      names_to = "sample",
      values_to = "value"
    ) %>%
    mutate(view = view_name)
}

mofa_df <- imap_dfr(views, ~ omics_to_long(.x, .y))

4.3 Sample criteria df

The tibble called m shows what samples excist across views for each study participant

# select samples to include
m <- mofa_df %>%
  distinct(sample, view) %>%
  mutate(present = TRUE) %>%
  pivot_wider(
    id_cols = sample,
    names_from = view,
    values_from = present,
    values_fill = FALSE
  )

m <- m %>%
  mutate(
    n_views = rowSums(across(-c("sample", "Functional"))),
    at_least_two = n_views >= 2,
    all_BL = !(grepl("_3M|_1W|_6M", .$sample)),
    all_BL = ifelse(grepl("S84_BL", sample), FALSE, all_BL),
    matched = n_views == 3
  )

write_csv(m, "../Results/Sample_omic_table.csv")
# m <- write_csv("../Results/Sample_omic_table.csv")

knitr::kable(head(m))
sample Transcriptomics Metabolomics Metagenomics Functional n_views at_least_two all_BL matched
S01 TRUE TRUE TRUE TRUE 3 TRUE TRUE TRUE
S02 TRUE TRUE TRUE TRUE 3 TRUE TRUE TRUE
S03 TRUE TRUE TRUE TRUE 3 TRUE TRUE TRUE
S04 TRUE TRUE TRUE TRUE 3 TRUE TRUE TRUE
S05 TRUE TRUE TRUE TRUE 3 TRUE TRUE TRUE
S06 TRUE TRUE TRUE TRUE 3 TRUE TRUE TRUE

Based on the various criteria we filter out the samples we do not wish to include from our long format table

mofa_df <- mofa_df %>%
  filter(sample %in% m$sample[m$at_least_two & m$all_BL])

meta_ <- meta %>% filter(sample %in% m$sample[m$at_least_two & m$all_BL])

4. Sample heatmap overview

With our samples selected we can now plot a overviw heatmap showing all our included samples, which omic views they are represented in and how they are distributed by some of our groups of intrest NB! S10 and S15 was excluded from metagenome analysis due to low total counts before and after taxa filtering respectivelyl Thats why I have given them a lighter pink colour

Code
heatmap_prepp <- function(keep_s, vars, arrange_by = "Clinical_score_(0-5)") {
  ### ------------------------------------------------------------
  ### 1. Select symptom columns and prepare binary clinical data
  ### ------------------------------------------------------------

  # Extract only samples of interest and symptom score columns
  # Convert TRUE/FALSE columns to numeric 0/1 and clean column names
  df_bin <- m %>%
    #filter(sample %in% m$sample[m$at_least_two & m$all_BL]) %>%
    filter(sample %in% m$sample[keep_s]) %>%

    left_join(
      .,
      select(meta, sample, RVVC_AS, RVVC_pos, "Clinical_score_(0-5)"),
      by = "sample"
    ) %>%
    mutate(
      RVVC_AS = factor(RVVC_AS, levels = c("RVVC", "Control", "RVVC_AS"))
    ) %>%
    mutate(
      RVVC_pos = factor(
        RVVC_pos,
        levels = c(
          "RVVC_pos",
          "RVVC_neg",
          "Control_pos",
          "Control_neg"
          #"NA_pos",
          #"NA_neg"
        )
      )
    ) %>%
    arrange(desc(n_views)) %>%
    arrange(RVVC_pos) %>%
    arrange(desc(`Clinical_score_(0-5)`)) %>%
    arrange(
      desc(.data[[arrange_by]])
    ) %>%
    select(1:5) %>%
    mutate(across(where(is.logical), as.double)) %>%
    mutate(
      Metagenomics = ifelse(grepl("S10|S15", sample), 0.5, Metagenomics),
      Functional = ifelse(grepl("S10|S15", sample), 0.5, Functional)
    ) %>%
    column_to_rownames("sample") # convert sample ID to rownames

  # Convert to numeric matrix for ComplexHeatmap
  mat <- as.matrix(df_bin)

  # Determine row annotation height scaling factor
  my_h <<- 0.1 * nrow(mat)

  ### ------------------------------------------------------------
  ### 2. Prepare annotation dataframe (metadata)
  ### ------------------------------------------------------------

  annot_col <<- meta %>%
    select(sample, any_of(vars)) %>% # select annotation variables
    mutate(across(all_of(vars), ~ factor(.x))) %>%
    # {. ->> all_annot} %>%
    filter(sample %in% rownames(df_bin)) %>% # keep same samples as heatmap matrix
    arrange(match(sample, rownames(df_bin))) %>%

    #mutate(across(1:length(vars) + 1, ~ factor(.x))) %>% # ensure annotations are factors
    column_to_rownames("sample") # convert sample ID to rownames

  # Quick check: ensure annotation rows match heatmap rows
  dim(mat)
  dim(annot_col)

  setdiff(rownames(mat), rownames(annot_col))

  ### ------------------------------------------------------------
  ### 4. Define row annotations for heatmap (ComplexHeatmap)
  ### ------------------------------------------------------------

  annot <- HeatmapAnnotation(
    df = annot_col[, c(vars)],
    which = c("row"),
    #which = c("column"),
    # Clin_gr = annot_col$Clin_gr, # binned clinical grade
    # "Clinical_score_(0-5)" = annot_col$`Clinical_score_(0-5)`, # raw clinical score
    # RVVC_AS = annot_col$RVVC_AS, # sample group
    # BV = annot_col$BV, # sample group
    # RVVC = annot_col$RVVC, # sample group
    # hyphae = annot_col$hyphae, # sample group

    show_annotation_name = TRUE, # show labels
    annotation_name_gp = gpar(fontsize = 8), # label font size

    # Legend formatting
    annotation_legend_param = list(
      grid_height = unit(.1, "mm"),
      grid_width = unit(2, "mm"),
      title = "",
      labels_gp = gpar(fontsize = 7),
      title_gp = gpar(fontsize = 8)
    ),

    simple_anno_size = unit(.3, "cm"), # annotation row height

    # Colors for each annotation variable
    na_col = "white",
    col = pal[vars] %>% imap(., ~ set_names(.x, levels(annot_col[[.y]])))
  )
  ### ------------------------------------------------------------
  ### Create heatmap
  ### ------------------------------------------------------------
  H <- Heatmap(
    mat,
    width = unit(ncol(mat) * 5, "mm"),
    height = unit(nrow(mat) * 2.2, "mm"),
    name = "Symptom Present",
    col = col_fun,

    # vertical orientation:
    right_annotation = annot,
    cluster_rows = FALSE,
    row_order = rownames(mat),

    row_names_side = "right",

    column_names_rot = 90,
    row_names_gp = gpar(fontsize = 7),
    heatmap_legend_param = list(
      at = c(0, 1),
      labels = c("No", "Yes")
    ),
    #   cell_fun = function(j, i, x, y, width, height, fill) {
    #   grid.rect(x, y, width, height, gp = gpar(col = "grey70", lwd = 0.5, fill = NA))
    # }
    layer_fun = function(j, i, x, y, width, height, fill) {
      # Draw cell borders without overwriting fill
      grid.rect(
        x = x,
        y = y,
        width = width,
        height = height,
        gp = gpar(col = "grey70", lwd = 0.4, fill = NA)
      )
    }
  )
  #return(list(mat = mat, annot = annot, h = my_h))
  return(H)
}
Code
### ------------------------------------------------------------
### Color function for the symptom heatmap
### ------------------------------------------------------------
col_fun <- colorRamp2(
  c(0, 0.5, 1),
  c("white", "#EFAEC2", "#e05e85") #"#e05e8584"
)

### ------------------------------------------------------------
### Prepare annotation df and matrix
### ------------------------------------------------------------

vars <- c(
  "Clinical_score_(0-5)",
  "Clin_gr",
  "RVVC",
  #"RVVC_AS",
  "RVVC_pos",
  "BV",
  "NAC",
  "missing_page",
  "ST"
)
c("S84_BL|S38|S44|S60|S20|K30")
[1] "S84_BL|S38|S44|S60|S20|K30"
Code
c("S84_BL", "S38", "S44", "S60", "S20", "S30")
[1] "S84_BL" "S38"    "S44"    "S60"    "S20"    "S30"   
Code
keep_s <- rep(TRUE, nrow(m)) # 152 every samples incl. follow up
keep_s <- m$all_BL # 91 samples
keep_s <- m$matched # 70 samples

keep_s <- grepl("S84_BL|S38|S44|S60|S20|S30|S16|S14", m$sample) # exluded
keep_s <- m$at_least_two & m$all_BL # 89 samples

H <- heatmap_prepp(keep_s, vars)

H <- heatmap_prepp(
  keep_s,
  c("RVVC_pos", "RVVC_AS", "Clinical_score_(0-5)"),
  arrange_by = "RVVC_pos"
)

H <- heatmap_prepp(
  keep_s,
  c("pos", "RVVC_pos", "Clinical_score_(0-5)"),
  arrange_by = "RVVC_AS"
)
# quartz(width = 6, height = 6, dpi = 150)
# draw(H)
Code
# !expr my_h

# Heatmap global options:
# ht_opt$COLUMN_ANNO_PADDING = unit(.05, "cm")
# ht_opt$HEATMAP_LEGEND_PADDING = unit(-0, "cm") #unit(c(0, 0, 0, -1), "cm") #b,l,t,r
# ht_opt$TITLE_PADDING = unit(.05, "cm")
# ht_opt$DIMNAME_PADDING = unit(.05, "cm")

# ht_opt(LEGEND_GAP = unit(18, "mm"))

H_g <- draw(
  H,
  heatmap_legend_side = "bottom",
  merge_legend = F,
  background = NA,
  show_annotation_legend = FALSE,
  legend_gap = unit(5, "mm"),
  padding = unit(c(3, 9, -2, 3), "mm") # outer padding
) # b,l,t,r

Code
################
# SAVE RESULTS #
################
# quartz(width = 6, height = 6, dpi = 150)
png(
  file = paste0("../Results/Sample_HT/Excluded_samples.png"),
  units = "in",
  res = 600,
  #width = 8.5, height = 10 # Epi
  width = 8.5,
  height = 4 # 11 # my_h # 17 (all samples)
)
H_g
#draw(H)
dev.off()
quartz_off_screen 
                2 

5. Unsupervised UMAPs

We plot unsupervised umaps coloured by different groups of intrest in order to see how they cluster across views.

!NB should think about doing with and without HVGs only

# filter samples from views for UMAP ploting
m_ <- m %>%
  filter(sample %in% .$sample[m$at_least_two & m$all_BL])

v <- views %>%
  imap(., ~ .x[, m_$sample[m_[[.y]]]])

This first code is taken from Paulos analysis of the metabolomics data. It gives more control over the knn clustering by perfoming it in a seperate step and then supplying the knn to the umap function. The other alternative bellow is just supplying the data to umap, which then runs the PCA and knn internaly with default setings.

Code
#### UMAP analysis ####
set.seed(1)
UMAP_df <- v %>%
  map(., ~ t(.x)) %>%
  # PCA
  map(., ~ prcomp(.x, scale = T, center = T)) %>%
  # KNN graph
  map(
    .,
    ~ RcppHNSW::hnsw_knn(as.matrix(.x$x[, 1:10]), k = 10, distance = "cosine")
  ) %>%

  # UMAP
  map(
    .,
    ~ uwot::umap(
      X = NULL,
      nn_method = .x,
      n_components = 2,
      ret_extra = c("model", "fgraph"),
      verbose = T,
      min_dist = 0.1,
      spread = 0.3,
      approx_pow = TRUE,
      repulsion_strength = .2,
      negative_sample_rate = 30,
      n_epochs = 200,
      n_threads = 8
    )$embedding
  ) %>%
  imap(
    .,
    ~ tibble(
      "UMAP 1" = .x[, 1],
      "UMAP 2" = .x[, 2],
      "ID" = colnames(v[[.y]])
    )
  ) %>%
  map(., ~ left_join(., annot_col, by = c("ID" = "sample")))
# attempt at getting the same groups as Paulo, but failed, think I need his list with the group result
Paulo_clus <- c(
  Metab = "/Users/vilkal/Downloads/resulsts_metabolomics/metab_knn.rds",
  Microbe = "/Users/vilkal/Downloads/resulsts_metabolomics/microb_knn.rds",
  Integ = "/Users/vilkal/Downloads/resulsts_metabolomics/integ_knn.rds"
)

res <- c(0.8, 0.6, 1.3)

set.seed(42)
clus <- map(Paulo_clus, ~ readRDS(.x)) %>%
  # graph:
  map(
    .,
    ~ igraph::graph_from_data_frame(
      data.frame(
        from = rep(1:nrow(.x[["idx"]]), ncol(.x[["idx"]])),
        to = c(.x[["idx"]]),
        weight = c(.x[["dist"]])
      ),
      directed = F
    )
  ) %>%
  # clustering:
  map2(., res, ~ igraph::cluster_louvain(.x, resolution = .y, weights = NA))

map(clus, ~ table(.x$membership))
# it does not create the same groups as the seed are ignored as fare as I can see
cluster_louvain_list <- function(
  mat_list,
  k = 10,
  n_pcs = 10,
  resolution = 0.6,
  seed = 42
) {
  require(RcppHNSW)
  require(Matrix)
  require(igraph)
  require(tibble)

  cluster_results <- purrr::imap(
    mat_list,
    ~ {
      mg <- .x # matrix
      nm <- .y # name of the matrix
      idx <- which(names(mat_list) == nm)

      # PCA
      PC <- prcomp(t(mg), scale. = TRUE, center = TRUE)
      x <- PC$x[, seq_len(min(n_pcs, ncol(PC$x)))]

      # KNN
      knn <- RcppHNSW::hnsw_knn(as.matrix(x), k = k, distance = "cosine")

      # Graph
      g <- igraph::graph_from_data_frame(
        data.frame(
          from = rep(1:nrow(knn$idx), ncol(knn$idx)),
          to = c(knn$idx),
          weight = c(knn$dist)
        ),
        directed = FALSE
      )

      # Clustering
      set.seed(seed)
      cl <- igraph::cluster_louvain(
        g,
        resolution = resolution[idx],
        weights = NA
      )
      #cl <- igraph::cluster_leiden(g, resolution = resolution)

      out <- cl$membership
      names(out) <- rownames(x)

      out
    }
  )

  # Combine into one table
  cluster_df <- cluster_results %>%
    imap(
      ~ tibble(
        sample = names(.x),
        cluster = .x,
        matrix = .y
      )
    ) %>%
    bind_rows() %>%
    pivot_wider(
      names_from = matrix,
      values_from = cluster
    ) %>%
    mutate(across(-sample, as.factor))

  return(cluster_df)
}
# res <- c(1.3, 1.2, 1.3, 0.9)
res <- c(1.3, 1.2, 1.3, 1)
n <- c("trx_louvain_7", "metab_louvain", "microb_louvain", "fun_louvain")
result <- v %>%
  set_names(., n) %>%
  cluster_louvain_list(mat_list = ., resolution = res)

map(set_names(n), ~ table(result[[.x]]))
$trx_louvain_7

 1  2  3  4  5  6 
15 11  9 12 15 11 

$metab_louvain

 1  2  3  4  5  6 
18 19 14 12 10 13 

$microb_louvain

 1  2  3  4  5 
20 13 18 15 21 

$fun_louvain

 1  2  3  4  5  6  7 
14 13 10 14 12 19  6 
# head(result)

meta <- meta %>%
  select(-any_of(n)) %>%
  left_join(., result, by = "sample") %>%
  select(1:15, any_of(n), everything())
# write_csv(meta, "../Results/Combined_gr_meta_data.csv")
Code
UMAP_fun <- function(
  umap_df,
  group_var,
  txt_var = NULL,
  shape_gr = NULL,
  shape = NULL,
  title = ""
) {
  if (is.null(shape_gr)) {
    point <- geom_point(size = 2, alpha = 1, shape = 21)
  } else {
    point <- geom_point(
      size = 2,
      alpha = 1,
      aes(shape = if (!is.null(shape_gr)) .data[[shape_gr]] else NULL)
    )
  }

  p <- ggplot(
    umap_df,
    aes(
      x = `UMAP 1`,
      y = `UMAP 2`,
      #shape = RVVC_AS,
      fill = .data[[group_var]],
      color = .data[[group_var]]
    )
  ) +
    point +
    #geom_jitter( shape=21, size=3, color="white", width=.5, height=.5) +  # Tassos used jitter

    scale_fill_manual(
      values = pal[[group_var]],
      na.value = "transparent",
      name = ""
      #aesthetics = c("colour", "fill")
    ) +
    guides(fill = "none", nrow = 2, byrow = TRUE) +
    scale_colour_manual(
      values = pal[[group_var]],
      na.value = "gray",
      name = ""
    ) +
    {
      if (!is.null(shape_gr) && !is.null(shape)) {
        scale_shape_manual(values = shape, name = "")
      }
    } +
    # scale_shape_manual(values = c(21, 5, 4), name = "" ) + #specify individual shapes+
    {
      if (!is.null(txt_var)) {
        geom_text(
          data = umap_df,
          aes(x = `UMAP 1`, y = `UMAP 2`, label = .data[[txt_var]]),
          size = 2,
          hjust = 0,
          nudge_x = 0.07,
          color = "gray51"
        )
      }
    } +
    ggtitle(title) +
    theme_bw(base_size = 14) +
    theme(
      # point = element_point(shape = 21),
      line = element_blank(),
      axis.ticks = element_line(color = "black"),
      aspect.ratio = 1,
      axis.title = element_blank()
    )
  return(p)
}


#### UMAP plotting by groups ####

# save plot to file
#ggsave(paste0("./Figures/09/", "Bulk_UMAP.png"), p)
Code
#### group information ####
annot_col <- meta %>%
  #filter(sample %in% .$sample[m$at_least_two & m$all_BL]) %>%
  # select(-`Age (years)`) %>%
  select(
    1:22,
    `Clinical_score_(0-5)`
  ) %>%
  mutate(across(2:ncol(.), ~ factor(.x))) %>%
  mutate(
    txt_clin = ifelse(
      grepl("0", .$`Clinical_score_(0-5)`),
      NA,
      as.character(.$`Clinical_score_(0-5)`)
    )
  ) %>%
  mutate(txt_id = ifelse(RVVC == "RVVC", .$sample, NA))

#### UMAP analysis ####
set.seed(1)

umap_dfs <- v %>%
  map(., ~ t(.x)) %>%
  map(
    .,
    ~ uwot::umap(
      .x,
      #seed = 123,
      seed = 463,
      n_neighbors = 15,
      #metric = "cosine",
      init = "spectral",
      scale = T
    )
  ) %>%
  map(
    .,
    ~ tibble(
      "UMAP 1" = .x[, 1],
      "UMAP 2" = .x[, 2],
      "ID" = rownames(.x)
    )
  ) %>%
  map(., ~ left_join(., annot_col, by = c("ID" = "sample")))


meta_clus <- colnames(annot_col)
color_vars <- c(
  "pos",
  "trx_louvain_7",
  "metab_louvain",
  "microb_louvain",
  "fun_louvain",
  #"integ_louvain",
  "Clinical_score_(0-5)",
  "RVVC_pos",
  "CST"
) %>%
  set_names(.)

#### Combine and save plots ####
make_umap_panel <- function(var, umap_dfs, outdir = "../Results/UMAP2/") {
  # create plots for each dataset
  p_list <- imap(
    umap_dfs,
    ~ UMAP_fun(.x, var, "txt_clin", title = .y)
  )

  # combine with shared legend
  p_combined <- patchwork::wrap_plots(p_list) +
    patchwork::plot_layout(nrow = 1, guides = "collect") &
    theme(legend.position = "right")

  # safe filename
  fname <- gsub("[^A-Za-z0-9_]", "_", var)

  # save
  ggsave(
    filename = paste0(outdir, "UMAP_", fname, ".png"),
    plot = p_combined,
    width = 14,
    height = 5,
    dpi = 300
  )

  return(p_combined)
}

plots <- map(color_vars, ~ make_umap_panel(.x, umap_dfs))
plots
$pos


$trx_louvain_7


$metab_louvain


$microb_louvain


$fun_louvain


$`Clinical_score_(0-5)`


$RVVC_pos


$CST

Code
# quartz(width = 6, height = 6, dpi = 60)
p_ <- patchwork::wrap_plots(plots, ncol = 1) +
  patchwork::plot_layout(guides = "collect", axis_titles = "collect") &
  theme(legend.position = "right", title = element_blank())

ggsave(
  paste0("../Results/Factor_plots2/UMAP_louvain", ".pdf"),
  p_,
  width = 15,
  height = 25
)
# use shapes:
# UMAP_fun(umap_dfs$Metabolomics, "trx_louvain_7", "txt_clin", shape_gr = "pos", shape = c(21, 24))
# no shapes
# UMAP_fun(umap_dfs$Metabolomics, "trx_louvain_7", "BV")

6. MOFA model construction

A MOFA object is created and configured with default data and training options. The number of latent factors is set a priori and can be tuned depending on variance explained.

Data options

Important arguments
  • scale_groups
    Scale groups to the same total variance?
    Default: FALSE

  • scale_views
    Scale views to the same total variance?
    Default: FALSE

  • views
    Names of the views to include in the model.

  • groups
    Names of the sample groups.

Model options

Important arguments
  • num_factors
    Number of latent factors.

  • likelihoods
    Likelihood per view.
    Options: "gaussian", "poisson", "bernoulli"
    By default, likelihoods are inferred automatically.

  • spikeslab_factors
    Use spike-and-slab sparsity prior on the factors?
    Default: FALSE

  • spikeslab_weights
    Use spike-and-slab sparsity prior on the weights?
    Default: TRUE

  • ard_factors
    Use Automatic Relevance Determination (ARD) prior on the factors?
    Default: TRUE when using multiple groups.

  • ard_weights
    Use Automatic Relevance Determination (ARD) prior on the weights?
    Default: TRUE when using multiple views.

Training options

Important arguments
  • maxiter
    Maximum number of training iterations.
    Default: 1000

  • convergence_mode
    Convergence speed of the algorithm.
    Options: "fast", "medium" (default), "slow"
    For exploratory analyses, "fast" is usually sufficient.

  • seed
    Random seed for reproducibility.

We use default options, however it is recomended to run the model several times with different seeds. In one of the papers published by MOFAS authors, they ran the model ~24 times, before picking the model with the highest elbo score.

MOFAobject <- create_mofa_from_df(mofa_df)

data_opts <- get_default_data_options(MOFAobject)
model_opts <- get_default_model_options(MOFAobject)
train_opts <- get_default_training_options(MOFAobject)

#model_opts$num_factors <- 10
train_opts$convergence_mode <- "medium"
train_opts$seed <- 1234

The functions bellow runs the model with different seeds and saves the model and a table with all seeds and corresponding elbo scores as output

Train MOFA model
seeds <- c(3, 100, 50)

run_mofa_with_seed <- function(seed) {
  # Make a local copy (important!)
  train_opts$seed <- seed

  MOFAobject <- prepare_mofa(
    MOFAobject,
    data_options = data_opts,
    model_options = model_opts,
    training_options = train_opts
  )

  MOFAmodel <- run_mofa(
    MOFAobject,
    outfile = paste0("../Results/MOFA/MOFA_allBL_noS84_seed_", seed, ".hdf5"),
    use_basilisk = TRUE
  )

  elbo <- max(get_elbo(MOFAmodel))

  # Return a single-row data frame
  tibble(
    seed = seed,
    elbo = elbo
  )
}

# existing training code stays the same
elbo_table <- map_dfr(seeds, ~ run_mofa_with_seed(.x))
write_csv(elbo_table, "../Results/MOFA/elbo_table_noS84.csv")
best_seed <- elbo_table$seed[which.max(elbo_table$elbo)]

MOFAobject without transcriptomics batch correction

X <- readRDS("../Results/MixOmic/X_object.RDS")
views <- list(
  Transcriptomics = t(X$strain$TR),
  Metagenomics = t(X$strain$MG),
  Metabolomics = t(X$species$MB)
)

MOFAobject <- create_mofa(views)

data_opts <- get_default_data_options(MOFAobject)
model_opts <- get_default_model_options(MOFAobject)
model_opts$num_factors <- 10

train_opts <- get_default_training_options(MOFAobject)
train_opts$convergence_mode <- "medium"
train_opts$seed <- 1234

MOFAobject <- prepare_mofa(
  MOFAobject,
  data_options = data_opts,
  model_options = model_opts,
  training_options = train_opts
)
MOFAmodel <- run_mofa(
  MOFAobject,
  outfile = "../Results/MOFA/MOFA_noBatch_strain.hdf5",
  use_basilisk = TRUE
)

7. Load trained MOFA model

We load the model with the highest elbo score

elbo_table <- read_csv("../Results/MOFA/elbo_table.csv")
elbo_table <- read_csv("../Results/MOFA/elbo_table_noS84.csv")
knitr::kable(elbo_table, )
seed elbo
3 -177584.7
100 -177577.2
50 -177588.7
best_seed <- elbo_table$seed[which.max(abs(elbo_table$elbo))]

MOFAmodel <- load_model(
  #"../Results/MOFA/MOFA_matched_seed_50.hdf5"
  # "../Results/MOFA/MOFA_allBL_seed_"
  paste0("../Results/MOFA/MOFA_allBL_noS84_seed_", best_seed, ".hdf5")
)

# batch corrected tr, ~17 000 genes, strain lvl, no followup samples
"../Results/MOFA/MOFA_matched_noM3_strain.hdf5"
[1] "../Results/MOFA/MOFA_matched_noM3_strain.hdf5"
# batch corrected tr, ~17 000 genes, strain lvl + 7 M3 follow up samples
"../Results/MOFA/MOFA_w73M_strain.hdf5"
[1] "../Results/MOFA/MOFA_w73M_strain.hdf5"
# same as above + 9 extra RVVC samples without matching trx
"../Results/MOFA/MOFA_w9RVVC_strain.hdf5"
[1] "../Results/MOFA/MOFA_w9RVVC_strain.hdf5"
# same as above + all 154 samples
"../Results/MOFA/MOFA_all_154_samples_strain.hdf5"
[1] "../Results/MOFA/MOFA_all_154_samples_strain.hdf5"
# Extract factors, and join metadata
clusters <- list()
clusters$fct1_clus <- cluster_samples(MOFAmodel, k = 4, factors = 1)$cluster
clusters$fct2_clus <- cluster_samples(MOFAmodel, k = 4, factors = 2)$cluster
clusters$fct4_clus <- cluster_samples(MOFAmodel, k = 4, factors = 4)$cluster

clust <- bind_cols(sample = names(clusters[[1]]), clusters)

meta <- meta %>%
  dplyr::left_join(., clust, by = "sample") %>%
  dplyr::mutate(across(all_of(names(clusters)), ~ factor(.x)))

# add meta data to model
samples_metadata(MOFAmodel) <- samples_metadata(MOFAmodel)[, c(
  "sample",
  "group"
)] %>%
  left_join(meta, by = c("sample"))

8. Model diagnostics

# inspect data distribution
ggdensity(mofa_df, x = "value", fill = "gray70") +
  facet_wrap(~view, nrow = 3, scales = "free") # + xlim(0.5,)

We first assess the overall quality of the model by inspecting variance explained and factor correlations.

# inspect the model:
ovw <- plot_data_overview(MOFAmodel) +
  theme(plot.margin = unit(c(1, 0, 1, 1), "cm")) + #  #EC645A, #CD3B8E, #994E95
  scale_fill_manual(
    values = c("#CC79A7", "#D55E00", "#CD3B8E", "gray", "#E69F00"),
    na.value = "gray"
  )
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
ovw

MOFAmodel
Trained MOFA with the following characteristics: 
 Number of views: 4 
 Views names: Functional Metabolomics Metagenomics Transcriptomics 
 Number of features (per view): 114 90 244 5000 
 Number of groups: 1 
 Groups names: single_group 
 Number of samples (per group): 88 
 Number of factors: 15 
p1 <- plot_variance_explained(MOFAmodel, plot_total = TRUE)[[2]]
p2 <- plot_variance_explained(MOFAmodel, factors = 1:10)

#+
#theme(axis.text.x = element_text(angle = 45, hjust = 0.8, vjust = 0.9))
# plot_grid(plotlist = list(p1, p2), ncol = 2)

p1$layers[[1]]$aes_params$fill <- "gray" # remove fixed fill
p1$layers[[1]]$aes_params$colour <- "#717272" # remove fixed fill

p1 <- p1 +
  coord_flip() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank(),
    plot.margin = unit(c(0, 0.1, 0, -0.5), "cm")
  )
p2 <- p2 +
  coord_flip() +
  theme(
    legend.position = c(0.5, 1.09), # 👈 just above panel, tight
    legend.direction = "horizontal",
    legend.text = element_text(size = 6),
    legend.title = element_text(size = 6),
    legend.text.position = "top",
    legend.margin = margin(0, 0, -0, 0), # moves the legend box
    legend.box.margin = margin(-0, 0, -0, -0)
  ) + # moves the legend) +
  guides(fill = guide_colorbar(title = "", barwidth = 12, barheight = 0.3)) +
  theme(axis.text.y = element_blank()) +
  theme(plot.margin = unit(c(0, 0.5, 0, 0), "cm")) +
  scale_y_discrete(labels = 1:10) +
  scale_fill_gradientn(
    colors = c("gray97", "darkblue"),
    labels = scales::label_number(suffix = "%") # 👈 THIS is the key
  )

# quartz(width = 9.5, height = 3)
plot_grid(plotlist = list(ovw, p2, p1), nrow = 1, align = c("h"))

# p$layers[[1]]$aes_params$alpha
ve_df <- get_variance_explained(MOFAmodel)$r2_per_factor$single_group %>%
  as.data.frame() %>%
  rownames_to_column("Factor") %>%
  pivot_longer(
    -Factor,
    names_to = "view",
    values_to = "variance_explained"
  ) %>%
  mutate(Factor = factor(.$Factor, levels = paste0("Factor", 1:15)))

d <- ve_df %>%
  filter(grepl("^Factor1$", .$Factor)) %>%
  #filter(grepl("^Factor[1-9]$", .$Factor)) %>%
  summarize(sum = sum(variance_explained), .by = c("Factor", "view")) %>%
  filter(sum >= 2)

ggplot(ve_df, aes(x = Factor, y = variance_explained, fill = view)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(
    values = c("#CC79A7", "#D55E00", "#CF1C90", "#E69F00"),
    na.value = "gray"
  ) +
  labs(
    x = "Factor",
    y = "Variance explained (%)",
    fill = "view"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Correlation between factors

A good sanity check is to verify that the Factors are largely uncorrelated. In MOFA there are no orthogonality constraints such as in Principal Component Analysis, but if there is a lot of correlation between Factors this suggests a poor model fit. Reasons could be that you used too many factors or perhaps the normalisation is not adequate.

plot_factor_cor(MOFAmodel)

MOFAbatch <- load_model(
  "../Results/MOFA/MOFA_noBatch_strain.hdf5"
)
Warning in .quality_control(object, verbose = verbose): Factor(s) 1 are strongly correlated with the total number of expressed features for at least one of your omics. Such factors appear when there are differences in the total 'levels' between your samples, *sometimes* because of poor normalisation in the preprocessing steps.
samples_metadata(MOFAbatch) <- samples_metadata(MOFAbatch) %>%
  left_join(meta, by = "sample")

plot_factors(
  MOFAbatch,
  factors = "all",
  color_by = "batch"
)

When running MOFA the first time, the internal quality control warned that at least one of the factors was strongly corelated with the total number of expressed features in one of the views. This is typically a result of batch effect. As seen in the plot above Factor 1 perfectly seperates the two batches of the transcriptomics data. Thus batch correction was performe in the upstream QC_Transcriptomics.qmd script.

p <- plot_factors(
  MOFAmodel,
  factors = "all",
  color_by = "Clinical_score_(0-5)"
) +
  scale_fill_manual(values = pal$`Clinical_score_(0-5)`, na.value = "white")

p

p <- plot_factor(
  MOFAmodel,
  factors = "all",
  color_by = "RVVC_pos",
  group_by = "RVVC_pos",
) +
  scale_fill_manual(values = pal$RVVC_pos, na.value = "white")

p

plot_factors(
  MOFAmodel,
  factors = c(1, 3),
  color_by = "Clin_gr"
) +
  stat_ellipse(
    #aes(color = RVVC_AS, fill = RVVC_AS),
    geom = "polygon",
    alpha = 0.25
  ) +
  scale_fill_manual(values = pal$Clin_gr, na.value = "white")

library(ggExtra)
p <- get_factors(MOFAmodel, factors = c(1, 3), as.data.frame = TRUE) %>%
  as_tibble() %>%
  select(-group) %>%
  pivot_wider(names_from = "factor") %>%
  left_join(., samples_metadata(MOFAmodel), by = "sample") %>%
  ggplot(., aes(x = Factor1, y = Factor3, colour = RVVC_pos)) +
  geom_point(size = 2, alpha = 0.8, show.legend = F) +
  scale_colour_manual(values = pal$RVVC_pos) +
  coord_cartesian(expand = T) +
  # scale_x_continuous(expand = expansion(mult = 0.02)) +
  # scale_y_continuous(expand = expansion(mult = 0.02)) +
  theme_classic()

ggMarginal(
  p,
  type = "density",
  groupColour = TRUE,
  trim = FALSE,
  groupFill = TRUE
) +
  stat_ellipse(
    #aes(color = RVVC_AS, fill = RVVC_AS),
    geom = "polygon",
    alpha = 0.25
  )
NULL
plot_mofa_factor_vs_variable <- function(
  df,
  factor = 1,
  y_var,
  palette = pal[[y_var]],
  jitter_width = 0.1,
  add_marginal = TRUE,
  marginal_type = "density"
) {
  # jitter object
  jitterer <- position_jitter(width = jitter_width, seed = 123)

  # Main ggplot
  p <- ggplot(
    df,
    aes(
      x = .data[[paste0("Factor", factor)]],
      y = .data[[y_var]],
      colour = .data[[y_var]]
    )
  ) +
    geom_point(
      size = 1,
      alpha = 0.8,
      show.legend = FALSE,
      position = jitterer
    ) +
    geom_point(
      aes(colour = NULL),
      pch = 21,
      size = 1,
      alpha = 0.4,
      show.legend = FALSE,
      colour = "gray",
      position = jitterer
    ) +
    geom_boxplot(
      aes(colour = .data[[y_var]]),
      fill = "transparent",
      show.legend = FALSE,
      outlier.shape = NA,
      median.color = "black"
    ) +
    coord_cartesian(expand = TRUE) +
    theme_minimal() +
    theme(
      axis.text.y.left = element_blank(),
      axis.title.y.left = element_blank()
    )

  # Add palette if provided
  if (!is.null(palette)) {
    p <- p + scale_colour_manual(values = palette)
  }

  # Add marginal plot if requested
  if (add_marginal) {
    p <- ggMarginal(
      p,
      type = marginal_type,
      groupColour = TRUE,
      groupFill = TRUE,
      trim = FALSE,
      margins = "x",
      size = 1
    )
  }

  return(p)
}

# Extract factors, reshape wide, join metadata
df <- get_factors(MOFAmodel, factors = c(1), as.data.frame = TRUE) %>%
  as_tibble() %>%
  select(-group) %>%
  #mutate(fct1_clus = as.character(clusters$cluster[.$sample])) %>%
  pivot_wider(names_from = "factor") %>%
  left_join(samples_metadata(MOFAmodel), by = "sample")
# quartz(width = 3, height = 2, dpi = 150)
plot_mofa_factor_vs_variable(
  df,
  y_var = "RVVC_pos"
)

plot_mofa_factor_vs_variable(
  df,
  y_var = "Clinical_score_(0-5)"
)

plot_mofa_factor_vs_variable(
  df,
  y_var = "fct1_clus"
)

plot_mofa_factor_vs_variable(
  df,
  palette = NULL,
  y_var = "fct4_clus"
)

color_vars <- c(
  "pos",
  "trx_louvain_7",
  "metab_louvain",
  "microb_louvain",
  #"fun_louvain",
  "integ_louvain",
  "Clinical_score_(0-5)",
  "RVVC_pos",
  "RVVC_AS"
) %>%
  set_names(.)

# fct_var <- map(
#   vars,
#   ~ plot_factors(
#     MOFAmodel,
#     factors = "all",
#     color_by = .x,
#   ) +
#     scale_fill_manual(values = pal[[.x]], na.value = "white")
# )

plots <- imap(
  color_vars,
  ~ plot_factor(
    MOFAmodel,
    factor = "all",
    #group_by = .x,
    color_by = .x,
    #dot_size = 4,
    #dodge = TRUE,
    stroke = 0.4,
    #add_violin = TRUE,
    #add_boxplot = FALSE
  ) +
    ylim(-4, 5) +
    scale_fill_manual(values = pal[[.x]], na.value = "white")
)

ggsave(
  paste0("../Results/Factor_plots2/Factor_all_gr", ".pdf"),
  gridExtra::marrangeGrob(grobs = plots, ncol = 1, nrow = 1, height = 7)
)
Saving 7 x 5 in image
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).

9. Covariate-Factor correlation

#samples_metadata(MOFAmodel) <- samples_metadata(MOFAobject)
covariates <- samples_metadata(MOFAmodel)[,
  28:ncol(samples_metadata(MOFAmodel))
] %>%
  colnames()
groups <- samples_metadata(MOFAmodel)[, 1:23] %>% colnames()

# remove columns
# covariates <- covariates[!(grepl("Follow_up", covariates))]
Clin_info <- list(
  Clin_score = covariates[1:12], # Clinical score + Symptom score
  Wet_smear = covariates[13:18], # Wet smear
  Fungal_culture = covariates[19:26], # Fungal culture
  confounders = covariates[32:37], # general confounders
  relationship = covariates[38:42], # relationship
  drugs = covariates[43:60], # drugs
  reproductive_info = covariates[c(61:70, 74:76)], # reproductive info
  louvain = groups[13:20] # reproductive info
)

# reproductive info
# quartz(width = 6, height = 6, dpi = 100)

p <- imap(
  Clin_info,
  ~ wrap_elements(
    panel = ~ correlate_factors_with_covariates(
      MOFAmodel,
      covariates = .x,
      win.asp = 1, # max(6, max(nchar(.x)) * 0.18) / max(3, length(.x) * 0.35) ,
      plot = "r", # use "log_pval" to plot log p-values
      transpose = T
    )
  )
)
par(mar = c(0, 0, 0, 0), bg = NA)
# p[["reproductive_info"]]
p
$Clin_score


$Wet_smear


$Fungal_culture


$confounders


$relationship


$drugs


$reproductive_info


$louvain

imap(
  p,
  ~ {
    vars <- Clin_info[[.y]]

    h <- max(3, length(vars) * 0.35) # height per variable
    w <- max(6, max(nchar(vars)) * 0.18) # width per character

    ggsave(
      filename = paste0("../Results/Covar_cor/", "Fact_", .y, ".png"),
      plot = .x,
      width = w,
      height = h,
      dpi = 300
    )
  }
)
$Clin_score
[1] "../Results/Covar_cor/Fact_Clin_score.png"

$Wet_smear
[1] "../Results/Covar_cor/Fact_Wet_smear.png"

$Fungal_culture
[1] "../Results/Covar_cor/Fact_Fungal_culture.png"

$confounders
[1] "../Results/Covar_cor/Fact_confounders.png"

$relationship
[1] "../Results/Covar_cor/Fact_relationship.png"

$drugs
[1] "../Results/Covar_cor/Fact_drugs.png"

$reproductive_info
[1] "../Results/Covar_cor/Fact_reproductive_info.png"

$louvain
[1] "../Results/Covar_cor/Fact_louvain.png"

10. Factor visualisation

p <- plot_factors(
  MOFAmodel,
  factors = c(1, 2, 3, 4),
  dot_size = 1,
  #stroke = 1,
  color_by = "RVVC_pos"
) +
  scale_fill_manual(
    values = pal$RVVC_pos,
    na.value = "transparent",
    aesthetics = c("colour", "fill")
  )
# quartz(width = 6, height = 6, dpi = 150)

p

ggsave(
  filename = paste0("../Results/Factor_plots2/", "Factor_1-3_grid", "", ".png"),
  plot = p,
  width = 7,
  height = 5,
  dpi = 300
)

### Plot and save many versions:
my_fun <- function(factors, color_var) {
  p <<- do.call(
    plot_factors,
    list(
      object = MOFAmodel, # ← correct argument name
      factors = factors,
      dot_size = 1,
      shape_by = NULL,
      color_by = color_var
    )
  )

  p +
    scale_fill_manual(
      values = pal[[color_var]],
      na.value = "transparent",
      aesthetics = c("colour", "fill")
    )
}

factor_list <- list(c(1, 2, 7), c(1, 8, 9), c(1, 3, 10), c(4, 5, 6), c(4, 5, 6))
color_vars <- c(
  "trx_louvain_7",
  "metab_louvain",
  "microb_louvain",
  "metab_louvain",
  "RVVC_AS"
)

plots <- purrr::map2(factor_list, color_vars, my_fun)
# quartz(width = 6, height = 6, dpi = 150)
# plots[[3]]

pdf(
  file = paste0("../Results/Factor_plots2/Grid_Factors", ".pdf"),
  #units = "in",
  #res = 600,
  #width = 8.5, height = 10 # Epi
  width = 6,
  height = 4 # 11 # my_h # 17 (all samples)
)
plots
[[1]]

[[2]]

[[3]]
Warning: Groups with fewer than two data points have been dropped.
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_density()`).
Warning: Groups with fewer than two data points have been dropped.
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_density()`).
Warning: Groups with fewer than two data points have been dropped.
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_density()`).

[[4]]

[[5]]
#draw(H)
dev.off()
quartz_off_screen 
                2 
Code
plot_factor_set <- function(
  MOFAmodel,
  factors,
  color_vars,
  pal,
  out_file,
  height = 5,
  width_per_plot = 2.8
) {
  plots <- imap(
    color_vars,
    ~ plot_factor(
      MOFAmodel,
      factor = factors,
      color_by = .x,
      dot_size = 2,
      dodge = TRUE,
      stroke = 0.4,
      add_violin = TRUE,
      add_boxplot = FALSE
    ) +
      scale_fill_manual(values = pal[[.x]], na.value = "white") +
      theme(
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()
      )
  )

  p <- patchwork::wrap_plots(plots, nrow = 1) +
    patchwork::plot_layout(guides = "collect", axis_titles = "collect") &
    theme(legend.position = "bottom")

  ggsave(
    filename = out_file,
    plot = p,
    width = width_per_plot * length(color_vars),
    height = height,
    dpi = 300
  )

  p
}

meta_clus <- colnames(annot_col)


fctors <- c(
  "RVVC_AS",
  "pos",
  "Clinical_score_(0-5)",
  "RVVC_pos",
  "metab_louvain"
) %>%
  set_names()

map(
  c(1, 2, 3, 4),
  ~ plot_factor_set(
    MOFAmodel = MOFAmodel,
    factors = .x,
    color_vars = fctors,
    pal = pal,
    out_file = paste0("../Results/Factor_plots2/Factor", .x, "_gr", ".png")
  )
)
[[1]]


[[2]]


[[3]]


[[4]]

Code
c(1, 2, 3, 4, 5, 6, 7, 8)
[1] 1 2 3 4 5 6 7 8
Code
c(9, 10, 11, 12, 13, 14, 15)
[1]  9 10 11 12 13 14 15
Code
fctors <- c(
  "trx_louvain_7",
  "metab_louvain",
  "microb_louvain",
  "fun_louvain"
) %>%
  set_names()
update_geom_defaults("violin", aes(linewidth = 0))
update_geom_defaults("boxplot", aes(linewidth = 0))
p <- map(
  c(1, 2, 3, 4, 5, 6, 7, 8),
  ~ plot_factor_set(
    MOFAmodel = MOFAmodel,
    factors = .x,
    color_vars = fctors,
    pal = pal,
    out_file = paste0("../Results/Factor_plots2/Factor", .x, "_louvain", ".png")
  )
)

# quartz(width = 6, height = 6, dpi = 60)
p_ <- patchwork::wrap_plots(p, ncol = 1) +
  patchwork::plot_layout(guides = "collect", axis_titles = "collect") &
  theme(legend.position = "none")

ggsave(
  paste0("../Results/Factor_plots2/Factor_1-8_louvain_", ".png"),
  p_,
  width = 8,
  height = 10
)
# not working
# ggsave(paste0("../Results/Factor_plots/Factor_4-8_gr", ".pdf"),
#        gridExtra::marrangeGrob(grobs=p, ncol=1, nrow=1, height = 5, width=2)))

11. Feature-level analysis

views <- c("Transcriptomics", "Metabolomics", "Metagenomics", "Functional")
c <- c("#CC79A7", "#D55E00", "#CF1C90", "#E69F00")
p1 <- map(
  views,
  ~ plot_weights(
    MOFAmodel,
    view = .x,
    factor = 1,
    nfeatures = 10,
    text_size = 3
  ) +
    theme_transparent()
)
p2 <- map(
  views,
  ~ plot_top_weights(
    MOFAmodel,
    factor = 1,
    view = .x,
    nfeatures = 10
  ) +
    theme_minimal()
)
plot_grid(plotlist = p2)

Top metagenomic features driving Factor 2

p <- map2(p1, p2, ~ plot_grid(plotlist = list(.x, .y))) %>% set_names(., views)
p
$Transcriptomics

Top metagenomic features driving Factor 2


$Metabolomics

Top metagenomic features driving Factor 2


$Metagenomics

Top metagenomic features driving Factor 2


$Functional

Top metagenomic features driving Factor 2

Code
# Modified function: plot_data_heatmap2() with sample_order argument
# It is identical to the original except for 13 added lines.
plot_data_heatmap2 <- function(
  object,
  factor,
  view = 1,
  groups = "all",
  features = 50,
  annotation_features = NULL,
  annotation_samples = NULL,
  transpose = FALSE,
  imputed = FALSE,
  denoise = FALSE,
  max.value = NULL,
  min.value = NULL,
  sample_order = NULL, # ✅ NEW ARGUMENT
  ...
) {
  if (!is(object, "MOFA")) {
    stop("'object' has to be an instance of MOFA")
  }

  stopifnot(length(factor) == 1)
  stopifnot(length(view) == 1)

  # ✅ INTERNAL HELPERS MUST USE triple-colon
  groups <- MOFA2:::.check_and_get_groups(object, groups)
  factor <- MOFA2:::.check_and_get_factors(object, factor)
  view <- MOFA2:::.check_and_get_views(object, view)

  # Extract model matrices
  W <- do.call(
    rbind,
    get_weights(object, views = view, factors = factor, as.data.frame = FALSE)
  )
  Z_list <- lapply(get_factors(object)[groups], function(z) {
    as.matrix(z[, factor])
  })
  Z <- do.call(rbind, Z_list)[, 1]
  Z <- Z[!is.na(Z)]

  # Choose raw, imputed, or denoised data
  if (isTRUE(denoise)) {
    data <- predict(object, views = view, groups = groups)[[1]]
  } else if (isTRUE(imputed)) {
    data <- get_imputed_data(object, view, groups)[[1]]
  } else {
    data <- get_data(object, views = view, groups = groups)[[1]]
  }
  if (is(data, "list")) {
    data <- do.call(cbind, data)
  }

  # ----------------------
  # FEATURE SELECTION
  # ----------------------
  if (is.numeric(features)) {
    if (length(features) == 1) {
      features <- rownames(W)[tail(order(abs(W)), n = features)]
    } else {
      features <- rownames(W)[order(-abs(W))[features]]
    }
    features <- names(W[features, ])[order(W[features, ])]
  } else if (is.character(features)) {
    stopifnot(all(features %in% features_names(object)[[view]]))
  } else {
    stop("features must be numeric or character vector")
  }

  data <- data[features, ]
  data <- data[, names(Z), drop = FALSE]
  data <- data[, apply(data, 2, function(x) !all(is.na(x))), drop = FALSE]

  # ----------------------
  # ✅ CUSTOM SAMPLE ORDER
  # ----------------------
  if (is.null(sample_order)) {
    # default MOFA2 ordering
    order_samples <- names(sort(Z, decreasing = TRUE))
    order_samples <- order_samples[order_samples %in% colnames(data)]
  } else {
    # user‑specified ordering
    sample_order <- sample_order[sample_order %in% colnames(data)]
    if (!all(sample_order %in% colnames(data))) {
      stop("sample_order contains sample names not found in data")
    }
    order_samples <- sample_order
  }

  data <- data[, order_samples, drop = FALSE]

  # ----------------------
  # ANNOTATION HANDLING
  # ----------------------
  if (!is.null(annotation_samples)) {
    if (is.data.frame(annotation_samples)) {
      if (any(!order_samples %in% rownames(annotation_samples))) {
        stop("annotation_samples rownames must match sample names")
      }

      annotation_samples <- annotation_samples[order_samples, , drop = FALSE]
    } else if (is.character(annotation_samples)) {
      tmp <- object@samples_metadata
      rownames(tmp) <- tmp$sample
      tmp$sample <- NULL
      tmp <- tmp[order_samples, , drop = FALSE]
      annotation_samples <- tmp[, annotation_samples, drop = FALSE]
      rownames(annotation_samples) <- order_samples
    } else {
      stop("annotation_samples must be a data.frame or character vector")
    }

    # logical/character must become factor
    to_factor <- sapply(annotation_samples, function(x) {
      is.logical(x) || is.character(x)
    })
    if (any(to_factor)) {
      annotation_samples[, which(to_factor)] <-
        lapply(annotation_samples[, which(to_factor), drop = FALSE], as.factor)
    }
  }

  if (transpose) {
    data <- t(data)
    tmp_ann <- annotation_samples
    annotation_samples <- annotation_features
    annotation_features <- tmp_ann
  }

  # cap values
  if (!is.null(max.value)) {
    data[data >= max.value] <- max.value
  }
  if (!is.null(min.value)) {
    data[data <= min.value] <- min.value
  }

  # ----------------------
  # ✅ FINAL HEATMAP CALL
  # ----------------------
  pheatmap::pheatmap(
    data,
    annotation_row = annotation_features,
    annotation_col = annotation_samples,
    ...
  )
}

# old alternativer version:
# set sample order (columns)
# NB! can not be used with denoise = TRUE!! (because it changes the Z values used to compute the denoised values used in the heatmap )
set_sample_order <- function(MOFAmodel, factor, order) {
  Z <- MOFAmodel@expectations$Z$single_group[, factor]

  # assign artificial values based on your desired order
  Z[order] <- seq_along(order)

  MOFAmodel@expectations$Z$single_group[, factor] <- Z
  MOFAmodel
}

# MOFAmodel2 <- set_sample_order(MOFAmodel, factor = 1, order = sample_order)
Code
meta_headers <- colnames(samples_metadata(MOFAmodel))
gr_df <- samples_metadata(MOFAmodel)[, c(
  "pos",
  "fct1_clus",
  "Clinical_score_(0-5)",
  "RVVC_pos",
  "CST",
  "BV",
  "Wet smear: clue cells (y/n)",
  "metab_louvain"
)] %>% #"sample",
  mutate_at(., 1:ncol(.), factor)
rownames(gr_df) <- samples_metadata(MOFAmodel)$sample

# determine column order when cluster_cols=F
sample_order <- meta %>%
  filter(sample %in% rownames(gr_df)) %>%
  # mutate(fct1_clus = factor(.$fct1_clus, levels = c("3", "4", "1", "2"))) %>%
  arrange(RVVC_pos) %>%
  #arrange(RVVC_pos, `Clinical_score_(0-5)`) %>%
  arrange(fct1_clus, desc(`Clinical_score_(0-5)`)) %>%
  pull(sample)

gr_df <- gr_df[sample_order, ]

ann_colors <- map(colnames(gr_df), function(val) {
  # Levels for this column
  levs <- levels(gr_df[[val]])

  # CASE 1: column exists in pal → use provided palette
  if (val %in% names(pal)) {
    colors <- pal[[val]]
    return(set_names(colors, levs))
  }
  # CASE 2: column not in pal → generate paired palette
  colors <- gg_color_hue(length(levs))
  set_names(colors, levs)
  # colors <- brewer.pal(length(levs), "Paired")
  # if (length(levs) == length(colors)) {
  #   set_names(colors, levs)
  # } else {
  #   set_names(colors[1:length(levs)], levs)
  # }
})
# Name the list by column names
ann_colors <- set_names(ann_colors, colnames(gr_df))

# quartz(width = 6, height = 4, dpi = 150)
plot_data_heatmap2(
  MOFAmodel,
  view = "Transcriptomics",
  factor = 1,
  features = 20,
  sample_order = sample_order,
  cluster_cols = FALSE,
  annotation_samples = gr_df[, 2:5], #gr_df[,-1], "Clin_gr"
  denoise = FALSE,
  scale = "row",
  annotation_colors = ann_colors
)

Code
# quartz(width = 7, height = 4, dpi = 150)
plot_data_heatmap2(
  MOFAmodel,
  view = "Metabolomics",
  factor = 1,
  features = 20,
  #sample_order = NULL,
  cluster_cols = FALSE,
  annotation_samples = gr_df[, c(2:5)], #gr_df[,-1], "Clin_gr"
  denoise = F,
  scale = "row",
  annotation_colors = ann_colors
)

plot_data_heatmap has an interesting argument to “beautify” the heatmap: denoise = TRUE. Instead of plotting the (noisy) input data, we can plot the data reconstructed by the model, where noise has been removed:

# quartz(width = 4, height = 3, dpi = 150)
p <- plot_factors(
  MOFAmodel,
  factors = c(1, 2),
  color_by = "Clinical_score_(0-5)",
  shape_by = "pos",
  dot_size = 2.5,
  show_missing = T
) +
  scale_fill_manual(values = pal[["Clinical_score_(0-5)"]], na.value = "white")

p <- p +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = (-0), linetype = "dashed")
p

plot_factors(
  MOFAmodel,
  factors = c(1, 3),
  color_by = "Clin_gr"
) +
  scale_fill_manual(values = pal$Clin_gr, na.value = "white") +
  stat_ellipse(
    #aes(color = RVVC_AS, fill = RVVC_AS),
    geom = "polygon",
    alpha = 0.25
  )

p

p <- plot_factors(
  MOFAmodel,
  factors = c(1, 3),
  color_by = "trx_louvain_7",
  shape_by = "Clin_gr",
  dot_size = 2.5,
  show_missing = T
) +
  scale_fill_manual(values = pal$trx_louvain_7, na.value = "white")

p <- p +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = (-0), linetype = "dashed")
p

p <- plot_factors(
  MOFAmodel,
  factors = c(2, 1),
  color_by = "CST",
  shape_by = "Clin_gr",
  dot_size = 2.5,
  show_missing = T
)

p <- p +
  scale_fill_manual(values = pal$CST, na.value = "white") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = (-0), linetype = "dashed")
p

p <- plot_factors(
  MOFAmodel,
  factors = c(4, 1),
  color_by = "CST",
  shape_by = "Clin_gr",
  dot_size = 2.5,
  show_missing = T
) +
  scale_fill_manual(values = pal$CST, na.value = "white")
p

# scale_fill_manual(values = pal$CST, na.value = "white") +
# stat_ellipse(
#   #aes(color = RVVC_pos),
#   geom = "polygon",
#   alpha = 0.25
# )
plot_data_scatter(
  MOFAmodel,
  view = "Transcriptomics",
  factor = 1,
  features = 20,
  color_by = "Clinical_score_(0-5)"
) +
  scale_fill_manual(
    values = pal$`Clinical_score_(0-5)`,
    na.value = "transparent"
  )

plot_data_scatter(
  MOFAmodel,
  view = "Metabolomics",
  factor = 1,
  features = 20,
  color_by = "RVVC_AS"
) +
  scale_fill_manual(values = pal$Clin_gr, na.value = "transparent")

12. Dimensionality reduction

set.seed(42)
MOFAmodel <- run_umap(MOFAmodel)

plot_dimred(
  MOFAmodel,
  method = "UMAP",
  color_by = "RVVC_pos",
  dot_size = 5
) +
  scale_fill_manual(values = pal$RVVC_pos, na.value = "transparent")
Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
of ggplot2 3.3.4.
ℹ The deprecated feature was likely used in the MOFA2 package.
  Please report the issue at <https://github.com/bioFAM/MOFA2>.

#MOFAmodel <- run_umap(MOFAmodel, factors = c("Factor4"))

Step 5: Shankey plot

library(niceRplots)

plot_meta_sankey <- function(
  vars = c("CST", "trx_louvain_7"),
  palette = pal1,
  drop_na = TRUE,
  return_pdf = TRUE,
  pdf_name = "sankey"
) {
  stopifnot(length(vars) == 2)

  ## Select and clean metadata
  m <- meta %>%
    filter(sample %in% m$sample[m$at_least_two & m$all_BL]) %>%
    dplyr::select(dplyr::all_of(vars))

  if (drop_na) {
    #m <- tidyr::drop_na(m)
    m <- tidyr::drop_na(m, vars[1])
    m <- mutate(
      m,
      !!vars[2] := tidyr::replace_na(as.character(.data[[vars[2]]]), "NA")
    )
  }

  ## Extract labels
  tmp <- m[, vars, drop = FALSE]
  tmp <- as.data.frame(tmp)

  ## Ensure factors
  tmp[[1]] <- factor(tmp[[1]])
  tmp[[2]] <- factor(tmp[[2]])

  ## Plot sankey
  #pdf( paste0("../results/integ_louvain_sankey.pdf"),width = 1.5*3.5,height = 1.5*3.5)
  #par(mar=c(4,4,4,4))
  #plot_sankey(df = tmp, pal = pal1)
  #dev.off()

  plot_expr <- {
    plot_sankey(df = tmp, pal = c(palette, "NA" = "gray"))
  }

  if (return_pdf) {
    ## Plot sankey
    pdf(
      paste0("../Results/sankey/", pdf_name, ".pdf"),
      width = 1.5 * 3.5,
      height = 1.5 * 3.5
    )
    par(mar = c(4, 8, 4, 4))
    plot_sankey(df = tmp, pal = palette)
    dev.off()
  }
  #plot_sankey(df = tmp, pal = palette)
}

# NB! the plot_meta_sankey function filters samples so that only the MOFA samples are included
# quartz(width = 6, height = 6, dpi = 150)
v <- c("metab_louvain", "CST")
plot_meta_sankey(v, pdf_name = "sankey_metab_CST", palette = pal[[v[1]]])

quartz_off_screen 
                2 
v <- c("microb_louvain", "CST")
plot_meta_sankey(v, pdf_name = "sankey_microb_CST", palette = pal[[v[1]]])

quartz_off_screen 
                2 
v <- c("CST", "Clinical_score_(0-5)")
plot_meta_sankey(v, pdf_name = "sankey_CST_Clin_score", palette = pal[[v[1]]])

quartz_off_screen 
                2 
v <- c("CST", "trx_louvain_7")
plot_meta_sankey(v, pdf_name = "sankey_CST_TRX", palette = pal[[v[1]]])

quartz_off_screen 
                2 
v <- c("metab_louvain", "BV")
plot_meta_sankey(v, pdf_name = "sankey_metab_BV", palette = pal[[v[1]]])

quartz_off_screen 
                2 
fct <- c(
  "trx_louvain_7",
  "metab_louvain",
  "microb_louvain",
  "fun_louvain"
) %>%
  set_names()

map(
  fct,
  ~ plot_meta_sankey(
    c("RVVC_pos", .x),
    pdf_name = paste0("RVVC_pos_", .x, "_sankey"),
    palette = pal[["RVVC_pos"]]
  )
)

$trx_louvain_7
quartz_off_screen 
                2 

$metab_louvain
quartz_off_screen 
                2 

$microb_louvain
quartz_off_screen 
                2 

$fun_louvain
quartz_off_screen 
                2 
#pdf( paste0("../results/integ_louvain_sankey.pdf"),width = 1.5*3.5,height = 1.5*3.5)
#par(mar=c(4,4,4,4))
#plot_sankey(df = tmp, pal = pal1)
#dev.off()
# install.packages("ggalluvial")
library(ggalluvial)

lov <- c(
  "Transcriptomics" = "trx_louvain_7",
  "Metabolomics" = "metab_louvain",
  "Microbiome" = "microb_louvain",
  "Functional" = "fun_louvain"
)

# --- 1. Prepare dataframe ---
# Ensure factors are treated as characters (avoid NA-level problems)
df_long <- result %>%
  select(sample, trx_louvain_7, metab_louvain, fun_louvain, microb_louvain) %>%
  mutate(across(-sample, as.character)) %>%
  drop_na() %>%
  pivot_longer(
    cols = -sample,
    names_to = "col",
    values_to = "cluster"
  ) %>%
  mutate(
    omic = factor(
      col,
      levels = lov,
      labels = names(lov)
    ),
    color = mapply(
      function(colname, clust) {
        pal[[colname]][as.integer(clust)]
      },
      col,
      cluster
    )
  )


# --- 2. Build the alluvial structure ---
l <- c(
  "Tr_4",
  "Tr_2",
  "Tr_1",
  "Tr_3",
  "Tr_6",
  "Tr_5",
  "Me_5",
  "Me_1",
  "Me_3",
  "Me_2",
  "Me_6",
  "Me_4",
  "Mi_4",
  "Mi_1",
  "Mi_2",
  "Mi_3",
  "Mi_5",
  "Fu_5",
  "Fu_2",
  "Fu_3",
  "Fu_1",
  "Fu_4",
  "Fu_6"
)

df_alluv <- df_long %>%
  mutate(alluvium = sample) %>%
  mutate(lvl = paste0(str_extract(omic, "^\\w\\w"), "_", cluster)) %>%
  mutate(lvl = factor(.$lvl, levels = l))

# --- 3. Plot ---
ggplot(
  df_alluv,
  aes(
    x = omic,
    stratum = lvl, #cluster,
    alluvium = alluvium,
    fill = color,
    label = cluster
  )
) +
  geom_flow(alpha = 0.5, color = "gray40") +
  geom_stratum(color = "black") +
  geom_text(stat = "stratum", color = "black", size = 3) +
  scale_fill_identity() +
  #scale_fill_brewer(type = "qual", palette = "Set2") +
  theme_minimal(base_size = 14) +
  labs(
    title = "Cluster Transitions Across Omics Layers",
    x = "Omics Layer",
    y = "Sample Count",
    fill = "Cluster"
  )

Extracting data for downstream analysis

# "factors" is a list of matrices, one matrix per group with dimensions (nsamples, nfactors)
factors <- get_factors(MOFAmodel, factors = "all")
lapply(factors, dim)
# long format
factors <- get_factors(MOFAmodel, as.data.frame = TRUE)
head(factors, n = 3)

# "weights" is a list of matrices, one matrix per view with dimensions (nfeatures, nfactors)
weights <- get_weights(MOFAmodel, views = "all", factors = "all")
lapply(weights, dim)
# long format
weights <- get_weights(MOFAmodel, as.data.frame = TRUE)
head(weights, n = 3)

# "data" is a nested list of matrices, one matrix per view and group with dimensions (nfeatures, nsamples)
data <- get_data(MOFAmodel)
lapply(data, function(x) lapply(x, dim))[[1]]
# long format
data <- get_data(MOFAmodel, as.data.frame = TRUE)
head(data, n = 3)
factors <- get_factors(MOFAmodel, as.data.frame = TRUE)
factors <- factors %>%
  as_tibble() %>%
  select(-group) %>%
  left_join(meta, by = "sample") %>%
  filter(grepl("Factor1$|Factor2$|Factor3$|Factor4$", .$factor))

plot_factor_violins <- function(data, group_var, comparisons) {
  p <- ggplot(
    data,
    aes(x = .data[[group_var]], y = value, fill = .data[[group_var]])
  ) +
    geom_violin(trim = FALSE) +
    geom_point() +
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    facet_wrap(~factor, scales = "free_y", nrow = 1) +
    #facet_grid(rows = vars(RVVC_pos), cols = vars(factor), scales = "free", space = "free") +
    labs(
      x = "Groups",
      y = "Factor value"
    ) +
    theme_classic() +
    scale_fill_manual(values = pal[[group_var]]) +
    theme(
      strip.text = element_text(size = 10),
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.position = "none"
    )
  # Add stats if provided
  if (!is.null(comparisons)) {
    p <- p +
      stat_compare_means(
        comparisons = comparisons,
        label = "p.signif",
        method = "wilcox.test"
      )
  }
  return(p)
}

my_comparisons <- list(
  list(c("RVVC_pos", "Control_neg")),
  list(c("0", "5"), c("4", "5"))
  #list( c("0", "1-3"), c("0", "4-5") )
)

# quartz(width = 6, height = 6)
cols <- c("RVVC_pos", "Clinical_score_(0-5)")
plots <- map2(
  cols,
  my_comparisons,
  ~ plot_factor_violins(
    data = factors,
    group_var = .x,
    comparisons = .y
  )
)
wrap_plots(plots, ncol = 1, axis_titles = "collect")

Summary

This file is a fully executable Quarto document with named chunks for reproducibility, caching, and debugging.

# code not currently in use, but could be usefull

## ---- overview-plot ------------------------------------------------
# shows the same as overview plot, but coloured by metadata and not by omic type
mofa_df %>%
  left_join(select(meta, sample, RVVC_AS), by = "sample") %>%
  select(sample, view, RVVC_AS) %>%
  unique() %>%
  mutate(RVVC_AS = factor(.$RVVC_AS)) %>%
  mutate(sample = fct_reorder(sample, as.integer(RVVC_AS), .na_rm = FALSE)) %>%

  ggplot(., aes(x = sample, y = view, fill = RVVC_AS)) +
  geom_tile() + #scale_fill_manual(values = c(missing = "grey", colors)) +
  theme_nothing() +
  theme(
    axis.text.y = element_text(size = 12),
    legend.position = "top",
    plot.margin = unit(c(1, 1, 0, 1), "cm")
  ) +
  scale_fill_manual(values = pal$Clin_bin, na.value = "transparent")


## ---- mofa-variance-barplot ------------------------------------------------
plot_variance_explained(
  MOFAmodel,
  plot_total = FALSE
)

df_long <- get_factors(
  MOFAmodel,
  factors = "all",
  groups = "all",
  as.data.frame = TRUE
) %>%
  left_join(., samples_metadata(MOFAmodel)[, -1], by = "sample") %>%
  as_tibble()

df_wide <- pivot_wider(df_long, names_from = "factor", values_from = "value")
plot_fact.fun <- function(X, Y, gr_val, pal) {
  p <- ggplot(
    df_wide,
    aes(x = .data[[X]], y = .data[[Y]], colour = .data[[gr_val]])
  ) +
    geom_point(size = 2, alpha = 0.9) +
    labs(x = X, y = Y) +
    theme_classic() +
    scale_fill_manual(
      values = pal[1:length(unique(na.omit(df_wide[[gr_val]])))],
      aesthetics = c("colour", "fill")
    ) +
    theme(
      axis.text = element_text(size = rel(0.8), color = "#373844"),
      axis.title = element_text(size = rel(1.1), color = "#373844"),
      axis.line = element_line(color = "#373844", linewidth = 0.5),
      axis.ticks = element_line(color = "#373844", linewidth = 0.5)
    )
  return(p)
}
map(vars, ~ plot_fact.fun("Factor1", "Factor3", .x, pal3))
plot_fact.fun("Factor1", "Factor3", "group_2", pal3)

## other version:
# df_long <- get_factors(
#   MOFAmodel,
#   factors = "all",
#   groups = "all",
#   as.data.frame = TRUE
# ) %>%
#   left_join(., samples_metadata(MOFAobject)[, -2], by = "sample") %>%
#   as_tibble()

# df_wide <- pivot_wider(
#   df_long,
#   names_from = "factor",
#   values_from = "value"
# ) %>%
#   mutate(lab = ifelse(.$RVVC_AS == "RVVC_AS", .$sample, NA))

# plot_fact.fun <- function(X, Y, gr_val, pal) {
#   p <- ggplot(
#     df_wide,
#     aes(x = .data[[X]], y = .data[[Y]], colour = .data[[gr_val]])
#   ) +
#     geom_point(size = 2, alpha = 0.9) +
#     geom_text(
#       aes(label = .data[["lab"]]),
#       size = 3,
#       hjust = 0,
#       nudge_x = 0.07,
#       color = "gray51"
#     ) +
#     labs(x = X, y = Y) +
#     theme_classic() +
#     scale_fill_manual(
#       na.value = "white",
#       values = pal[1:length(unique(na.omit(df_wide[[gr_val]])))],
#       aesthetics = c("colour", "fill")
#     ) +
#     theme(
#       axis.text = element_text(size = rel(0.8), color = "#373844"),
#       axis.title = element_text(size = rel(1.1), color = "#373844"),
#       axis.line = element_line(color = "#373844", linewidth = 0.5),
#       axis.ticks = element_line(color = "#373844", linewidth = 0.5)
#     )
#   return(p)
# }
# vars <- c(
#   "RVVC_AS",
#   "Clin_gr",
#   "group_2",
#   "hyphae",
#   "group_cross",
#   "Clinical_score_(0-5)"
# ) %>%
#   set_names()

# # colour pallets
# blue <- brewer.pal(6, name = "Blues") %>% c(., "gray")
# Red <- brewer.pal(6, name = "Reds") #%>% c(., "gray")
# pal <- c("#f26386", "#fd814e", "#a4d984", "#fbbc52") # "#f588af", "#fd814e"
# pal2 <- c("#2266ac", "#91c5de")

# #Red <- brewer.pal(6, name = "Reds") #%>% c(., "gray")
# pal1 <- c("#ffa998", '#FB5273', "#902267") %>% c(., "gray") #
# pal3 <- c("#9e8dec", "#cc93cf", "#a5c97b", "#de9548") #"#abcb4b" "#63d3b4" "#902267",

# plot_fact.fun("Factor1", "Factor3", "group_2", pal3)

# p <- map(vars, ~ plot_fact.fun("Factor2", "Factor1", .x, pal3))
# p[["RVVC_AS"]] +
#   stat_ellipse(
#     aes(color = RVVC_AS, fill = RVVC_AS),
#     geom = "polygon",
#     alpha = 0.25
#   )

## ---- weights-by-view ------------------------------------------------
# test the
weights_TR <- get_weights(MOFAmodel, views = "Transcriptomics")$Transcriptomics
head(sort(abs(weights_TR[, 1]), decreasing = TRUE))

TRX <- t(X$species$TR)
mt_genes <- grepl("^MT", rownames(TRX))
mt_fraction <- colSums(TRX[mt_genes, ]) / colSums(TRX)

cor(
  get_factors(MOFAmodel)$group1[, 1],
  mt_fraction,
  use = "complete.obs"
)