Author

Philipp Sven Lars Schäfer

Published

November 28, 2024

1 Packages

Code
suppressPackageStartupMessages({
  library(tidyverse)
  library(ggdark)
  library(magick)
  source(file.path("..", "src", "read_data.R"))
  source(file.path("..", "src", "colors.R"))
  source(file.path("..", "src", "generate_targets.R"))
})

2 Data

Code
# Directory holding the input data, relative to this document's location.
input_dir <- file.path("..", "data")
Code
# Meta data and gene annotations, both read from `input_dir`.
meta_data <- read_harmonized_meta_data(input_dir)
gene_meta <- read_gene_meta(input_dir)

# Read the raw assay data, then filter it against the meta data and gene
# annotations. The filtering step logs what it removes (see output below).
experimental_data <- read_raw_experimental_data(input_dir)
experimental_data <- filter_experimental_data(
  meta_data,
  experimental_data,
  gene_meta
)
pbmc_cell_frequency | Removed 550 specimens because missing in meta data
plasma_ab_titer | Removed 6931 specimens because missing in meta data
plasma_cytokine_concentration_by_olink | Removed 495 specimens because missing in meta data
pbmc_cell_frequency | Removed 56 features because not in feature subset
plasma_ab_titer | Removed 48 features because not in feature subset
plasma_cytokine_concentration_by_olink | Removed 234 features because not in feature subset
t_cell_polarization | Removed 3 features because not in feature subset
plasma_cytokine_concentration_by_olink | Removed 300 features because qc warning
plasma_ab_titer | Removed 10540 measurements because wrong unit used
plasma_cytokine_concentration_by_olink | Removed 2400 measurements because wrong unit used
plasma_cytokine_concentration_by_legendplex | Removed 8 because specimen is outlier
plasma_cytokine_concentration_by_olink | Removed specimen 750, 760, 824, 833, 894, 903 because fraction of measurements below LOQ > 50%
plasma_ab_titer | Removed specimen 674, 675, 676 because fraction of measurements below LOD > 50%
Code
# Wide-format version of the filtered experimental data; `impute = "zero"`
# requests zero imputation for missing values (see the log output below).
wide_experimental_data <- generate_wide_experimental_data(
  experimental_data = experimental_data,
  impute = "zero"
)
plasma_cytokine_concentration_by_olink | NA Fraction: 0.00288417166589755 | Imputed with zero imputation
t_cell_activation | NA Fraction: 0.0576923076923077 | Removed samples: 681
t_cell_activation | NA Fraction: 0.0553691275167785 | Imputed with zero imputation
t_cell_polarization | NA Fraction: 0.0134099616858238 | Imputed with zero imputation
Code
# Annotation tables for cell types and proteins, read from `input_dir`.
# `gene_meta` is not re-read here: it was already loaded from the same
# directory right after the harmonized meta data.
celltype_meta <- read_celltype_meta(input_dir)
protein_meta <- read_protein_meta(input_dir)

3 Time Points

Code
# Specimens per time point, derived from the meta data.
specimen_list <- get_specimen_per_day(meta_data)

# Tag every per-day table with its list name, stack the tables, and show the
# actual sampling day (relative to boost) per planned time point. Green lines
# mark the nominal days 0, 1, 3, 14 and 30.
# NOTE(review): `acceptable_differences` is not defined in this document;
# presumably it comes from one of the sourced scripts -- confirm.
specimen_list %>%
  purrr::imap(function(specimens, day_label) {
    dplyr::mutate(specimens, day = day_label)
  }) %>%
  dplyr::bind_rows() %>%
  dplyr::mutate(
    day = factor(day, levels = paste0("day_", c(0, 1, 3, 14, 30)))
  ) %>%
  ggplot() +
  geom_histogram(aes(x = actual_day_relative_to_boost), binwidth = 1) +
  geom_vline(xintercept = c(0, 1, 3, 14, 30), color = "forestgreen") +
  facet_wrap(~day, ncol = 1) +
  ggdark::dark_mode(verbose = FALSE) +
  scale_x_continuous(
    breaks = seq(
      min(unlist(acceptable_differences)),
      max(unlist(acceptable_differences))
    )
  )

4 Generate Targets for each Task

4.1 Task 1) Antibody level tasks

  • 1.1) Rank the individuals by IgG antibody levels against pertussis toxin (PT) that we detect in plasma 14 days post booster vaccinations.

  • 1.2) Rank the individuals by fold change of IgG antibody levels against pertussis toxin (PT) that we detect in plasma 14 days post booster vaccinations compared to titer values at day 0.

Code
# Targets for Task 1 (antibody level tasks).
# NOTE(review): `experimental_data_settings` is not defined in this document;
# presumably it is provided by one of the sourced scripts -- confirm.
targets_task_1 <- generate_targets_task_1(
  meta_data = meta_data,
  experimental_data = experimental_data,
  experimental_data_settings = experimental_data_settings,
  specimen_list = specimen_list
)
Code
# Plot the Task 1 targets with the project helper `plot_targets()`
# (not defined in this document; presumably from a sourced script -- confirm).
plot_targets(targets_task_1)

Code
# Histogram of the `task11` target, one panel per dataset.
targets_task_1 %>%
  dplyr::left_join(
    # one row per subject/dataset pair so the join can attach `dataset`
    dplyr::distinct(dplyr::select(meta_data, subject_id, dataset)),
    by = "subject_id"
  ) %>%
  ggplot(aes(x = task11)) +
  geom_histogram(bins = 50, color = "black") +
  facet_wrap(~dataset, ncol = 1) +
  ggdark::dark_mode()

Code
# Histogram of the `task12` target, one panel per dataset.
targets_task_1 %>%
  dplyr::left_join(
    # one row per subject/dataset pair so the join can attach `dataset`
    dplyr::distinct(dplyr::select(meta_data, subject_id, dataset)),
    by = "subject_id"
  ) %>%
  ggplot(aes(x = task12)) +
  geom_histogram(bins = 50, color = "black") +
  facet_wrap(~dataset, ncol = 1) +
  ggdark::dark_mode()

Code
# Density of the `task12` target, one panel per dataset.
targets_task_1 %>%
  dplyr::left_join(
    # one row per subject/dataset pair so the join can attach `dataset`
    dplyr::distinct(dplyr::select(meta_data, subject_id, dataset)),
    by = "subject_id"
  ) %>%
  ggplot(aes(x = task12)) +
  geom_density() +
  facet_wrap(~dataset, ncol = 1) +
  ggdark::dark_mode()

4.2 Task 2) Cell frequency tasks

  • 2.1) Rank the individuals by predicted frequency of Monocytes on day 1 post-booster vaccination.

  • 2.2) Rank the individuals by fold change of predicted frequency of Monocytes on day 1 post booster vaccination compared to cell frequency values at day 0.

Code
# Targets for Task 2 (cell frequency tasks).
# NOTE(review): `experimental_data_settings` is not defined in this document;
# presumably it is provided by one of the sourced scripts -- confirm.
targets_task_2 <- generate_targets_task_2(
  meta_data = meta_data,
  experimental_data = experimental_data,
  experimental_data_settings = experimental_data_settings,
  specimen_list = specimen_list
)
# Print the resulting table for inspection.
targets_task_2
Code
# Plot the Task 2 targets with the project helper `plot_targets()`
# (not defined in this document; presumably from a sourced script -- confirm).
plot_targets(targets_task_2)

Code
# Histogram of the `task21` target, one panel per dataset.
targets_task_2 %>%
  dplyr::left_join(
    # one row per subject/dataset pair so the join can attach `dataset`
    dplyr::distinct(dplyr::select(meta_data, subject_id, dataset)),
    by = "subject_id"
  ) %>%
  ggplot(aes(x = task21)) +
  geom_histogram(bins = 50, color = "black") +
  facet_wrap(~dataset, ncol = 1) +
  ggdark::dark_mode()

Code
# Histogram of the `task22` target, one panel per dataset.
targets_task_2 %>%
  dplyr::left_join(
    # one row per subject/dataset pair so the join can attach `dataset`
    dplyr::distinct(dplyr::select(meta_data, subject_id, dataset)),
    by = "subject_id"
  ) %>%
  ggplot(aes(x = task22)) +
  geom_histogram(bins = 50, color = "black") +
  facet_wrap(~dataset, ncol = 1) +
  ggdark::dark_mode()

Code
# Density of the `task22` target, one panel per dataset.
targets_task_2 %>%
  dplyr::left_join(
    # one row per subject/dataset pair so the join can attach `dataset`
    dplyr::distinct(dplyr::select(meta_data, subject_id, dataset)),
    by = "subject_id"
  ) %>%
  ggplot(aes(x = task22)) +
  geom_density() +
  facet_wrap(~dataset, ncol = 1) +
  ggdark::dark_mode()

4.3 Task 3) Gene expression tasks

  • 3.1) Rank the individuals by predicted gene expression of CCL3 on day 3 post-booster vaccination.

  • 3.2) Rank the individuals by fold change of predicted gene expression of CCL3 on day 3 post booster vaccination compared to gene expression values at day 0.

Code
# Targets for Task 3 (gene expression tasks); unlike tasks 1 and 2 this call
# also receives the gene annotations.
# NOTE(review): `experimental_data_settings` is not defined in this document;
# presumably it is provided by one of the sourced scripts -- confirm.
targets_task_3 <- generate_targets_task_3(
  meta_data = meta_data,
  experimental_data = experimental_data,
  experimental_data_settings = experimental_data_settings,
  specimen_list = specimen_list,
  gene_meta = gene_meta
)
# Print the resulting table for inspection.
targets_task_3
Code
# Plot the Task 3 targets with the project helper `plot_targets()`
# (not defined in this document; presumably from a sourced script -- confirm).
plot_targets(targets_task_3)

Code
# Histogram of the `task31` target, one panel per dataset.
targets_task_3 %>%
  dplyr::left_join(
    # one row per subject/dataset pair so the join can attach `dataset`
    dplyr::distinct(dplyr::select(meta_data, subject_id, dataset)),
    by = "subject_id"
  ) %>%
  ggplot(aes(x = task31)) +
  geom_histogram(bins = 50, color = "black") +
  facet_wrap(~dataset, ncol = 1) +
  ggdark::dark_mode()

Code
# Density of the log10-transformed `task31` target, one panel per dataset.
targets_task_3 %>%
  dplyr::left_join(
    # one row per subject/dataset pair so the join can attach `dataset`
    dplyr::distinct(dplyr::select(meta_data, subject_id, dataset)),
    by = "subject_id"
  ) %>%
  ggplot(aes(x = log10(task31))) +
  geom_density() +
  facet_wrap(~dataset, ncol = 1) +
  ggdark::dark_mode()

Code
# Histogram of the `task32` target, one panel per dataset.
targets_task_3 %>%
  dplyr::left_join(
    # one row per subject/dataset pair so the join can attach `dataset`
    dplyr::distinct(dplyr::select(meta_data, subject_id, dataset)),
    by = "subject_id"
  ) %>%
  ggplot(aes(x = task32)) +
  geom_histogram(bins = 50, color = "black") +
  facet_wrap(~dataset, ncol = 1) +
  ggdark::dark_mode()

Code
# Density of the `task32` target, one panel per dataset.
# This chunk belongs to the Task 3 section; it previously re-plotted
# `task22` from `targets_task_2` (a copy of the Task 2 density plot).
targets_task_3 %>%
  dplyr::left_join(
    # one row per subject/dataset pair so the join can attach `dataset`
    (meta_data %>% dplyr::select(subject_id, dataset) %>% dplyr::distinct()),
    by = "subject_id"
  ) %>%
  ggplot() +
  geom_density(aes(x = task32)) +
  facet_wrap(~dataset, ncol = 1) +
  ggdark::dark_mode()

5 Conclusions

  • Task 1: Substantial differences in the marginal distributions of the target variables between the cohorts / years.

  • Task 2: Substantial differences in the marginal distributions of the target variables between the cohorts / years.

  • Task 3: Small differences in the marginal distributions of the target variables between the cohorts / years.