olink_normalization {OlinkAnalyze}R Documentation

Description

Normalizes two Olink datasets to each other, or one Olink dataset to a reference set of median values.

Usage

olink_normalization(
  df1,
  df2 = NULL,
  overlapping_samples_df1,
  overlapping_samples_df2 = NULL,
  df1_project_nr = "P1",
  df2_project_nr = "P2",
  reference_project = "P1",
  reference_medians = NULL
)

Arguments

df1

First dataset to be used for normalization (required).

df2

Second dataset to be used for normalization. Required for bridge and subset normalization.

overlapping_samples_df1

Character vector of samples to be used for the calculation of adjustment factors in df1 (required).

overlapping_samples_df2

Character vector of samples to be used for the calculation of adjustment factors in df2. Required for subset normalization.

df1_project_nr

Project name of first dataset (required).

df2_project_nr

Project name of second dataset. Required for bridge and subset normalization.

reference_project

Project to be used as reference project. Should be one of df1_project_nr and df2_project_nr. Required for bridge and subset normalization.

reference_medians

Dataset with columns "OlinkID" and "Reference_NPX". Required for reference median normalization.

Details

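The function handles four different types of normalization: bridge normalization, subset normalization, reference median normalization and cross-product normalization.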

The output dataset is df1 if reference median normalization, or df2 appended to df1 if bridge, subset or cross-product normalization. The output dataset contains all original columns from the original dataset(s), and the columns:

Value

Tibble or ArrowObject with the normalized dataset.

Examples



# prepare datasets: add a Normalization column set to "Intensity" to both
# example datasets
npx_df1 <- dplyr::mutate(npx_data1, Normalization = "Intensity")
npx_df2 <- dplyr::mutate(npx_data2, Normalization = "Intensity")

# bridge normalization

# overlapping samples: sample IDs present in both datasets, without the
# control samples (IDs starting with "CONTROL_SAMPLE")
overlap_samples <- intersect(x = npx_df1$SampleID, y = npx_df2$SampleID)
overlap_samples <- overlap_samples[
  !grepl("^CONTROL_SAMPLE", overlap_samples)
]

# normalize, using project "P1" (df1) as the reference
olink_normalization(
  df1 = npx_df1,
  df2 = npx_df2,
  overlapping_samples_df1 = overlap_samples,
  df1_project_nr = "P1",
  df2_project_nr = "P2",
  reference_project = "P1"
)

# subset normalization

# find a suitable subset of samples from each dataset:
# exclude control samples
# exclude samples that do not pass QC

# Helper shared by both datasets: returns the unique SampleID values of
# `npx_df` for which every row has QC_Warning equal to "Pass" and whose ID
# does not start with "CONTROL_SAMPLE".
select_passing_samples <- function(npx_df) {
  npx_df |>
    # evaluated per sample: keep a sample only if all of its rows pass QC;
    # `.by` groups for this step only, so no ungroup() is needed afterwards
    dplyr::filter(
      all(.data[["QC_Warning"]] == "Pass"),
      .by = dplyr::all_of("SampleID")
    ) |>
    # drop control samples
    dplyr::filter(
      !grepl(pattern = "^CONTROL_SAMPLE", x = .data[["SampleID"]])
    ) |>
    dplyr::pull(
      .data[["SampleID"]]
    ) |>
    unique()
}

df1_samples <- select_passing_samples(npx_df1)
df2_samples <- select_passing_samples(npx_df2)

# select a subset of samples from each set from above
# (sample() draws at random; call set.seed() first for a reproducible subset)
df1_subset <- sample(x = df1_samples, size = 16L)
df2_subset <- sample(x = df2_samples, size = 20L)

# normalize
olink_normalization(
  df1 = npx_df1,
  df2 = npx_df2,
  overlapping_samples_df1 = df1_subset,
  overlapping_samples_df2 = df2_subset,
  df1_project_nr = "P1",
  df2_project_nr = "P2",
  reference_project = "P1"
)

# special case of subset normalization using all samples
olink_normalization(
  df1 = npx_df1,
  df2 = npx_df2,
  overlapping_samples_df1 = df1_samples,
  overlapping_samples_df2 = df2_samples,
  df1_project_nr = "P1",
  df2_project_nr = "P2",
  reference_project = "P1"
)

# reference median normalization

# For the sake of this example, give every distinct OlinkID a reference
# median drawn at random from a uniform distribution between -1 and 1
ref_med_df <- dplyr::mutate(
  dplyr::distinct(
    dplyr::select(npx_data1, dplyr::all_of("OlinkID"))
  ),
  Reference_NPX = runif(n = dplyr::n(), min = -1, max = 1)
)

# normalize df1 against the reference medians; no second dataset is passed
olink_normalization(
  df1 = npx_df1,
  overlapping_samples_df1 = df1_subset,
  reference_medians = ref_med_df
)

# cross-product normalization

# get reference samples: sample IDs shared by the two datasets, leaving out
# any ID that contains "CONTROL"
overlap_samples_product <- intersect(
  x = unique(OlinkAnalyze:::data_ht_small$SampleID),
  y = unique(OlinkAnalyze:::data_3k_small$SampleID)
)
overlap_samples_product <- (\(ids) ids[!grepl("CONTROL", ids)])(
  overlap_samples_product
)

# normalize, using project "proj_ht" (df1) as the reference
olink_normalization(
  df1 = OlinkAnalyze:::data_ht_small,
  df2 = OlinkAnalyze:::data_3k_small,
  overlapping_samples_df1 = overlap_samples_product,
  df1_project_nr = "proj_ht",
  df2_project_nr = "proj_3k",
  reference_project = "proj_ht"
)



[Package OlinkAnalyze version 4.0.2 Index]