From d9f48f73ce3e639d77cd5f5eff0c2ae63b5578f6 Mon Sep 17 00:00:00 2001
From: Jonathan Kitt <jonathan.kitt@inrae.fr>
Date: Mon, 3 Feb 2025 14:18:37 +0100
Subject: [PATCH 1/2] delete redundant functions()

---
 R/add_snps.R       | 38 ----------------------------
 R/standardise_lr.R | 59 -------------------------------------------
 R/standardize_lr.R | 62 ----------------------------------------------
 3 files changed, 159 deletions(-)
 delete mode 100644 R/add_snps.R
 delete mode 100644 R/standardise_lr.R
 delete mode 100644 R/standardize_lr.R

diff --git a/R/add_snps.R b/R/add_snps.R
deleted file mode 100644
index df0a2d9..0000000
--- a/R/add_snps.R
+++ /dev/null
@@ -1,38 +0,0 @@
-#' Add SNPs data
-#'
-#' @param lr lr values
-#' @param path_to_snps file containing the list of SNPs (three columns needed : 'probeset_id', 'chromosome', 'position')
-#' @param rm_unknown whether or not to remove rows when 'chromosome' or 'position' is missing (defaults to TRUE)
-#'
-#' @return a [tibble()]
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' add_snps(lr, path_to_snps, rm_unknown = TRUE)
-#' }
-
-add_snps <- function(lr, path_to_snps, rm_unknown = TRUE) {
-
-  # Set NULL variables
-
-  file_name <- probeset_id <- NULL
-
-  snps <- readr::read_delim(path_to_snps) |>
-    dplyr::select(probeset_id, chromosome, position)
-
-  lr <- lr |>
-    dplyr::left_join(snps) |>
-    dplyr::select(file_name, chromosome, probeset_id, position,
-                  dplyr::everything()) |>
-    dplyr::arrange(file_name, chromosome, position)
-
-  if (rm_unknown == TRUE) {
-    lr <- lr |>
-      tidyr::drop_na(chromosome, position)
-  }
-
-  # Assign to Global Environment
-  assign(x = "lr", value = lr, pos = ".GlobalEnv")
-
-}
diff --git a/R/standardise_lr.R b/R/standardise_lr.R
deleted file mode 100644
index 040110f..0000000
--- a/R/standardise_lr.R
+++ /dev/null
@@ -1,59 +0,0 @@
-#' Standardise lr values
-#'
-#' @param lr lr values
-#' @param method how to perform standardisation ("ref", "mean", or "both", defaults to "mean")
-#' @param ref name of the sample to use for standardisation (must be defined if method is "ref" or "both", defaults to NULL)
-#'
-#' @return a [tibble()]
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' standardise_lr(lr, method = "mean", ref = NULL)
-#' }
-
-standardise_lr <- function(lr, method = "mean", ref = NULL) {
-
-  # Set NULL variables
-
-  file_name <- probeset_id <- NULL
-
-  # If method = "ref" or "both", check if ref is defined
-  if(method  %in% c("ref", "both")){
-    print("Checking if reference sample is defined")
-    if(is.null(ref)){
-      stop("Reference sample is not defined")
-    }
-    if(!is.null(ref) & !ref %in% unique(lr$file_name)) {
-      stop("Reference sample doesn't exist in the list of samples")
-    }
-  }
-
-  # If method = "mean", calculate lr_st_mean
-  if (method == "mean") {
-    lr <- lr |>
-      dplyr::mutate(lr_mean = mean(lr, na.rm = TRUE),
-                    .by = probeset_id) |>
-      dplyr::mutate(lr_st_mean = lr - lr_mean) |>
-      dplyr::select(-lr_mean)
-  }
-
-  # If method = "ref", calculate lr_st_ref
-  if (method == "ref") {
-    lr <- lr |>
-      dplyr::mutate(lr_st_ref = lr - lr[file_name == ref],
-                    .by = probeset_id)
-  }
-
-  # If method = "both", calculate lr_st_ref & lr_st_mean
-  if (method == "both") {
-    lr <- lr |>
-      dplyr::mutate(lr_st_ref = lr - lr[file_name == ref],
-                    lr_mean = mean(lr, na.rm = TRUE),
-                    .by = probeset_id) |>
-      dplyr::mutate(lr_st_mean = lr - lr_mean,
-                    .before = lr_st_ref) |>
-      dplyr::select(-lr_mean)
-  }
-
-}
diff --git a/R/standardize_lr.R b/R/standardize_lr.R
deleted file mode 100644
index fa780b3..0000000
--- a/R/standardize_lr.R
+++ /dev/null
@@ -1,62 +0,0 @@
-#' Standardize lr values
-#'
-#' @param lr lr values
-#' @param method how to perform standardization ("ref", "mean", or "both", defaults to "mean")
-#' @param ref name of the sample to use for standardization (must be defined if method is "ref" or "both", defaults to NULL)
-#'
-#' @return a [tibble()]
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' standardize_lr(lr, method = "mean", ref = NULL)
-#' }
-
-standardize_lr <- function(lr, method = "mean", ref = NULL) {
-
-  # Set NULL variables
-
-  file_name <- probeset_id <- NULL
-
-  # If method = "ref" or "both", check if ref is defined
-  if(method  %in% c("ref", "both")){
-    print("Checking if reference sample is defined")
-    if(is.null(ref)){
-      stop("Reference sample is not defined")
-    }
-    if(!is.null(ref) & !ref %in% unique(lr$file_name)) {
-      stop("Reference sample doesn't exist in the list of samples")
-    }
-  }
-
-  # If method = "mean", calculate lr_st_mean
-  if (method == "mean") {
-    lr <- lr |>
-      dplyr::mutate(lr_mean = mean(lr, na.rm = TRUE),
-                    .by = probeset_id) |>
-      dplyr::mutate(lr_st_mean = lr - lr_mean) |>
-      dplyr::select(-lr_mean)
-  }
-
-  # If method = "ref", calculate lr_st_ref
-  if (method == "ref") {
-    lr <- lr |>
-      dplyr::mutate(lr_st_ref = lr - lr[file_name == ref],
-                    .by = probeset_id)
-  }
-
-  # If method = "both", calculate lr_st_ref & lr_st_mean
-  if (method == "both") {
-    lr <- lr |>
-      dplyr::mutate(lr_st_ref = lr - lr[file_name == ref],
-                    lr_mean = mean(lr, na.rm = TRUE),
-                    .by = probeset_id) |>
-      dplyr::mutate(lr_st_mean = lr - lr_mean,
-                    .before = lr_st_ref) |>
-      dplyr::select(-lr_mean)
-  }
-
-  # Assign to Global Environment
-  assign(x = "lr", value = lr, pos = ".GlobalEnv")
-
-}
-- 
GitLab


From ddda846a7943e83986fd22eb43fa25667c855422 Mon Sep 17 00:00:00 2001
From: Jonathan Kitt <jonathan.kitt@inrae.fr>
Date: Mon, 3 Feb 2025 14:54:58 +0100
Subject: [PATCH 2/2] delete redundant functions and add count_otv parameter to
 detect_cpts()

---
 NAMESPACE             |  3 ---
 R/detect_cpts.R       | 38 +++++++++++++++++++++++++++++++++++---
 man/add_snps.Rd       | 26 --------------------------
 man/detect_cpts.Rd    |  7 ++++---
 man/standardise_lr.Rd | 26 --------------------------
 man/standardize_lr.Rd | 26 --------------------------
 6 files changed, 39 insertions(+), 87 deletions(-)
 delete mode 100644 man/add_snps.Rd
 delete mode 100644 man/standardise_lr.Rd
 delete mode 100644 man/standardize_lr.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 86f2d28..337771f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,11 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 
-export(add_snps)
 export(calculate_lr)
 export(detect_cpts)
 export(read_axiom)
-export(standardise_lr)
-export(standardize_lr)
 import(changepoint)
 import(dplyr)
 import(plyranges)
diff --git a/R/detect_cpts.R b/R/detect_cpts.R
index 12c86ad..f07a2c2 100644
--- a/R/detect_cpts.R
+++ b/R/detect_cpts.R
@@ -10,7 +10,7 @@
 #' @param pen_value penalty value (defaults  to 0)
 #' @param max_cpts maximum number of  changepoints (defaults to 1 for AMOC)
 #' @param min_length minimum segment length, i.e. number of markers (defaults to 1)
-#' @param count_otvs whether or not to add number of OTVs in detected segments (defaults to TRUE)
+#' @param count_otv whether or not to add number of OTVs in detected segments (defaults to TRUE)
 #'
 #' @import dplyr
 #' @import changepoint
@@ -22,13 +22,13 @@
 #' @examples
 #' \dontrun{
 #' detect_cpts(lr, file_name = NULL, chromosome = NULL, var = "lr_st_mean", calc = "mean", method = "AMOC",
-#' penalty = "MBIC", pen_value = 0, max_cpts = 1, min_length = 1 count_otvs = TRUE)
+#' penalty = "MBIC", pen_value = 0, max_cpts = 1, min_length = 1, count_otv = TRUE)
 #' }
 
 detect_cpts <- function(lr, file_name = NULL, chromosome = NULL,
                         var = "lr_st_mean", calc = "mean",
                         penalty = "MBIC", pen_value = 0,
-                        method = "AMOC", max_cpts = 3, min_length = 1) {
+                        method = "AMOC", max_cpts = 3, min_length = 1, count_otv = TRUE) {
 
   # Set NULL variables
 
@@ -200,6 +200,38 @@ detect_cpts <- function(lr, file_name = NULL, chromosome = NULL,
 
   }
 
+  if (count_otv == TRUE) {
+
+    ind <- unique(cpts$file_name)
+
+    olap_list <- list()
+
+    for (i in ind) {
+
+      seg <- cpts |>
+        dplyr::filter(file_name == i)
+
+      seg_gr <- seg |>
+        dplyr::select(seqnames = chromosome, start, end) |>
+        plyranges::as_granges()
+
+      snps_gr <- lr_sub |>
+        dplyr::filter(file_name == i) |>
+        dplyr::select(seqnames = chromosome, start = position, end = position, genotyping_call) |>
+        plyranges::as_granges()
+
+      otvs_gr <- snps_gr |>
+        plyranges::filter(genotyping_call == -2)
+
+      olap_list[[i]] <- seg |>
+        dplyr::mutate(count_snps = plyranges::count_overlaps(seg_gr, snps_gr),
+                      count_otvs = plyranges::count_overlaps(seg_gr, otvs_gr))
+    }
+
+    cpts <- dplyr::bind_rows(olap_list)
+
+  }
+
   return(cpts)
 
 }
diff --git a/man/add_snps.Rd b/man/add_snps.Rd
deleted file mode 100644
index 4b10d75..0000000
--- a/man/add_snps.Rd
+++ /dev/null
@@ -1,26 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/add_snps.R
-\name{add_snps}
-\alias{add_snps}
-\title{Add SNPs data}
-\usage{
-add_snps(lr, path_to_snps, rm_unknown = TRUE)
-}
-\arguments{
-\item{lr}{lr values}
-
-\item{path_to_snps}{file containing the list of SNPs (three columns needed : 'probeset_id', 'chromosome', 'position')}
-
-\item{rm_unknown}{whether or not to remove rows when 'chromosome' or 'position' is missing (defaults to TRUE)}
-}
-\value{
-a \code{\link[=tibble]{tibble()}}
-}
-\description{
-Add SNPs data
-}
-\examples{
-\dontrun{
-add_snps(lr, path_to_snps, rm_unknown = TRUE)
-}
-}
diff --git a/man/detect_cpts.Rd b/man/detect_cpts.Rd
index 776d89e..c9e2434 100644
--- a/man/detect_cpts.Rd
+++ b/man/detect_cpts.Rd
@@ -14,7 +14,8 @@ detect_cpts(
   pen_value = 0,
   method = "AMOC",
   max_cpts = 3,
-  min_length = 1
+  min_length = 1,
+  count_otv = TRUE
 )
 }
 \arguments{
@@ -38,7 +39,7 @@ detect_cpts(
 
 \item{min_length}{minimum segment length, i.e. number of markers (defaults to 1)}
 
-\item{count_otvs}{whether or not to add number of OTVs in detected segments (defaults to TRUE)}
+\item{count_otv}{whether or not to add number of OTVs in detected segments (defaults to TRUE)}
 }
 \value{
 a \code{\link[=tibble]{tibble()}}
@@ -49,6 +50,6 @@ Detect changepoints in signal intensity data
 \examples{
 \dontrun{
 detect_cpts(lr, file_name = NULL, chromosome = NULL, var = "lr_st_mean", calc = "mean", method = "AMOC",
-penalty = "MBIC", pen_value = 0, max_cpts = 1, min_length = 1 count_otvs = TRUE)
+penalty = "MBIC", pen_value = 0, max_cpts = 1, min_length = 1, count_otv = TRUE)
 }
 }
diff --git a/man/standardise_lr.Rd b/man/standardise_lr.Rd
deleted file mode 100644
index 48f2038..0000000
--- a/man/standardise_lr.Rd
+++ /dev/null
@@ -1,26 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/standardise_lr.R
-\name{standardise_lr}
-\alias{standardise_lr}
-\title{Standardise lr values}
-\usage{
-standardise_lr(lr, method = "mean", ref = NULL)
-}
-\arguments{
-\item{lr}{lr values}
-
-\item{method}{how to perform standardisation ("ref", "mean", or "both", defaults to "mean")}
-
-\item{ref}{name of the sample to use for standardisation (must be defined if method is "ref" or "both", defaults to NULL)}
-}
-\value{
-a \code{\link[=tibble]{tibble()}}
-}
-\description{
-Standardise lr values
-}
-\examples{
-\dontrun{
-standardise_lr(lr, method = "mean", ref = NULL)
-}
-}
diff --git a/man/standardize_lr.Rd b/man/standardize_lr.Rd
deleted file mode 100644
index 8792558..0000000
--- a/man/standardize_lr.Rd
+++ /dev/null
@@ -1,26 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/standardize_lr.R
-\name{standardize_lr}
-\alias{standardize_lr}
-\title{Standardize lr values}
-\usage{
-standardize_lr(lr, method = "mean", ref = NULL)
-}
-\arguments{
-\item{lr}{lr values}
-
-\item{method}{how to perform standardization ("ref", "mean", or "both", defaults to "mean")}
-
-\item{ref}{name of the sample to use for standardization (must be defined if method is "ref" or "both", defaults to NULL)}
-}
-\value{
-a \code{\link[=tibble]{tibble()}}
-}
-\description{
-Standardize lr values
-}
-\examples{
-\dontrun{
-standardize_lr(lr, method = "mean", ref = NULL)
-}
-}
-- 
GitLab