From cf9e474a8f12ad7f86a53854b7d51d79b87dd6e6 Mon Sep 17 00:00:00 2001
From: benjaminfurman <benjamin.ls.furman@gmail.com>
Date: Fri, 22 Aug 2025 16:02:43 -0700
Subject: [PATCH 1/7] feat: add function for mode ploidy.

---
 R/ploidy.R                      | 30 ++++++++++++++++++++++++++++++
 R/utils.R                       | 17 +++++++++++++++++
 vignettes/various_functions.Rmd | 17 ++++++++++++++++-
 3 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/R/ploidy.R b/R/ploidy.R
index b5f78b4..9334b08 100644
--- a/R/ploidy.R
+++ b/R/ploidy.R
@@ -40,3 +40,33 @@ weighted_ploidy <- function(
 
   return(sample_ploidy)
 }
+
+
+#' find the mode CN per chromosome, then mode across the chromosomes
+#'
+#' Done per chromosome first so that large chromosomes don't dominate the
+#' result. It is critical that the input be even bin level data, or this makes
+#' no sense to do. Measures of the copy number states need to be done in evenly
+#' sized bins.
+#'
+#' @param bin_df dataframe of bin level data
+#' @param sample_col string column name identifying samples
+#' @param cn_col string column name for copy number states
+#' @param chrom_col string column name for chromosomes
+#' @return tibble/dataframe of results by cell_id/sample
+mode_ploidy <- function(
+    bin_df,
+    sample_col = "cell_id",
+    cn_col = "state",
+    chrom_col = "chr") {
+  bin_df |>
+    dplyr::group_by(.data[[sample_col]], .data[[chrom_col]]) |>
+    dplyr::summarise(
+      chr_mode = cust_mode(.data[[cn_col]])
+    ) |>
+    dplyr::group_by(.data[[sample_col]]) |>
+    dplyr::summarise(
+      mode_ploidy = cust_mode(chr_mode)
+    ) |>
+    dplyr::ungroup()
+}
diff --git a/R/utils.R b/R/utils.R
index 9f111ac..3fad3ae 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -223,3 +223,20 @@ make_cellid_matrix <- function(cell_df, name_col, val_col) {
   rownames(cell_mtx) <- dplyr::pull(cell_mtx_init, cell_id)
   return(cell_mtx)
 }
+
+
+#' find most common element in a vector
+#'
+#' For ties, just the first one in the list is returned.
+#'
+#' @param x vector of values
+#' @return most common element of input vector
+#' @export
+cust_mode <- function(x, na.rm = FALSE) {
+  if (na.rm) {
+    x <- x[!is.na(x)]
+  }
+
+  ux <- unique(x)
+  return(ux[which.max(tabulate(match(x, ux)))])
+}
diff --git a/vignettes/various_functions.Rmd b/vignettes/various_functions.Rmd
index 38ccd9c..4d11611 100644
--- a/vignettes/various_functions.Rmd
+++ b/vignettes/various_functions.Rmd
@@ -283,6 +283,9 @@ filt_reads <- ex_reads |>
 c(filt_reads = nrow(filt_reads), input_reads = nrow(ex_reads))
 ```
 
+<br>
+<br>
+<br>
 
 ## Ploidy of samples
 
@@ -294,7 +297,19 @@ We can get an idea of the ploidy of a sample by taking the mean of the copy numb
 This function does that:
 
 ```{r}
-weighted_ploidy(ex_segs) |>
+dlptools::weighted_ploidy(ex_segs) |>
+  dplyr::slice_head(n = 5)
+```
+
+<br>
+
+Alternative, if you want integers, is to get the mode ploidy. The should only be done for evenly sized bins, or the mode wouldn't make much sense.
+
+```{r}
+dlptools::mode_ploidy(
+  ex_reads,
+  sample_col = "cell_id"
+) |>
   dplyr::slice_head(n = 5)
 ```
 

From 35debbc2d92a9923195e6bed45814f7abb211417 Mon Sep 17 00:00:00 2001
From: benjaminfurman <benjamin.ls.furman@gmail.com>
Date: Mon, 25 Aug 2025 10:56:41 -0700
Subject: [PATCH 2/7] feat: mark CN seg state relative to ploidy.

---
 NAMESPACE                       |  2 ++
 R/cn_features.R                 | 52 +++++++++++++++++++++++++++++++++
 man/cust_mode.Rd                | 17 +++++++++++
 man/make_cellid_matrix.Rd       |  8 ++---
 man/mode_ploidy.Rd              | 31 ++++++++++++++++++++
 man/ploidy_relative_cn_state.Rd | 27 +++++++++++++++++
 6 files changed, 133 insertions(+), 4 deletions(-)
 create mode 100644 man/cust_mode.Rd
 create mode 100644 man/mode_ploidy.Rd
 create mode 100644 man/ploidy_relative_cn_state.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 443f0e5..1005166 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -22,6 +22,7 @@ export(compute_tip_sibling_distances)
 export(convert_dists_to_pairwise)
 export(convert_long_reads_to_wide)
 export(create_expected_bins)
+export(cust_mode)
 export(expand_length_to_bins)
 export(extract_sigminer_wang_features)
 export(extract_wu_features)
@@ -54,6 +55,7 @@ export(mark_bins_overlapping_centromeres)
 export(mark_mask_regions)
 export(medicc_profiles_to_foreground)
 export(pairwise_bin_difference)
+export(ploidy_relative_cn_state)
 export(plot_bg_state_highlight)
 export(plot_fg_state_highlight)
 export(plot_heatmap_of_tip_changes)
diff --git a/R/cn_features.R b/R/cn_features.R
index b284897..f5281fe 100644
--- a/R/cn_features.R
+++ b/R/cn_features.R
@@ -214,3 +214,55 @@ extract_sigminer_wang_features <- function(segs_df) {
 
   return(sm_tally$nmf_matrix)
 }
+
+
+#' Could changes of state relative to ploidy.
+#'
+#' Marks CN segments as a gain or loss, relative to the mode ploidy of the
+#' sample.
+#'
+#' @param segs_df a dataframe of CN segments
+#' @param annotate_input boolean. return input dataframe annotating each
+#' @param return_matrix boolean. Return a cell-by-feature matrix of counts.
+#' @return tibble/dataframe of counts
+#' @export
+ploidy_relative_cn_state <- function(
+    segs_df,
+    sample_col = "cell_id",
+    annotate_input = FALSE,
+    return_matrix = FALSE) {
+  sample_mode_ploidy <- segs_df |>
+    reads_to_segs() |>
+    mode_ploidy(sample_col = sample_col)
+
+  segs_df <- dplyr::left_join(
+    segs_df,
+    sample_mode_ploidy,
+    by = sample_col
+  )
+
+  segs_df <- segs_df |>
+    dplyr::mutate(
+      cn_v_ploidy = dplyr::case_when(
+        state < mode_ploidy ~ "ploidy-loss",
+        state > mode_ploidy ~ "ploidy-gain",
+        state == mode_ploidy ~ "ploidy-match"
+      )
+    )
+
+  if (annotate_input) {
+    return(segs_df)
+  }
+
+  feat_count <- segs_df |>
+    dplyr::group_by(.data[[sample_col]], .drop = FALSE) |>
+    dplyr::count(cn_v_ploidy) |>
+    dplyr::ungroup()
+
+  if (return_matrix) {
+    feat_mtx <- make_cellid_matrix(feat_count, "cn_v_ploidy", "n")
+    return(feat_mtx)
+  }
+
+  return(feat_count)
+}
diff --git a/man/cust_mode.Rd b/man/cust_mode.Rd
new file mode 100644
index 0000000..2df51e8
--- /dev/null
+++ b/man/cust_mode.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{cust_mode}
+\alias{cust_mode}
+\title{find most common element in a vector}
+\usage{
+cust_mode(x, na.rm = FALSE)
+}
+\arguments{
+\item{x}{vector of values}
+}
+\value{
+most common element of input vector
+}
+\description{
+For ties, just the first one in the list is returned.
+}
diff --git a/man/make_cellid_matrix.Rd b/man/make_cellid_matrix.Rd
index 87aa7c2..9809f96 100644
--- a/man/make_cellid_matrix.Rd
+++ b/man/make_cellid_matrix.Rd
@@ -25,12 +25,12 @@ can be pivoted out.
 }
 \examples{
 cell_df <- data.frame(
-   cell_id = c("cell_1", "cell_1", "cell_1"),
-   some_col = c("A", "B", "C"),
-   some_val = 1:3
+  cell_id = c("cell_1", "cell_1", "cell_1"),
+  some_col = c("A", "B", "C"),
+  some_val = 1:3
 )
 
-make_cellid_matrix(cell_df, name_col="some_col", val_col="some_val")
+make_cellid_matrix(cell_df, name_col = "some_col", val_col = "some_val")
 
 #       A B C
 # cell_1 1 2 3
diff --git a/man/mode_ploidy.Rd b/man/mode_ploidy.Rd
new file mode 100644
index 0000000..59dbbca
--- /dev/null
+++ b/man/mode_ploidy.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ploidy.R
+\name{mode_ploidy}
+\alias{mode_ploidy}
+\title{find the mode CN per chromosome, then mode across the chromosomes}
+\usage{
+mode_ploidy(
+  bin_df,
+  sample_col = "cell_id",
+  cn_col = "state",
+  chrom_col = "chr"
+)
+}
+\arguments{
+\item{bin_df}{dataframe of bin level data}
+
+\item{sample_col}{string column name identifying samples}
+
+\item{cn_col}{string column name for copy number states}
+
+\item{chrom_col}{string column name for chromosomes}
+}
+\value{
+tibble/dataframe of results by cell_id/sample
+}
+\description{
+Done per chromosome first so that large chromosomes don't dominate the
+result. It is critical that the input be even bin level data, or this makes
+no sense to do. Measures of the copy number states need to be done in evenly
+sized bins.
+}
diff --git a/man/ploidy_relative_cn_state.Rd b/man/ploidy_relative_cn_state.Rd
new file mode 100644
index 0000000..67fd481
--- /dev/null
+++ b/man/ploidy_relative_cn_state.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cn_features.R
+\name{ploidy_relative_cn_state}
+\alias{ploidy_relative_cn_state}
+\title{Could changes of state relative to ploidy.}
+\usage{
+ploidy_relative_cn_state(
+  segs_df,
+  sample_col = "cell_id",
+  annotate_input = FALSE,
+  return_matrix = FALSE
+)
+}
+\arguments{
+\item{segs_df}{a dataframe of CN segments}
+
+\item{annotate_input}{boolean. return input dataframe annotating each}
+
+\item{return_matrix}{boolean. Return a cell-by-feature matrix of counts.}
+}
+\value{
+tibble/dataframe of counts
+}
+\description{
+Marks CN segments as a gain or loss, relative to the mode ploidy of the
+sample.
+}

From 5cda4de99d1365d92b0e05aba665f7d0acb282eb Mon Sep 17 00:00:00 2001
From: benjaminfurman <benjamin.ls.furman@gmail.com>
Date: Wed, 27 Aug 2025 14:22:14 -0700
Subject: [PATCH 3/7] feat: add segment type marking.

---
 NAMESPACE                                |   3 +
 R/bin_filling.R                          |  30 +-
 R/centromere_marking.R                   | 136 -------
 R/chromosome_layouts.R                   | 433 +++++++++++++++++++++++
 R/cn_features.R                          |   2 +-
 R/utils.R                                |   8 +
 inst/extdata/hg19_gap.txt.gz             | Bin 0 -> 7423 bytes
 inst/extdata/hg38_gap.txt.gz             | Bin 0 -> 12368 bytes
 man/add_centromere_locations.Rd          |   2 +-
 man/add_chromosome_length.Rd             |  18 +
 man/add_telomere_positions.Rd            |  23 ++
 man/fill_state_from_neighbours.Rd        |   2 +-
 man/import_telos_file.Rd                 |  28 ++
 man/is_chr_used_in_chroms.Rd             |  15 +
 man/load_chrom_info_file.Rd              |   2 +-
 man/load_ucsc_centromeres.Rd             |   2 +-
 man/mark_bins_overlapping_centromeres.Rd |   2 +-
 man/mark_segs_chromosome_span.Rd         |  67 ++++
 vignettes/feature_extraction.Rmd         |  22 ++
 19 files changed, 624 insertions(+), 171 deletions(-)
 delete mode 100644 R/centromere_marking.R
 create mode 100644 R/chromosome_layouts.R
 create mode 100644 inst/extdata/hg19_gap.txt.gz
 create mode 100644 inst/extdata/hg38_gap.txt.gz
 create mode 100644 man/add_chromosome_length.Rd
 create mode 100644 man/add_telomere_positions.Rd
 create mode 100644 man/import_telos_file.Rd
 create mode 100644 man/is_chr_used_in_chroms.Rd
 create mode 100644 man/mark_segs_chromosome_span.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 1005166..bded12f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -10,6 +10,7 @@ export(RAW_COPY_COLORS)
 export(STATE_COLORS)
 export(add_centromere_locations)
 export(add_missing_bins_for_cells)
+export(add_telomere_positions)
 export(add_tip_ancestors_to_df)
 export(basic_tile_plot)
 export(cell_cn_profile)
@@ -42,6 +43,7 @@ export(get_tips_that_avoid_redundant_comps)
 export(import_annotations_df)
 export(import_clones)
 export(import_dlp_files)
+export(import_telos_file)
 export(import_tree)
 export(library_from_cell)
 export(load_chrom_info_file)
@@ -53,6 +55,7 @@ export(make_tree_plot_obj)
 export(map_states_to_letters)
 export(mark_bins_overlapping_centromeres)
 export(mark_mask_regions)
+export(mark_segs_chromosome_span)
 export(medicc_profiles_to_foreground)
 export(pairwise_bin_difference)
 export(ploidy_relative_cn_state)
diff --git a/R/bin_filling.R b/R/bin_filling.R
index 9b03eea..f47fa65 100644
--- a/R/bin_filling.R
+++ b/R/bin_filling.R
@@ -1,31 +1,3 @@
-#' loading UCSC chromosome length files
-#' @param version default "hg19", can also load "hg38"
-#' @return tibgble of chromosome, total length, etc.
-#' @export
-load_chrom_info_file <- function(version = c("hg19", "hg38")) {
-  version <- match.arg(version)
-
-  chrom_files <- list(
-    hg19 = "hg19_chromInfo.txt.gz",
-    hg38 = "hg19_chromInfo.txt.gz"
-  )
-
-  chrom_info <- get_package_file_path(chrom_files[version]) |>
-    vroom::vroom(
-      col_names = c("chr", "total_length", "misc"),
-      show_col_types = FALSE
-    ) |>
-    dplyr::filter(
-      # remove the unnecessary chromosomes
-      stringr::str_detect(chr, "_|M", negate = TRUE)
-    ) |>
-    dplyr::mutate(
-      chr = stringr::str_replace(chr, "chr", "")
-    )
-
-  return(chrom_info)
-}
-
 #' For a given length, create bins of a given size
 #'
 #' e.g., length = 10, bin = 5, will get bins: 1-5, 6-10
@@ -128,7 +100,7 @@ add_missing_bins_for_cells <- function(
 #' other states to fill in.
 #'
 #' @param state_df dataframe of state data with NAs for some bins that need
-#' to be filled in. E.g., after dlptools::running add_missing_bins_for_cells()
+#' to be filled in. E.g., after dlptools::add_missing_bins_for_cells()
 #' @export
 #' @importFrom rlang .data
 fill_state_from_neighbours <- function(state_df, cols_to_fill = c("state")) {
diff --git a/R/centromere_marking.R b/R/centromere_marking.R
deleted file mode 100644
index cb505fa..0000000
--- a/R/centromere_marking.R
+++ /dev/null
@@ -1,136 +0,0 @@
-#' add centromere information to reads by chromosome
-#'
-#' See dlptools::read_and_prep_ucsg_cenrtomeres() for details of file origins.
-#'
-#' @param centro_file NULL or string to the path if you download yourself
-#' @param hg19 boolean to target hg19 for loading
-#' @param hg38 boolean to target hg38 for loading
-#' @return input table with centromere information added by chromosome
-#' @export
-add_centromere_locations <- function(
-    reads_df, centro_file = NULL, version = c("hg19", "hg38")) {
-  version_choice <- match.arg(version)
-
-  centros <- load_ucsc_centromeres(
-    centro_file = centro_file, version = version_choice
-  )
-
-  reads_df <- dplyr::left_join(
-    reads_df,
-    centros,
-    by = dplyr::join_by(chr == chrom)
-  )
-
-  return(reads_df)
-}
-
-#' add boolean if bin overlaps with a centromere.
-#'
-#' Can optionally specify a padding to mark locations of bins as being close
-#' enough to a centromere. Often bins near centromeres are corrupt in their
-#' state calls. In the past, we have filtered within 3 Mb of centromeres.
-#'
-#' IMPORTANT! This adds the padding to each side of the centromere. So if you
-#' specify a padding of 3 Mb, it will be within 3 Mb of the start and 3 Mb of
-#' the end of the centromere.
-#'
-#' @param reads_df tibble of read data
-#' @param padding int of number of BP to add to each side of the centromere
-#' @param bin_start_col Default: start. column name of the start of bins.
-#' @param bin_end_col Default: end column name of the end of bins.
-#' @param version default 'hg19', or choose 'hg38' for locations of centromeres.
-#' @return input table, but with a boolean 'within_centro' column added (and
-#' potentially other centromere information columns, if needed)
-#' @export
-mark_bins_overlapping_centromeres <- function(
-    reads_df,
-    padding = 0,
-    bin_start_col = "start",
-    bin_end_col = "end",
-    version = c("hg19", "hg38")) {
-  version_choice <- match.arg(version)
-  columns_check <- all(c("centro_start", "centro_end") %in% colnames(reads_df))
-
-  if (!columns_check) {
-    warning(paste0(
-      "centromere locations not present, adding locations for ",
-      version_choice,
-      ". If that is not correct, set version to 'hg38')."
-    ))
-
-    reads_df <- add_centromere_locations(reads_df = reads_df)
-  }
-
-  reads_df <- reads_df |>
-    dplyr::mutate(
-      centromere_padding = padding,
-      overlaps_centro = (
-        .data[[bin_start_col]] <= centro_end + padding &
-          .data[[bin_end_col]] >= centro_start - padding
-      )
-    )
-
-  return(reads_df)
-}
-
-
-#' load and prep default centromerefiles from UCSC
-#'
-#' Cytoband files originally obtained from UCSC golden path genomes and parsed
-#' to collect the total centromere spans.
-#'
-#' hg19:
-#' https://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/cytoBand.txt.gz
-#' hg38:
-#' https://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/cytoBand.txt.gz
-#'
-#' TODO: can I stash the file if it's already loaded once?
-#'
-#' @param centro_file NULL or string to the path if you download yourself
-#' @param hg19 boolean to target hg19 for loading
-#' @param hg38 boolean to target hg38 for loading
-#' @return tibble of parsed centromere spans
-#' @export
-load_ucsc_centromeres <- function(
-    version = c("hg19", "hg38"),
-    centro_file = NULL) {
-  version_choice <- match.arg(version)
-
-  if (is.null(centro_file)) {
-    default_centro_files <- c(
-      # https://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/cytoBand.txt.gz
-      hg19 = "hg19_cytoBand.txt.gz",
-      # https://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/cytoBand.txt.gz
-      hg38 = "hg38_cytoBand.txt.gz"
-    )
-
-    centro_file <- get_package_file_path(default_centro_files[version_choice])
-  }
-
-  # import file and filter to features of interest
-  centros <- vroom::vroom(
-    centro_file,
-    col_names = c("chrom", "start", "end", "arm_full_name", "feat"),
-    show_col_types = FALSE
-  ) |>
-    dplyr::filter(feat == "acen") |>
-    dplyr::mutate(
-      # removes the 'chr' from the chromosome name, e.g., chr1 -> 1
-      chrom = stringr::str_extract(chrom, "([A-Z]|[0-9]+)")
-    )
-  centros <- centros |>
-    dplyr::group_by(chrom) |>
-    dplyr::mutate(
-      centro_start = min(start),
-      centro_end = max(end),
-      centro_span = centro_end - centro_start,
-      arm = stringr::str_extract(arm_full_name, "[a-z]")
-    )
-
-  # pivot out to one line per chromosome of centromere measurements
-  centros <- centros |>
-    dplyr::select(-c(arm_full_name, feat)) |>
-    tidyr::pivot_wider(names_from = arm, values_from = c(start, end))
-
-  return(centros)
-}
diff --git a/R/chromosome_layouts.R b/R/chromosome_layouts.R
new file mode 100644
index 0000000..b67aa44
--- /dev/null
+++ b/R/chromosome_layouts.R
@@ -0,0 +1,433 @@
+#' loading UCSC chromosome length files
+#' @param version default "hg19", can also load "hg38"
+#' @return tibgble of chromosome, total length, etc.
+#' @export
+load_chrom_info_file <- function(version = c("hg19", "hg38")) {
+  version <- match.arg(version)
+
+  chrom_files <- list(
+    hg19 = "hg19_chromInfo.txt.gz",
+    hg38 = "hg38_chromInfo.txt.gz"
+  )
+
+  chrom_info <- get_package_file_path(chrom_files[version]) |>
+    vroom::vroom(
+      col_names = c("chr", "total_length", "misc"),
+      show_col_types = FALSE
+    ) |>
+    dplyr::filter(
+      # remove the unnecessary chromosomes
+      stringr::str_detect(chr, "_|M", negate = TRUE)
+    ) |>
+    dplyr::mutate(
+      chr = stringr::str_replace(chr, "chr", "")
+    )
+
+  return(chrom_info)
+}
+
+#' add chromosome lengths to a cn dataframe
+#'
+#' Handles chromosomes specified with "chr#" or just #/X/Y.
+#'
+#' @param cn_df dataframe of copy number information
+#' @param version string. hg19 (default) or hg38
+#' @param chrom_col string. name of column with chromosome information.
+add_chromosome_length <- function(cn_df, version = c("hg19", "hg38"), chrom_col = "chr") {
+  chrom_info <- load_chrom_info_file(version = version)
+
+  if (is_chr_used_in_chroms(cn_df[[chrom_col]])) {
+    chrom_info <- dplyr::mutate(chrom_info, chr = stringr::str_c("chr", chr))
+  }
+
+  cn_df <- dplyr::left_join(
+    cn_df,
+    chrom_info,
+    by = dplyr::join_by({{ chrom_col }} == chr)
+  )
+
+  return(cn_df)
+}
+
+
+#' add centromere information to reads by chromosome
+#'
+#' See dlptools::read_and_prep_ucsg_cenrtomeres() for details of file origins.
+#'
+#' @param centro_file NULL or string to the path if you download yourself
+#' @param hg19 boolean to target hg19 for loading
+#' @param hg38 boolean to target hg38 for loading
+#' @return input table with centromere information added by chromosome
+#' @export
+add_centromere_locations <- function(
+    reads_df, centro_file = NULL, version = c("hg19", "hg38")) {
+  version_choice <- match.arg(version)
+
+  centros <- load_ucsc_centromeres(
+    centro_file = centro_file, version = version_choice
+  )
+
+  reads_df <- dplyr::left_join(
+    reads_df,
+    centros,
+    by = dplyr::join_by(chr == chrom)
+  )
+
+  return(reads_df)
+}
+
+#' add boolean if bin overlaps with a centromere.
+#'
+#' Can optionally specify a padding to mark locations of bins as being close
+#' enough to a centromere. Often bins near centromeres are corrupt in their
+#' state calls. In the past, we have filtered within 3 Mb of centromeres.
+#'
+#' IMPORTANT! This adds the padding to each side of the centromere. So if you
+#' specify a padding of 3 Mb, it will be within 3 Mb of the start and 3 Mb of
+#' the end of the centromere.
+#'
+#' @param reads_df tibble of read data
+#' @param padding int of number of BP to add to each side of the centromere
+#' @param bin_start_col Default: start. column name of the start of bins.
+#' @param bin_end_col Default: end column name of the end of bins.
+#' @param version default 'hg19', or choose 'hg38' for locations of centromeres.
+#' @return input table, but with a boolean 'within_centro' column added (and
+#' potentially other centromere information columns, if needed)
+#' @export
+mark_bins_overlapping_centromeres <- function(
+    reads_df,
+    padding = 0,
+    bin_start_col = "start",
+    bin_end_col = "end",
+    version = c("hg19", "hg38")) {
+  version_choice <- match.arg(version)
+  columns_check <- all(c("centro_start", "centro_end") %in% colnames(reads_df))
+
+  if (!columns_check) {
+    warning(paste0(
+      "centromere locations not present, adding locations for ",
+      version_choice,
+      ". If that is not correct, set version to 'hg38')."
+    ))
+
+    reads_df <- add_centromere_locations(reads_df = reads_df)
+  }
+
+  reads_df <- reads_df |>
+    dplyr::mutate(
+      centromere_padding = padding,
+      overlaps_centro = (
+        .data[[bin_start_col]] <= centro_end + padding &
+          .data[[bin_end_col]] >= centro_start - padding
+      )
+    )
+
+  return(reads_df)
+}
+
+
+#' load and prep default centromerefiles from UCSC
+#'
+#' Cytoband files originally obtained from UCSC golden path genomes and parsed
+#' to collect the total centromere spans.
+#'
+#' hg19:
+#' https://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/cytoBand.txt.gz
+#' hg38:
+#' https://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/cytoBand.txt.gz
+#'
+#' TODO: can I stash the file if it's already loaded once?
+#'
+#' @param centro_file NULL or string to the path if you download yourself
+#' @param hg19 boolean to target hg19 for loading
+#' @param hg38 boolean to target hg38 for loading
+#' @return tibble of parsed centromere spans
+#' @export
+load_ucsc_centromeres <- function(
+    version = c("hg19", "hg38"),
+    centro_file = NULL) {
+  version_choice <- match.arg(version)
+
+  if (is.null(centro_file)) {
+    default_centro_files <- c(
+      # https://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/cytoBand.txt.gz
+      hg19 = "hg19_cytoBand.txt.gz",
+      # https://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/cytoBand.txt.gz
+      hg38 = "hg38_cytoBand.txt.gz"
+    )
+
+    centro_file <- get_package_file_path(default_centro_files[version_choice])
+  }
+
+  # import file and filter to features of interest
+  centros <- vroom::vroom(
+    centro_file,
+    col_names = c("chrom", "start", "end", "arm_full_name", "feat"),
+    show_col_types = FALSE
+  ) |>
+    dplyr::filter(feat == "acen") |>
+    dplyr::mutate(
+      # removes the 'chr' from the chromosome name, e.g., chr1 -> 1
+      chrom = stringr::str_extract(chrom, "([A-Z]|[0-9]+)")
+    )
+  centros <- centros |>
+    dplyr::group_by(chrom) |>
+    dplyr::mutate(
+      centro_start = min(start),
+      centro_end = max(end),
+      centro_span = centro_end - centro_start,
+      arm = stringr::str_extract(arm_full_name, "[a-z]")
+    )
+
+  # pivot out to one line per chromosome of centromere measurements
+  centros <- centros |>
+    dplyr::select(-c(arm_full_name, feat)) |>
+    tidyr::pivot_wider(names_from = arm, values_from = c(start, end))
+
+  return(centros)
+}
+
+
+#' load UCSC gap files for telomeres
+#'
+#' Honestly, this function is sort of pointless. The telomeres in both genome
+#' versions are marked as 10Kb for p and q on all chromosomes. Telomeres are
+#' variable and better estimates of their size exist, e.g.,
+#' https://www.nature.com/articles/s41467-024-48917-7
+#'
+#' This function just serves as a way to set up easy marking of where CNs occur
+#' on their respective chromosome.
+#'
+#' The only exception to the 10Kb size is chr17. It is not listed in the gap
+#' file for hg19. chr17 is known to have small chromosomes. Here, I set the p
+#' to 3000 Kb and q to 5000 Kb, inferred from the article referenced above.
+#'
+#' @param version string. hg19 (default) or hg38
+#' @return tibble of telomere information
+#' @export
+import_telos_file <- function(version = c("hg19", "hg38")) {
+  version_choice <- match.arg(version)
+
+  default_telo_files <- c(
+    # https://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/gap.txt.gz
+    hg19 = "hg19_gap.txt.gz",
+    # https://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/gap.txt.gz
+    hg38 = "hg38_gap.txt.gz"
+  )
+
+  telos_file <- get_package_file_path(default_telo_files[version_choice])
+
+
+  # col names by inspection and checking against here by looking up gap table
+  # data format description
+  # https://genome.ucsc.edu/cgi-bin/hgTables?hgsid=2018561924_ZAtdZC9CFEJw8BKiYeUjd2ImvhS7&clade=mammal&org=Human&db=hg19&hgta_group=allTables&hgta_track=hg19&hgta_table=gap&hgta_regionType=genome&position=chr7%3A155%2C592%2C223-155%2C605%2C565&hgta_outputType=primaryTable&hgta_outFileName=
+  telos <- readr::read_tsv(
+    telos_file,
+    col_names = c(
+      "bin", "chrom", "telostart", "teloend", "ix", "n", "size", "feat_type", "bridge"
+    )
+  ) |>
+    dplyr::filter(feat_type == "telomere") |>
+    dplyr::mutate(
+      chrom = dplyr::if_else(
+        # removes the 'chr' from the chromosome name, e.g., chr1 -> 1
+        # this is to facilitate matching with our reads/segs files that just
+        # have 0-0/X/Y
+        chrom %in% c("chrX", "chrY"),
+        stringr::str_extract(chrom, "[A-Z]"),
+        stringr::str_extract(chrom, "[0-9]+")
+      )
+    )
+
+  # now mark p and q telos
+  telos <- telos |>
+    dplyr::mutate(
+      arm = dplyr::if_else(telostart == 0, "p", "q")
+    ) |>
+    dplyr::select(arm, chr = chrom, telostart, teloend) |>
+    tidyr::pivot_wider(names_from = arm, values_from = c(telostart, teloend))
+
+  # special case for chr17
+  # for hg19, chr17 doesn't have telomeres listed, so need to fake some
+  # telomere data.
+  # marking p as 3000 and q as 5000 following:
+  # https://www.nature.com/articles/s41467-024-48917-7
+  chr17_total_len <- load_chrom_info_file() |>
+    dplyr::filter(chr == 17) |>
+    dplyr::pull(total_length)
+
+  chr17_telo <- data.frame(
+    chr = "17",
+    telostart_p = 0,
+    teloend_p = 3000,
+    telostart_q = chr17_total_len - 5000,
+    teloend_q = chr17_total_len
+  )
+
+  telos <- dplyr::bind_rows(telos, chr17_telo) |>
+    dplyr::relocate(chr, telostart_p, teloend_p)
+
+  return(telos)
+}
+
+#' add telomere positions to a CN df
+#'
+#' See [dlptools::import_telos_file()] but really just a convenience function.
+#' Telomere positions in the UCSC gap file are all just 10Kb, except chr17,
+#' which I set to 3Kb and 5Kb, again see linked function for details.
+#'
+#' @param cn_df dataframe of CN information, segments or read bins. Just needs
+#' a 'chr' column.
+#' @param version string. hg19 (default) or hg38, not that it makes a
+#' difference.
+#' @return input dataframe with new columns of telomere information
+#' @export
+add_telomere_positions <- function(cn_df, version = c("hg19", "hg38")) {
+  telos <- import_telos_file(version = version)
+
+  cn_df <- dplyr::left_join(cn_df, telos, by = "chr")
+
+  return(cn_df)
+}
+
+
+#' label CN segments based on relative chromosomal positions
+#'
+#' Uses centromere and telomere coordinates to label where a segment sits on a
+#' chromosome, relative to telomeres and centromeres.
+#'
+#' Possible categories are:
+#' 1. telomere bound (telo-bound) - segment touches a telomere
+#' 2. centromere bound (centro-bound) - segment touches or crosses the
+#' centromere
+#' 3. arm (arm) - segment spans a whole are (*with conditions)
+#' 4. whole chromosome (whole-chrom) - segment spans the entire chromosome
+#' (*with conditions)
+#' 5. intersitial (inert) - occuring within the chromosome, not touching the
+#' centromere, telomeres, and not big enough to be an entire arm.
+#'
+#' You can set a min_bound_distance which reflects how close a feature needs to
+#' be to be considered "touching". For example, we can considere a segment
+#' telomere bound if within traditional 1 DLP bin by setting this distance to
+#' 500k (default). This allows for some level of measurement error.
+#'
+#' Users can also set the proportion of the arm or whole chromosome a segment
+#' needs to span to be considered either category. Default is spaning 90% of
+#' either feature. Meaning, if the segment (end - start)/arm_length is at least
+#' 90% of the arm_length, the segment is considered an "arm" spanning segment.
+#'
+#' This input dataframe requires several columns to be pregenerated. Users need
+#' to run [dlptools::add_chromosome_length()],
+#' [dlptools::add_centromere_locations()], and
+#' [dlptools::add_telomere_positions()]. See the the [features vignette](https://molonc.github.io/dlptools/articles/feature_extraction.html) for more
+#' details
+#'
+#' @param segs_df dataframe of CN segments
+#' @param min_bound_distance integer. Distance to adjacent feature to be
+#' considered associated with that feature.
+#' @param min_span_of_arm float. Proportion of arm to cover to be considered an
+#' arm segment.
+#' @param min_span_of_chrom float. Proportion of the chromosome to cover to be
+#' considered a whole chromosome segment.
+#' @param acro_fix_whole_chrom. boolean. Whether to reset acrocentric
+#' chromosome CN segments to "whole-chrom" if they span futher than the Q arm.
+#' Honestly, probably not useful
+#' @return input dataframe, annotated with segment scale information. Primary
+#' column of interest being seg_span_event.
+#' @export
+mark_segs_chromosome_span <- function(
+    segs_df,
+    min_bound_distance = 5e5, # given scale of DLP, this should probably be 1 bin width, at least
+    min_span_of_chrom = 0.9,
+    min_span_of_arm = 0.9,
+    acro_fix_whole_chrom = FALSE) {
+  required_cols <- c("teloend_p", "telostart_q", "centro_start", "centro_end", "total_length")
+  if (!all(required_cols %in% colnames(segs_df))) {
+    stop(
+      paste(
+        "required columns missing! Need:",
+        paste(required_cols, collapse = ", "),
+        "see functions: dlptools::add_chromosome_length, dlptools::add_centromere_locations, dlptools::add_telomere_positions",
+        sep = "\n"
+      )
+    )
+  }
+
+  segs_df <- segs_df |>
+    dplyr::mutate(
+      # find closest telomere to the CN event
+      telo_p_dist = start - teloend_p,
+      telo_q_dist = telostart_q - end,
+      telo_dist = dplyr::case_when(
+        # in telo or touching at both ends
+        telo_p_dist <= 0 & telo_q_dist <= 0 ~ 0,
+        # in or touching at one end
+        telo_p_dist <= 0 | telo_q_dist <= 0 ~ 0,
+        # closer to p
+        telo_p_dist <= telo_q_dist ~ telo_p_dist,
+        # closer to q
+        telo_p_dist > telo_q_dist ~ telo_q_dist,
+      ),
+      telo_bound = telo_dist <= min_bound_distance,
+
+      # find where CN sits relative to centromere
+      centro_p_dist = centro_start - end,
+      centro_q_dist = start - centro_end,
+      centro_dist = dplyr::case_when(
+        # crossing centro somewhere
+        centro_p_dist <= 0 & centro_q_dist <= 0 ~ 0,
+        # on the p side
+        centro_p_dist >= 0 & centro_q_dist < 0 ~ centro_p_dist,
+        # on th q side
+        centro_p_dist < 0 & centro_q_dist >= 0 ~ centro_q_dist,
+      ),
+      spans_centro = end > centro_end & start < centro_start,
+      centro_bound = centro_dist <= min_bound_distance,
+
+      # span of chromosome
+      spans_chrom = (end - start) / total_length >= min_span_of_chrom,
+
+      # marking of arm events.
+      p_arm_length = centro_start - teloend_p,
+      q_arm_length = telostart_q - centro_end,
+      spans_arm = dplyr::case_when(
+        end < centro_start ~ (end - start) / p_arm_length >= min_span_of_arm,
+        start > centro_end ~ (end - start) / q_arm_length >= min_span_of_arm,
+        .default = FALSE
+      ),
+
+      # trying to be simple and Shih et al. 2023 10.1038/s41586-023-06266-3
+      # inspired
+      seg_span_event = dplyr::case_when(
+        spans_chrom ~ "whole-chrom",
+        centro_bound & telo_bound & !spans_centro ~ "arm",
+        !telo_bound & !centro_bound & !spans_chrom & spans_arm ~ "arm",
+        telo_bound & !centro_bound & !spans_chrom ~ "telo-bound",
+        centro_bound | spans_centro ~ "centro-bound",
+        .default = "inter"
+      )
+    )
+
+  if (acro_fix_whole_chrom) {
+    # these chromosomes have p arms >10Mb. Short, but not crazy. Might not need
+    # this fix.
+    acro_chroms <- c("13", "14", "15", "21", "22")
+
+    segs_df <- segs_df |>
+      dplyr::mutate(
+        # basically span of the q arm
+        acro_span = dplyr::if_else(
+          chr %in% acro_chroms & start > centro_start,
+          (end - start) / (total_length - centro_end),
+          NA
+        ),
+        seg_span_event = dplyr::case_when(
+          !(chr %in% acro_chroms) ~ seg_span_event,
+          acro_span >= q_span ~ "whole-chrom",
+          .default = seg_span_event
+        )
+      )
+  }
+
+  return(segs_df)
+}
diff --git a/R/cn_features.R b/R/cn_features.R
index f5281fe..5dbff35 100644
--- a/R/cn_features.R
+++ b/R/cn_features.R
@@ -232,7 +232,7 @@ ploidy_relative_cn_state <- function(
     annotate_input = FALSE,
     return_matrix = FALSE) {
   sample_mode_ploidy <- segs_df |>
-    reads_to_segs() |>
+    segs_to_reads() |>
     mode_ploidy(sample_col = sample_col)
 
   segs_df <- dplyr::left_join(
diff --git a/R/utils.R b/R/utils.R
index 3fad3ae..7e65c26 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -240,3 +240,11 @@ cust_mode <- function(x, na.rm = FALSE) {
   ux <- unique(x)
   return(ux[which.max(tabulate(match(x, ux)))])
 }
+
+
+#' detect if chromosomes labels include "chr"
+#' @param chrom_vec vector of chromosome labels, probably pulled from a
+#' dataframe of cn information.
+is_chr_used_in_chroms <- function(chrom_vec) {
+  return(any(stringr::str_detect(unique(chrom_vec), "chr")))
+}
diff --git a/inst/extdata/hg19_gap.txt.gz b/inst/extdata/hg19_gap.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..9d45f51c3e934bd1828b9c239af1833d230b772e
GIT binary patch
literal 7423
zcmV<b9021ViwFp%J$GIJ17~4yE_8Tw0F;<pa$H9UhMy;^X`rF$O=Jrymm|kc*;19F
zhh+Ep>D!ELq;d4%^I76QeFGYfx?VnhexXabP{GfIF265&ef;|T?eXRD^7Q<hx=k}`
ztx~%%k4J^E4&#qspPwEt|33Z=(>6`nTBCju8V|dgK0m#F`E$P2r)g~@<`?Dh@Wj-L
z?zc*1E_nKQ3MX@DYqsK5VR(_=$FsIr-B0iKx)vuzxpxOpnV^)se(xCxQ}4t8S)g_A
z-}fW<R;V_zP*|CuEgPZl?Iu=J4`>s*CH7QkZBO^+#rYzo@B~R0P14pmQ4u%k#ezC5
za?gf2>)>L&ky=ozi%jllx|nqZ;r)wD)7f1`R9&moAeBYde;5c7mZLa!^-wENNLdH-
z`1Ja6e7kJjv95g8jd<`S{QUU(`0@<%=WicgzdVg$%RZ{gE!>6&`6T=**^*mLN0eBQ
z5KZEiZ;7?NCsnw!lLa9f264;$h|J3oX+<<>kS*fYGk3-D91(&So)qMZFkU92YZ%p9
zx;B>}s*~6?32TV-h?r~jNo^8)OGG!*!$<_%K{|>3N9R}@adxj7Mxrr0Ym>NBKD%>J
z@bLoUAYa5?ZZ_g(sJ<hssv|Ry5AsP;(t~Q{EXv5LX$gY&4}xQ5euRv(HE>yEtRN|q
zgtF9#H*Sp=HpG4qwMoLB`QEKa3`uN=lqO+`sl!Iv-zqo<A^U=wOcF`6MPoXm4v)3i
zhhEC0namQ=@uFU}TMaMoS|%YxvW>7cJ1*LBpW#@L)Nryv<arR@h)jf;2UQ;gQNDYK
z)}>-Wvh-?$`lQ(SWP14f5{VKfNQ1aeBF(&9c4RO%56K_@Wo({KTK@Xe^y<FcVav4`
zPtr+J_>Gca?s>UeI&8c7Hyi(Q5^jxoTVihzn;MyyCs=wBJ1vC(5n&o&6LMEsI|N0?
zW!=LNOPx?ln%ItD+2IJbfZn`S5F1N(%qz}s={OsxuWKPBTFsFB0DXYetk&vs6jtGz
zRs-4qX=%bG--?FI2e}HM6J%uDpVxmQX>bq}SMcEpC>HKWyrAMiL`BvdfNf0Ybyk9H
zP!$Z)X+e>6yV`cFNJOYd2l#+WS$OnS@`RyuM{x*1J`bpzIT&2zcrT*}?zP1YsF>QI
zlDZ}4eMF9Ij#_ElWj1NnRzJsK*{&J6(W#aigu-ADuA8)!xDlb0P#&ojS!@szbSgT5
zhI$kSIr1^~dQuc~StQ#Y>*24@*a32S`1Iu;Brw!EaJa~x<9QwO?~floeR}@-$7&4b
zuH5f6w%y%?QMVx~>87#CG@og(G+yRfP?ER`0QYl1m87DibrsRoPN~!~1S40V_I;x)
z>}PIA0_aQoqtlGR&g_ueD=sAf>a+mdeThJi{W3$&*!J!1yu?RWue!c;Ki)(mGHK7u
zF#ql)AmRt0E%Ps0X(<d*DMg_VXai6YWeOy`B|(@Vf8!?g)?`4FwB@m_=!V!*RG&zD
zG60QFUP9CH7(u0A8kIAsPXcD;W}zF=G8I^Dk*03kX8tDxi^FsU3tkXu9{0d_ydNq2
zi;=?jBSn@W>T-$r7>_JUR@K3mMmTGv_{K7xmUZy7-zz8w(b2`Vv!T?tov>0rGw*_u
zu*FK@g@d|g=RkfmxKRbwi45{dCBN_b5eX_g>wS<fqV`5FQ|!nDrK?A;8Yy5Bxx9NI
za_8V^SuGn|n>4Bdg=Y+p`276x`taf9+joL(N_~O6xyk*A`c=dO>d0Mhq%P9E2)Aep
z(CXmgqX9<w$Ar~oN?cQ-()+luv<&i^N2Bu*p`*pR2h~Y#mY&S5$l~=L)Fye$%Mn-}
zRixo4)Pr=8=brPuNRP;tD8D4P3)Ek7f%<=4p#CoxpeUX8IE|yBidi+0L1tNdO--+w
zWf@eL#@}7H{?fX2+qBztTe|L}IJV{P!sAQ8R57kBoTOah>yt*37;Q01Diq<jmGrhB
zl_Am-qo2@uHeruCE}D&Np42k}Ix_TRHNXbk7N6G?tnCO;)DuR8eVrhgz|IKh0BckZ
z02?6L=L#=pgdR0Sg`i{E0iKPps&u_LJC9x&&;}sUWdfGHPhPvD&cK@y<pWf*WfQ8>
zbL1Rfcq8wEWRP7Oh}50z7q39Odj+mm(U4ihI!ZhAiQ`F}e(Egy#b^szkiDYaYYWmw
z+Luc}qU;*=5ptnyNJ=X?_S?*l{X4hkR&C?(@1=@N_RiPJF22M^1K4xTs!aIm-Z#|D
z(Bs4%<#Iryw3RywTsLTwu<|}!1kh5)MbTc5WLW}qN%HRxtE6f0aWjrqbUf1U@BP@K
zw?P-?0GmLr>6Nhsh*6w^p{rRyqkqj$20^iO5??Li;zL2h1GuFT6&>W17^8E=g&SZ4
zl=BbjOMq^#Mgj@w11$YYKy=pMdP5U}Spsx`S$6p$drZZ{$Jamg;XeQX0RR8WNGt&5
z6H_yDP{j^xdVy(dqiO&E00960lw8}6Ehi3r?!N&8Hhj&|NirABB+)#~|0`?R)SbO|
zN0EYcn$=YW#$LYAZRS2Nz}EYK+sM!~A6GHMRtvHp|N0%e)pYXZ+L?T;itjToB7p%w
zD{g#wiA9iK7^7g|ad*yh2aXjG@dP;m2eQ5NH;_0}&|(aOx!%xR`6iT66$Zpfbw;o)
z-=R98|M|BGT$ni&OaLnICDn&RXO=;Q{rK-cupli-U@>t95IwN=Slt)EY+=d~O?xCG
zc)RZ*>ku03#_jP?6|eo`L$64fcM?8L6~=0~AMf8ocF)d2c{=Mlv_hjjUUv<JVPT;T
zp=@D1z~!~a;@S!-YH)Zyz;U|XKNV?>?3t$@gO?i2kq>QHLZR$r$rw~77a`aA7PNr*
zbPmOBB-J;+(4+prGuuou@w~&Zb+4wvu(=?2JGGnK^Q^v4Z?B`Aw88e-h|(r(>Cm><
zYE1lto<IOeq5$T%3{?2Dz?cY~ZedhhTxe~X0jb*}f%bt(V%H&<3#~5^TMPvVyfJp&
z1$9BwG9bP{bc5K0tkV+v_D$<!`Nz-S|4$9UA`k!n2Lm#`Ny)}XN~Sf>nTRfUv@s4J
zv!&Jiw#^53GiqwL;$t4G`L_RlY_)*CF&t>-LeW)uVCV(_Ey{*t1fjc7(?M@nQS$+e
z`C=O|zpg#o@j%;gRwgdLePAjZ)|0UVgbaeXa0-NPdlTn=VE0F5k;Qkvq+x?mMfG7m
z_PYI(Z{3i_G?<WbayZowmX>t~n}ah<FbE-dg%1F0USRtQTLw0yva^Fj{njh@^HYr_
zym)Gp?_y1}GBJ{Us4ou6%)H#N>_tI{)QuT6r!qx8K6ad68E5-gs3-RE+yHlwNh7na
z96Xq<pPt*&<+*Ty+jHfKy+3z?du#A4_};<6Q%;|sn=f0G+P+2k^Eywkfn>rY!hv$&
z_lkbh$R>^jwJh-UVC)>$2jKxN7iEUCnD6XuBWu5V!xrehA!tB^VYX-I-mg?38?+x>
zQDb<0{@f&g$|pzLlrVf9>>hx3h;bFUL@iD5?_^dzYHu8uwM~oKCis(z4d*mkB!s$5
zm(=+(U^A_-nhL>s_IxHE5oh)sc1{56Z1A(nla^huM|CG^dwl|+-KoNw(sa6Dt!)|7
zRh%k}P7zAdV<tEt3XIoQmB4L&t59KtwRsAr$2zU;JDA1<=L0+@c#%zzG^c~6DihnI
zO~KxDKV_S%0HViSd|?4FN%K~V9vuT!ZtGw&Hy?Ya-d{k;rX;dVs!Or?hH%9xv102|
zb19Fyq6kuR-9W534ZJR;;!++*Fn$3A$rBHWdd<b9PL?O!*1XUk)+;84ZCtEwQ)(e4
zSr$@R2HZiNl?<0gNJ(sY&bH(D&DII|+&VG(oG36pFhqlbhF~kMEyw&hZS3=3lkGlF
zaXL+h;hDG<+m70q1GJUFr~=y<LNuQ}vuGQn)IQ9k(mHAg>q=nSA+V!^O=}Qx4dD@L
zOo;3dVd;EO&=BNBre@m><;OG%2w+2~pCJ3p0#ypgc)RJJ%H(6tik~!CN6#cXok=b~
z%LkagO~khO$OB#<mQJT_q!Agd;p><c=D5lhY1>|AFeimoA6<N0)fZIr@#@&xQ13p4
zRpwUq`Rsc#Z&cL~WTirg*LQ@nSA_^2AuK;&S|)Z9<lfn8!9tb`a<}Vzqq0w*vmgXc
zK9KeUIG<BD;On%aMmA3JF`J#A0bAvjx9L@|c@4W$@ry%P)+UB*I#&_*h_a6eam*B!
zfl}KxrNSo<=E*AGheFG5Lr?icL-L$8=WJ`@TP@2HQQ8pO+`UNlrbHCg5~B1##dj{F
z?R_>iiA|Z>s|&KMiAK+#(BF`x74H-bVckGGqPI{;sS4B<1gj<XN>b2e{YJKzY8kEw
zVHAZC!m^`vOo}I`iCDCWc-!drsiwV8d*Q2&qKC#j*a1sy%vMlB7n&|}@e=dFE)sje
zzgWj}0j~Y!_R6%qq>B~HI5dZ3;5IBZ`hv>1c6y8*gUB47oK;q^j*zS@8OjN?PlY5K
z=xwXOr!sL6PVY%abk2@%>smK6*4oVuOUy>h1#8Mm4GHk9{&~IlQ4qs&3Ny~&jYWK1
zcK-KZd<eMt8wV>KxtF*L62D!q@4^<$05IdK2q7TUL;;G&Nb>eJW4?P$@UCU33kVw&
zVG%#pZmOT=bWqj~X9l765bC;rew(if_lP4UQ+V4grGbL8DT5u$jb6lsrK`#?A%TD&
z6k`uBgeK%V>UT-lk9$#{HrFYS%$hWv+VD0AO4`mXnFrIaWe4R0)Hcw%L?01kMF5G&
zsM}05)9CS9jSue|l0Mb;qp-nBJ==5e>Ja)AmVJ^n=0oie>Bk;Up}qEo@SGDj5JM`<
zw-CAu-Bxx|(tQ0~5Te5H7fMjyklJ!5P5T73MjYEIA@wQcrH>f8Jqc799Fslk3_C(H
zd(kQu-@}Y-8FkC~;GxO_i_G!lG*+AQ%}V*_LOEDgkD1s!1k+#*t>@x`KG;yO>1Yce
zg2`(-6D+R_DtG#@AFK1Myie~R4&!+;c4MMe?VBlRDSyoCC!Md4aMy+P<%8(BP?m-|
zB6j$khDW(DmP`}B&gQB%1HWRC5)%D9C2h-_2U2znl)Z4vAdQf;tpXO=RyecvDz3PA
z$0?cSyP23*ujU82rycBog~aP2F7;)He2<a}L0H0;)Q>O3kjAoS=nUGYf=q9qS4~5)
zr9XcD@3*KGvzW-67>#7fc^DhQ%JY8V3B$%d93l3`w3+Z5g<pO&>y#U$f<FM#U_P)C
zRGw~)LG)_~bd}4Dqc~dGPGe;*>RkPn8w$s{!V|)yHC~nR0`|yK-h<%lyy=R;H7}!Z
z#fM+mJKnF}KyYPnQLJr_-lRuhAk{1RZ%`%KAR#JHB5bXf%!Vi5PA}LVs0s(nt3S1}
z+1C#(bA}hxeQ$F`WWZ}(t@*N-xrUy{y>}3tagEXBU|(Fve_z3<)N?N<Sii%JK_WG>
zE5gh#zYpS>)3*!4PH*=5V!f4R)QHk~`|TY5Jk&P9LNHm0*Xq1~;MSsY)6YF)cNlmm
z@6_>zh+oCvm-Uv_-l9qy>@SD2hbDp2aP=#wr=AyNwy>UYU1&YNXbTyJI3Fo$P2N6@
z{c~{Pg&8|6NZfEm9LkvS^tAi(mn`6KEGi+G=_L2Wu?6pOQTKxCuunkoVgh4HUw*6J
zy#EFUUk)q77A@Wx`|{)}@BY4jSOw=&wqN0#v8_Fthw!DBPs{i7S-@i1JBbgI!c>kI
zbM&HEn(qdrC_9w!Y0yhVB46G}Sw_1k)&gd1Z%-VR)+TbfMc%K64aAVFXd7>3eA%o=
zNYTr~?njMRs_D6^-|yD1$mtc&-XBkUpbE7Z>PvPEn&F8(TJm-=Bb%}R?U!GOa}ovz
z<3?&4`<$3WzJSIJ#LJsdM^HwfryaI&v#9Oyqyj6D!(io^rBS(@&Nl9kf-O)sC^FLF
zMVnw5`FhIo4a719HCJeea|vrD?5BJlvIB(ED)=ff>`>-yBz_hw*wudA7qHa0fKqB_
z6d$!0tDee!s?-59*I<k$yb|GutYBgLN;!5|WlW7XgOd0SGgo`)%S9D6_dBe>xUn-2
z8qB^izr2lKp?#$sfBgLOPvWZInZ*a?UJ*%0BBz&|t7s1pczw@tU)jf1<!4@GI^*ye
zR+*+o{K2$}do&!o%h^owfPtuZhmRN-md0Nr5bnkYoLfhla>U%cTj<J<e;pTGec&4S
zTVsK;A~KfnX@XT*TK*HxsKCm8qD1=Q`o*}}E;xJOhGd4|Le_)R?4dOqZJxgH0P$)@
z=1o6FNqiVz5>fMoRA_qQVCQ+^9=6SxwTQYPXvNCZonX*m92)un#7;5`kRUYWYFP6F
zSpE74#+EvaUbNhHy%@XQ`k!#TDF?g7zvaNvm3y|z?zc1Sv3FSRmIfIG|As{l9|b+A
zV|D5d9DIxdi4pI~t5mL<-eL4G$tM&H9TNehBg~3;V2nrivOIOa{*y39$k?;&mU$jJ
z8A;wl7?6OSVF_WuJXKR<MWuHHRi7RCU6@kyc%r!VeGTE!@7>$>^WKtK#ol2m>!l<f
zsd}3?O+IhoUo1$vfs}DB0>s|fc;aa`C!GtvzGDU!pn3F-W1>+pK;(1L_l;;C%a+aJ
z&LW5-EswKW{!N1X7S<5#(k#Ms>zLF#vukt}9UQRkAtqfJOaKg(ub{BV`#j0}KBbX)
zfl3hGv0CodW*}Y|OY3%QZ_QoYIA;D4l}Vwq)%UQ%7E~?-b3hgxx-#B;I?e-R{JR~r
z*Lc&-W?(-qoc1tmJB)qltm`7cANay)GpdxOiQ2W0?2=!^G!PrFyj037`IjSn*D0{I
zp$vFnF&Y-e^K;_fZDnY7n0f$6W;r5($djU_^rfNs3jhHB|3YLHU{?gZG|I>lv>+W^
z-x`Cae^9gm00030|Ad^$j^i*4M9=vfTf2Uz&kYO)J^zoXM@oqob#gKfsn}6uic7Js
zuW>>#p?14Mq&J6gJ|Fh$GRo<tXydaRia;JN5{>OE-}FSWU|J4xQ*wJ(O_0k!57>;?
z`R~^^Fld@ou=QplAM^0uagUdiFJC~RP=4b8j0iRO7S7ZqR>{i~78&_qzJ1eTFuiW)
zdP#BlN55bZ&MjAkjSN^?_#7bB9bwmpjJ+%8*r|ALoYGeGRg6{hVm*F-{`~<~AB-ev
zL9|6e3ht`piqp9u(XJ@S8<na;MLnfW2U0GPRlnf~h9N-9gHWuukR@Dm&g};4MsbI#
zQE>{cMwHMBdW(axsqmNit!jKt+r`QBZ0^zb0VhY3NRFwW);u!tkWP`@bR$Pt)VpGG
z7jaVvn?{WxhjWB<`xE5r^zB@W_0v5Wmj^1V4feumXX!o!OPQ9$!{|eFcf#EvA5mSa
z*~}5){flz}FQLwM$I%|3l@d_@DW2)F#gD5wL#qQUmPS=aSrhY*9(q)0d8>&3g;S`H
z4}aKvxvtBZ@97r~u&k)V#4PCaH*Ii>KbYn|+%DYq3WK0+W2UK4Tk;N;T+I0;54XLN
zlx-=bLf0a}2OcEimfI(s5<<OMh~Y3p<BAp>cc(zfHd+1D;EVn#G68J|#Y9|IWTVj^
zrYo%Fg=9#3*o@S?1hnXQRjT+oq0ysNbt9KI!)0;63Zg@qi4?TTFmid%fPNvBAS9hu
zg<)Hayl($&IZs3(e}asI{thJhrEnG!A`{W~iguceV#ZO5VD)tSyqU?BK|GXE%oxq!
z>&>FKQ!D>kCW^G)wz8|uIs>cHzP`8(41^yTzgS^dJ_EK;j7m>b*UqBSyF+kJZWWIg
zu1&3p(MzU`lSw66-L2Tya$rt$F%ad$ZWb9-w_C0yM*76W2%E;OKCEqt3G+VmB(+Bq
zR-D$p7MiJj-pPpUI&RF0!z#aZ(#>x%6IIx$&8K#hyHm0Wc6mS2@M)BVuDE?nJ0`)~
zsyWzosfkeBu{9IC+u<o7V&}%To;3H*r^wNY>Q5O)w1DcD!IwN9%4<JbI*iQ89or~W
zo%yljx3)stDKh}890H?yU*ay8QOA>^Xjclnp55f;%-WZVbHVd$lRRUxsF{_96}F||
z++MdQ3NEgz=#`k=3j0!f;snZ1P??~&<jYS}EX~_Y-R4U523jq3cOxy`h5v8>3@I3a
z0U9Z%ZdyM71TbG5t$w;GJ;#VQAr|9c_!uXjOEA>oN+^xNSog47uM;3=Ld`)&HzTek
zHvR(u0RR7doI#evAP7X)a~l;zz5m6ADkb*3CbRpC)ay1e3IS{9AV>mW$$*2G2x<V5
z`c$qRxIy4q3gTQ(A1Xn5+G$opfUh>IBZu(&3y}28Pu*_^Y9nZ^$F3`~K}Omxv|9Xx
zpmM>o3T#{GaFy;u^~?zP0pg%3T0$B@X4-Ii1#nrzs6BTgNZLK);r$Wds0qJqb=e>*
zZw8YXcAsb*MN7mqf}|DLCGo5cMJPcOg)sn1TC}f^0Pz;Hm44vGa7*Jqp{Rf_IKGn}
zC^J`BD3DGKf^GP6G>nMl<03S|ovZ(GTHB)MTI-MK*6iOxmh&JVjdcxidJjGn3mH)X
z7(sK(gtE@FXZfc2)G?!r->c`CSO^r+6RdZ91qEn+AiTuFVE!9u#|9Wd)4uh3de)$r
z29L2HB!C1>?GR!NaKXfa<6FlDmE3YArla|@M8C8{uLPC6mEtNE`qw5heIe+Zh3!0w
ziI0~4Zu?-2pqg7!UIokyP$-;h5;XU8sK>_-pHUiJ!w@Yt!nZBg*$38Ej8YJBHy)BB
zXdX0<xZabV(EzeRb35UxB^15*)a`&y(4KBJSKv9dhZhcWgZA{zt#)3k)qGMYoEe&K
zFwL7yuA<XL#K8u^{@mLeDxBp6$MlDk?eEzdY0Z3+SgfQzH3~N;(k}o20RR7uT}yW4
zAPijhZ9pK5Zi4TBkx}W>NnYfhtf+;sjltrhQdt}|s-}>jf1!@xpu?BCh`j@~S=Wh9
z(Sg+&B78vVunjdXNiawg&q`@JkzNp$J}T#&zwx)ufB9R!qdYHxbb{&CV)po8NCSr-
zI6_5HIWfsQ>v98Zz9wE)W6jr$$JEp;TdQv`%OtWiCWO)vzUx*}NnVb}ZEF)~q)2lI
zue7pVGa7d-wb8+(PTk5~eSv#q1JwwtUU)eNy>ijpxq%037Nj~Ll-j2QeqQ3A7X((p
zz5);jyXqS83<gvXMB*V;sEbK}Ddoy71_x4183&#WLb!nkC7)=44y#R2fY9Z|2vD)M
z(>jb+e_MZ$pNb9s`7&3Kx2w{U=E`2fj4ak|%>k9op4{KVO}CW@Dg&)Rwd~P!Nupy?
zmzzKWS9A?WR8u>5)<OQ*puvV#pjB%4w)}7=crU|}9T7D&#G~HTP__sfzWP~AmqK>*
zgVz6T2JwWqV_L~L>9&NtwjFUfWT?jw=xpI@Xh!SsAx95+Q`gESF*vOZr>*F6^dt@z
zpLs|jO7k5$$f7tExw@B55VFeEF8%(SgS=fcCfr}u|8Zb&BPeE$&;4KJ(j(S)za%_f
zdcayVXT)_|xV&_M^`&Q6ll#2%f#n<)C*(I&K-~L<7aX-vLOjV9M0FS`u2z=XyNlz+
z3B=F|8KmN6Im@+dB&d6!LXHIOIdgDj=fJ~5Qw~5JwCXnRJ_73ur)pRWIw)1AvZM6<
zfck;JfnFb=|N2{iZOP*86Qb+JiX$Y6DEOfz@$GQ@iV8y-PC)?JL9Naa<`9zuJ%JO@
xl&{zHB<&DS$J&7xR{jc>0pe=9(;D%3ItbVBbe?e;Ag)>G{{U$ojwbtK007E1XVCxv

literal 0
HcmV?d00001

diff --git a/inst/extdata/hg38_gap.txt.gz b/inst/extdata/hg38_gap.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..1a6b842a1bf46c1270740d0ff67515d5b2bfc7b3
GIT binary patch
literal 12368
zcmV-WFt5)aiwFpsS=M6!17~4yE_8Tw0F+wWj@&j9eeQe>S*#mBVX?sef?+t5b%59d
zI7Wc|`#n{p)KZJxlgz{D*sddqWYwvQEH1Y%Uw_nFxz&Q7TfO~fqMvVnynlQ9@pk+E
z{)@N{Gb*O4H<G*vj3te~ynp}s`+tY2P19~&{nj`ywlwX-RGM`6n|NMoS=efsFG^O(
zJg%(N%Z(k@+@$o|pI<)z_S^d(zux}$_UA9#!YXy)=6tKIlmtY)Kj1{M+9pS>_o@xR
zIRTliIgYoT%M~GNEkW}pH!x!}wa}STrS9)myD3WtqCx{XueKE=+er9fY}rH_$l1oh
zohf~%NNE)|15qHXY(Z)JQ`j6EM-xdPm-?C#yG4B>3gpsO6yGVmk>NlF@<4dl2Nbi;
zgJRxDgA&Vce%a|eO9~u7%S1S-VC4?rcwewc3tVg>!{52aCeUkAuZ1_Hu?-sApmHJY
zCnOc;0tcL^#rtV&1eI5$R^R~xr4xW;!|UoR8eBcS0jP=Rvtlo>EqbPvI|q)5*sk0*
z<d*K>C4X*T-+sRRc!&A>x6eO+{~pIiyKYbuggaa{9#IB@hb%n3?G!kIH;sUGP!e^Q
zbsUDrO&)t^W!yWEB!Xu>6SfhG<pTk%rBA!&LcJFYV6EZQx%HGFY|Mi;;Gr9-!}qW<
zfLg^NTO-N^e*Bz+L&E(7IN^LagPi^P9Nst)bg&N4;O2T`2{k4B)pH6SfRGD-m&1C5
zR$+0X2%EJDdIM$UslIP+>)0r68^Am9h@w{vhWg3?2KDC*p;rtpNI4Xs7J@2q#!u|@
z14Zu?ygfv10Jt+}^jsfuV05e92cV7(&cNxH{k=29iV96&bYojD-R=zd82$**k+QIK
zi=|8D%To@V9tWPl24^gu=oJU<MiaDnv-Rrl3?wt{JwZe<7_s`+`b&;QXbgbOX8&mY
z8e*1DsiNVLo=4C0nBz|zt6T>lxFe@h)Ov}p10__cB@mPopycv9UE=G&L8Z}b0%RRX
z%two9sveuHo?w&(kE-Xs3rvSg_A-Em6*A)`A@;KA1Xp+>^8^y0UlZujURaDK@Bn>L
zJisq@d%O_#1o1dQ@0&=TC_4c;j}UuIMH3(m24NxFp*NFd=1@o+wz(ycjFY|~N!y-<
z8URHUl?}y{Dx)NTQ(o(?eUIT^2$Rw_R0B!Xxee#cQp(P1ihxAn3uHWy$dk&jZY66H
zv8V{eKz$(5@-iiTT$68h<KHxYuQDq3Wm-yc)EJNv083!vA2Eg@suC1i$rv7QPjWt%
zw{ej*QFht@Q1P4TwSersZ}!Nc$U%L8@G{dy^;qVC1c6av;dyZ60Ze7-n!Hi0AlE5$
z+iDr07WF&le1s%Ad5&X4tkBI-?vMwsF{dhN=i{T=zIR<kp+-JH)Gu}R1g?+1Quo7z
zuAsR}U%}=lBBa2VK0qJ%iq6`PuznO$$iKL9T*LDR<Fv&(%~2&*<W&ox32<G>mz161
z6|1fT7yttLa)>8;Dm4;)0MvYt)bfEn3c{YCXPWnLdc56Zc06(#754$EM@He-&p=hs
zWdJmEu-Ann`etuf07Z^BWRw8xtFVe9dt(r)6*y1;2p#H(9;=25KIZEq2W?mz02NVm
zn5nDw*o+)GC{c+vz>$7*m9S0>HdLAepdRAxD~6Rj!+Nxa01i<svF<pLOG|#Bz^t?h
zD)~$i{V^_(dL|`$HgrWg5hqfUh4<>rbt$*VKMgMu$Oh6bFN3Ll)C)|jX`{3K87Z#M
zeD(GH$Inlne|!sq727DNpC}sU1RBnJajT<sMI^>dAVBEu)d_7nVOlGy5gMQaT*K~Y
z9sL#A02<&LbtiK;TeksP09u&$yE)RE+5k1cYN1DS6zw#CBzSEOsjvxZfW=blzKw>;
zB_}`uBJydm)U$aGA_8=Z{__KKBOT_JP3mH)Cv%tZ-2hPc8!eVfw3&O6n5G1<1T=#2
zS@JD+qD^9*#AVS2DcKoKJd_{peM!((g(?c3Nr2as81e;>0oJ1{Zu{yhTDcYjo<K4Y
z-?j1_{nigW^nu8NKxiZflGUKFM*F(4ni|T$5(pwWkgeZQ?(c~9i5vDgkWS?NrDpKo
z(hUCRn!*1_GtycRo|=GpYDP3r=TYa85CLifpyf!}XjNYj!fz!(Pgl+n1N!F|lpZoX
zfCgBp$K4#YS<5xx;3y`Kv1CKsJU+qilvW(IYykYoP!Nia_Bn;iP(XlGz~_R=Qzy9(
zU;rx>nHt4In%rrC4zN;@gLy+aR&4+jjY}0dnybZ#AVC9IsmReB?TXj{6@XStJ(|Ow
z28hhuR)Se}bB6oov>YJjMr|dSM{|xwA^~$$Wef9t-$N!c2?mHH*cMAYnfKC%d9N|6
zSmlP3c}zwVM48O1r5?<ojS-6h!sGe6)V{COW4fA?=9n}9FfX*{U=O9%m~1;FHNY6r
zFQl>Wf`W7z(_8=@Kn@+yxskn#kh(|B3IsVGsE&5@Dvae$+l_&EqrL7-<ZlV1Y&Lcj
zMqLVH-EItHv{P*Y6stoR`$N@vBA`DEsY`$&W>pHG?2!|ryvVM}+Nz?Oz7f_alyMpf
zdI_*_CE9O}TVu0*faoPwn%H*p4k}3;I6^YP(wAu36VAA|619#YH*<Wmi$s8kduT9b
zg$+ur$g%K7LcKsGKn}KDFCWUJ?i_<GLm|*rQG)hl9AdI91+h>(T9F8f1iWe~P4~5+
z;8#d)081b%LX~YL&ICtN1wh}IAg8pe8l!33U9fh~OdqHZgaowoTbry;|Na-Ah2#FK
ze){~!Pe`A1^orOt$B`x%Y4ULhJ4YlJHr${@--(l;xTe3wc<bS<bvEdi8=%KQYlxjP
zZ^imZX@mytVFr%UZG8h&4A9RUYg@FXvubSdo%3{uEx`4I0H5lXWhJU9Pv8k!TaFUu
zhqU;2LR}VZc*}32a+CJ#7z^*DhMGO*52z9o2-h{qSvL|BqJ?6p0D1u7zIr#_-wkDE
zl^EAX`yN1KHG4ZT;JhV<3gS@@#n+#RuTb8H_d0N*7#|#tQIHJRf2_6QqvWI@bKtlp
z&}gk~%&8v7p|jo^mhn{zpw$Gj9`ekH3`XoO-aA1|GG#U7IoX@&1o>YFRzn`!&%-H3
zW_<ty5WlM9_EB!wBkLzHiJ7M4fJdXjI<NZ9@RH+roj`KL^WtM>-yL)3F#5_#w&mPW
zwm0mk-5py6&;~$D-<BLQ-brmIwT;9t^R146MsbLM{_*+yukYU=`r#fuf726nZco{$
zp=*mzuX%a$Unj)<`Qtgh&BWj-F8bsKn@%ilpfOdF8(O)zJZTpH4*&rF|CC!xvg0-o
zTl+TRx5*P^lgbrxi=00Invx}ovOSv-s)l&9D6+pG&BN0lU>;Z)BOycAGd=wcd;S|p
zc|0aIe?By#_YYw|mhbIr`F>wp1e>{e@v?Qr{&MKX`uS=wHcgibC9tf_WwTpWHypXq
zk+bbc=ijhQ*qSh;Dpe!*d?q0_U=-{Cc?C1CMMyPLat=gpiimXpwguCw=Xhao=SK%L
znJKwXntkepl4-yR3T=kh&k<(L-KRIBBVwZHV9Nq<1gn^2gc!zwY|A$X*&P92WOnsE
z9}Hu%)+uKQ0;#M@5mt|1l|s+#<`a~SZT)iZ)Y<4YG6#gstdt-T7(gm_jgK8WBqGsd
zt<V8$SZUFpxs1oDz^e)%RHQk7sr@^BiZg{pp6q~0c2bAN<~?<0<c-0+0OOS8J^8t3
za-|?^ZVnJ;fTFha9Q9l%*cyFv(Am&ew-KiDnPS=;0)gd;q_%v&RVB_8q<sv>2#{0j
zC*hOZyxm7<k`#mpjFlZ=cv?2)FAT=G#X+Q#HSf@c4-}X>RyHfnZ6HNG`CD#a!zznn
z)$-<mh9apcALh95LTOvnw+Bc(x$WMVzK%!m%qiFbZ=7N!8w~sMBE8P|`;njvgpk(_
z(5=5Yy;2QCV8*IX(7_FGQ*-z4!fG}Ea!yqV&4ag@6!-UO)Y-982~f;SmbUiZ!{gh(
zfBlXBilzM*Js+UTk=2P5{rTX&4`;GFu72{K47Ct8G<W`~&->yN8T*`GY2zkVKvlN<
zJG%tMpDemBQ|*sjwi);@OxUXb^pR_+2gbc&B;kKThGW^(7Dhf?eZ}we`b;lW$LWEP
zSV{Vy&|`x2>wLhyxQlNke*X1od-GeXiuL%N6fx5r{><*!-e4mHsQTQo`~f$y*eVt5
z_}H`X>JuAhBB-Z^%y-mT9#hUW-;4(Y;Sfch`slNc=QF?4neov^Ddy?r+n31Bwm8NG
zq@IWlIze^NR`$uZaxkcZVTp=>3mO)k>g#wU%v7%!HK!7Q^0&kP&xP+3hc%?)KrNWI
zC~W00#>cJHK#`+>ilu!~*}9BJ&cM<WQe%l^)TgdCZ)BM7HwP&kMWF;Ndm;2$%<&#y
zUC=BbJ#$}_h0o(@_Pc|`%1&Rsb2`RbsxZV3g23vX-{Tt_5XnIhSd{b5<I^S&3=kNe
zMkRKv{7*}CrXZ5C0kEWa7rLN1$hk0j?at`tVDY2!dpuqhe-s8VI9O`&`!PetV!|I%
zIjE8u65;xg;PHSDR>=iXpv!g;5Wz(tZc`*R`IeCTpdgT_iQ*7y#00_?4*k?sxYpZ2
zN(aey(&ucf!2?sgZ(*;8<%o~p_3u;GIJo`CjIV#q_{_o>8QCCFOq5w&ZWQjklZHi+
zg8Gr23R6tssQ}L&J8ZYFKSGZ#wX_6J0ttW}q0i$Bqc?#DU}2%|s;A-H>HwXFExb%=
zc%`?zZeS}1r?aUyKn1X{iXe|yHdrPViYc!lV9fSmwWzcoTsY+osTV6#ID`bca<Tr9
zo^f?-7RrEg-HX2zAS3$-5=HOZWm##taapj&H){SkRg;#X!$G$_QbZbpcZn35MGw`&
zW0mH{mUfz2>&{30>4&%L%t(&*nW@G5mCcogt7t)w$Kp^}vk_|HL%*7t9t>~q#qxRT
ze*NJMpZ!>}JNq{GPUn7Mz{AaKnmgxjbC+$J8(wRlJKovz+y`CvaVkZLe0Zn(>)c`b
zU|TspSopr?2OVi-#d&FzW{=Rke%+@f?hT3}4OisNj`(myZsas=ayD)Jm+SJ$%(gWf
zeta_(x%u!A_1l{5+_Loy(^LJ?A5W)GJ>tuHymU>pr?7r}JZ*A!58tGmLy;hvS1ut)
z9hNIW+(lN`67(3~dP*D1PO#Pv!WPR|g(+QjH6S^x6i`aWwIBe8wyECNJq!*G1pY4N
zuVDaK*>_KRZHMbN$oRI633;bXlK?^OGNt$40vMUytAc}=spp``VOzG@<32@>yedJb
zo{=l4@t3{ClBPK*{c#S0s?M7Tv^@fd>mR;j%-&d`Y!_ZJQM(|NIZfoD;Z?OZv(k8>
zm}8LYRH!@Ry^#+wMf`Su={@dywRs<jGsZ2S8m|<{Lah@b4y_TNn6<}pXcVW~QyoKb
z7BVHs10ZZDUID(<>Q0QBV|IXyDvvgxswvN$7QM8|-VoioO2GM+f?Oz-Vxw3oE(Iv-
z8n7R7F<_~>YIQjI6y%n?2v5%A8J=i7uZ5&n(Z(xz-VLLKC=L=wX3_#Y5M^<1vw6zN
z@h$hv^$}prs4OmRUdLmPQCC>7Ke1v(H#SS&rpnftQ?}NaP7ri}vbeH&8DFqYNdW6a
zjmkX|MUZ}suP|r`L<A_mE{i($b$p|gLV!|=vZ!M>ZxL&}#ilTkpi571zh`h64>Lvs
z+bF%@mi21}yva4K(V3I2?7+xbUaq<@jCcDS;H#Coyjyf;^iB160`J@Ey>CkR6QwC*
z9)Md=MeJJ(dMj^6P80z}R%i9GU3ASo{DD+;!_+|~Y5n$R1{OC9FsuVZ7wh8fT}~7$
zyp<>b`-;$gd9&=yKx)&Q173<OZF#Bi%wX@e2vmkFMK$b7H#l*QW3Du+CkM;~N?UE;
z_xQ-GM9qQm=2Ks7-}WAFEGRjEb$b<E5R2{h8gEMC-Mcz~k+Qt8d1i!?DAFN-EqxW;
z661xxiPGPc3XhQoz{(a~6uaHpJ25ivNSgo~Bri($|B=4z11B3bA`UQ1RUiF!%k#{j
zR-+BT*l%0Dyh{|tpw!Us@&jlN&m%uD5bUT;PJm5WxBiU*E48Ho;GL?{=QQv4GXuuZ
z93;Arsth{y`TEC&l34{OD7@@gl|c8o(Q~16X%7VqzPTz5zE2VkMlC_TGy=TS%VV7T
z`kolw_yPjd17p7^k(duvIBa~!NtZn(VE?Jch0i&S@4=*hBS7f&s&V0S!fqH5`Xya&
z184#@E)_t*4n{_F8_A#ohMPC}y~Cc*9r<wh!NdYXW3O?O&J1&;<s5NesKlySp^z+W
zMWE)DD8I#Hjv$#PlZ{XWYJR9&T}UuF_-Kh~I2X2hGrP<|03{$J<jaXSwDHzCI|ykP
zAx~lSX7wQDgOqpDQ<24O@c)EdOOh-h2weAV${*<cFZME1txp&IW(5<29i<|IB>Ytd
zIhNG?Pdn1nvpv4xqX@It0H~_5cLGd;XcRM8wk?fQ)_B8W2BZ-_>L!iQ;^1p2k)SDm
zqGB-bTNs+jyA%$BXx#rK?};{MI1WBoRnZiA@jO#YeDJ2GEjG~*Hfk&Qpb>&mAxtMH
z(G)^6_qPxqp1B$|i>7FzR`yOZ6`62Hvjl)NhchjE70nCZ|3JHF*e>GwmQfflfp*ax
zPjriWs)Oi%PeU|E6Fr5YrCkst7tP^3>s~}Bu$K>_t%;Uso_+6-<I9<4bp)BDCEU=u
z7m*)8Jg<)=TEcnOy@>Gr2Er`RAsS*Xt!`z6J8=wW&N_*fu#Pcp>FR9f!4#!v4L7u%
z;%tCGe6BaqFtWdXoViRGZWv0@S|jUXZ7&QwJA_CMm$8c05InF~9cvC1C?9S<=^bi+
zH>{_5Nl+|f;GZYc!b)SCaAJ+f*iaSLl`N)3AH+o69>Dlc*-OMRWNx{X;L(IG<2e#b
z%c49oiNN-Ep64#k2RY>-_^boQ-?=Tb)3RbF^qeL`n5(~2TMuzKnZ&1&Nnre)$-9fg
z(6E1x7-Go$oydEL=UJ>nhWL#Dc={AC!?TzQ`A>j5H9s!#Dg>1ndVm4!)K1HtGJNjY
ziYRwII|a$wr+88!A{iDU235sh;wnTW!$QO$P5TrtiFM;-h(imqwAN4cM0C=+$_|E!
z8PRDR$36j-kOqVtlPP=)>}PGw7p!jIu~2&~JEA5qCMJ#J2XJ!KeekgbgT{*8SY)-B
z>HKfaZvN}lG%cwmbF|A%z#oyHu#P2%IBwcXYykX1G7)NTtt-X5v%dxpR~8eU`kOfJ
zWi`N}g&`Apgj=C?25q80lUzvuasMxsmg2*rOSf~HOmtyfAB1Zk@vjL>HJPy3;`;px
zU!EmV2>m7#Bj-<fgO+xc4s|g(dSx@e1~@f`wHU6RZ?lr+J;2TZf;X@~+L6WN(D1I4
zqXUy$6YPWXebvhTo&wByx1pXRT})UhYyH$Z1XPSh$S5rNlyJ`Lw&4KBc~J{7cQGYY
zx^~LLQv3q|0RR7#ok^DCFb+i5c^h#i57O7@Wu3qA1C-LHC_8&o#;d1DO%w@$h(O@k
zM_ZaO()N~r-*RN%O1EqQE5<ewsW10z4>iI1#IVTcC*N}vZ-;l4dz&#b$r42&cM9hV
z#C$??Rg;X2E``S+Hwxz)B%%i32C6C>8NB-Yl<;XX(=|r9_;t^RQ?T3IV3Ti?Nqs%s
zMo*eb1r0X)M*6Py`g?e`S$7)L1#NPuZ-<vmC76t%{MeVnJC1-1i2Iui>f7Pn>9ZIE
zkA8QnuZI_$CK>iM{A}{4bzD!zLOpXJvKv$b*UhcIAKu42+=kCj-Q4Qy;ZX&{GAzFQ
zsheAUJ3Q1hjmAdkCfRmP{^ckL-cbzTM%$7$CV4iB?`r{C6M2?Ika?I&hNQ42i(fG$
z6A1mxC|N$EE-I{2Z||5UX+jJ#TFxgSwf+pTV~%Nv#BiK1;sYchjD(TNH3wzF@>sWC
z;`LE>bmXM{n5L2A?wO5i39+Y?#LMUsULi=`tx<S;C&FUcQbn0P+}LsmZEc+h3&a~d
zVi}Yl{V1dFF8?c{opZ2-ZfN94D=Ekbq3p?kmltvt7S`8VRJ1@#XD?BC$~BSp2^>V{
z*N?4Mo|4(_mG9A|#?<v=-{)?V?K4`L&ro4g$SX;=lJbvu%Ip|HE(D_dURW#gKmUOS
z`2n&n^?jb=UX#fm|3EA7Z7B{Xs^ak_bW=Y+{`OK#D-LK!u0gIMnbgnV7s61nWz440
z?$EyG%|AieU9a{pdthGW@PB}tGER-X2j7bg-v-<Z*dpy=k#~hD)VBlX0~RW<{R_=Z
zIjmOy4z^&#*0hoOF~uQbTz9cP!VATn!-CmKjQ)DzZ$<8G#+c7R8py}_-@(#;`eBoN
zR;0wWg!2<ROH<eWZL^GLf7xFpiDH|_Q44X9a+X06YKRX}+AF3?<qD&B^iHBOIrg=z
zGPlv)Y(N&IQElkPkXyVfmdAxXAr0l?j?rWoWH-^{&Q6G3ec~IJfd@i7d0VgF0J%Yw
zmC@W=zlY850O<NZ+O^Bk1$=*?I^4M(FQ7tF5~Ch`;~^FwwW|{58_3+-PNK@5B7RMX
zVgQqLzdh#{RM)&Iqa~rV)|K%AGb~OVZm`c8qICW!sC~qwTv%mfNK`3G5@M{P#nHnQ
zK`gFGVHzkNSk$r|0>2Ooo!aPbbX{59aU=|qP6(=JVYrb=TgAE?0<I%y09)M)<;A2`
zWf)+W#*D|9Bh!{R1d5Bw!l_cAW+!i>x_3~&K;r~`EY$g?6Es<@Okz=G838FoM44QQ
z;Wj7!9-m5^K64RUl51~kZEbauWR{*Gk(IrJDw=`Uj*VR|)}ExrqXR;+g*N<#@W8|G
z?xI<|8c11DQyQQ@LFM+bo<whcNwEF`lJnhUfGk_3x-{<?YP2n}RmPAZ$G@&~$>GN%
z5h6J+^If})l3rv<w}^@&mIP7{8D*PnA2pwV7SLglAsT7fh}DER-+_H)IiVO*P`8q?
zO}CrVK2K1(Ew8hpK$UVoRhuFfk5e)9GDy#-6z~QTdY)KKRy<yA<Gl*)@XZ(DAn=ey
z8N}8~qa3Mz8r<KsUYvheq_tyV4z6A4GE#VuXc%c?NW4iE*NiU1wM6gSgyRI*pgTxC
zVo)x)6B8Z2XT#x&+NROXXqzB)I9CF7WqZ3MuVhfo*ET`w+u^!~EDtkr_4=S~y<M}Q
zzYz1JETofxUT^b2b#*V^5TxQTdOY9?C|HdY;ygi=PRwqwB(?VOt}OzB?JEOHP;0xp
zF6ZG9o#^2PI{2;KL-Uuz({V7AFc2C0Y29htYuNDy85X6v0adE?%}e$@8DhV}QZ?K_
zxw~x`sc(l<lOw~XK8vA5NQI!hT+lCsJy`@`C@uAS*xd5#;fWO#W5AheD*dTt6>NKj
zkP&q~1u+$ys()@|=Ql*>-I@`lO5D8or>^15E5sZFMayItEa+;(<HwqxA!r-3k!HR?
z51fTN@5hZ1{{peFIy$6^_jl|VB)^9vxvj$qY1!LU#-py~ldq7fHU$_Jr=~08k#FMA
zZxHE~%Rr%^Z)h`f%IENg{&aF99h_m)S0m0@$)+oq%}s?0GJ4oeV`1*d+GLs7dy@bf
z)+{H^XCmSi)7rcTM5#rKbBhe4zwJ9_)N@kqAvc@%$lC;qrm$>z-KCMEZ!VXYBeBwn
z>D$@g6Z!>$RigCztmzNr(mhPk@3nSpOY^5-Q6KH(+zg~Hf6VRC&wd%13)DBQqU*44
zMbg0w!yyaVE9u_5#*JRK$*$l985G``uQ<q39?^~NGC()P;bIe&GOyAO`v@teOp>$G
zQc#ao2Ivig^P6R$erXuSOkX=eyhA!-$}$k#>1~@He1mjpuFIg)O}`d|XNX1lc^V^&
z#qu4}5t@^s5C3fEu|VoX3_@SHX=XGT!KEs8K9ASFR4Ka*E((jlh7!*FQlqfI&apy%
zsvv)gHDtxT{&VQP|ANK|vN>sjo<v6Vz1344;A8<=bRR{itB`Cpi1JOXWpYw(V6?T6
zSU_XkiC2;Lvis(4SUU|=M7%fxMy0pP+5+PNYb%=yz-%}&Z}@x6Foc)DCT`)-v$I8<
zR6*q_6HIR4k7(-7#O+T#PTAl>CCCcTTex}d`B2Q7Vaoc>Wz1zwhR>+%j<<*o&zy>A
z^|0ddCC>LpIPrYcLO=r}mjWKQJ%WdeFDQLTwiQuk-?OwwPqFcG`u&Ju*F?1uH4oI6
zT_32@dg4##Z}h)7*cPC6(03@svHBYvo;%Wb!=@M%OHN_Bbz5aTd@Ic66;XhU6XUy=
zC6^ofbpPxP2L;t$`Jr!23ve&tMzeo_=YVf(uZ%sNKD(A3O0oP2kNSuTW{*z$FzrO9
z4Ws@6POUWEQJ|lozNSz-vvx1}fn)#p61&#uJF)XprvRf`6)fl9&s2%WrY-^q`;NI8
z+3=Z`^Lvf({SRcMZEVn3G*?tFtmx@hFkHj_!%rnkFFH{UO-Z5G>7--b4ya*2ZKVpy
zAp9tWKsB*Sed{`SAK{H!7PjRPLF!(gMYU5{@%zzlt1BDz{8m!1D%e8V|8I}K+a=R<
z=|C2=&iXC$%-3#THS9lq^fciSWm*W-!%MX6O|A<6?E!9c1ltg{P&FLRsZVsT23S|R
zc+Gq<Q}}LySDRZRtm?O9`PiZ@^t1r_r_zeB%q6Fd`<hW6&dq+ldcJh>Kgz7txXv5J
zWe4u?PD!@(Ue)EpOG{R1li9N1hK?wo!|<URhaE(%IAwL>+WyiZ=CTzHO@wAQz7MOG
zZy(1^yuLarF;JguHd5`g1#iL8ssXf)`U`K91|eN)RsO3!`-Fb>=V2GGVRuMXF5~Ce
zps7<=W*zo5?4La{)A0V#Dn`s{_PDES(q$cX7vC68N7Tv{ozI_ox-XYy54K}*YeG)`
z`;anab8&~a`-I($BFa(3zmlJ|=0fTb_i%FysrQbQw<6}R#C-1SPQxRfK*Oe|NGmQ%
zoP2YOMEru!dwfAYAEtK_kH|@$_Vo5rws(A!VjQd{a!x*W%>!6-S@mdp{Gc99Bq?*`
zs}~bdGXKe2xTxYt*wBH^oQ1B88ej|7*7@>yz}t*N*|W_YfmDu*$(pP7dA`2jk!=gK
z5m|Mje|I<zH&^cgpP}|4td2fH^W{GETSJz0wM@UOrK%QEDKUtlpd9n6z_%_|7rgVD
zXn973Zt%rAvCsDTvrpG&4^y7$ioSfs7fW$pKP@&FIwu5h>6TcMYWKC&##;-7_c<$H
zK0bUbtvJOiHsV;5MeBTVVVkYA(tQuSH73in&UFu`MGKoI9TcTEFBwhcCS|_I61F4Q
zNfW<Q`|8LmLGb}Bd2eYEn_uo*@2JDyCb?sawi)6;w*xtV<Run;+^Q9pXzT<J3j8Jg
zo{)}-Q-FT`$d-WMde~$^wYg`11%{@}5u!HxH@tmgFjM(6?0$Jm1R5mVqQSoZ1KsYO
zYHl{TVsMrRJ&M+B`~HQSBhLpUvVWW5&>$)Bd47HwFt#WS&GT7}ALSL@IPuT;L0XY{
z{?Imw=<%d^KY11xy5UG_cCLzN#MK=)di^>(U-&_3pLmH$jDDZt^6HeW6YMqQgG7q7
zSqLXhwUa5|B;RV-k6U_S!BI#F`a5JQ`BgWmy>*7X@D?OX(!(ME`oC$f2v+9Jv)K<m
zTgc=+qgBoZxa*Z3?6NKR{Y;Y)WFw_-<K6rOpX6>!W7t1@n;OvY{GsGfqDpmo#OsIi
z0M7xv16xt~e)}X~G?dRi&N$`F0IA#&IXIq<*WitG_Y2Av9hH=4ri|<!%tB`ccMHC6
z@#M5o#I#EwC~?06$%ii^=fWp?a#6u;@dUgfg+j5dWqq@w-yJ`?_h&uWsz1DpH`Z~}
zs(pWW^->+8sWf6I;+<%X88_Y5b;_#O0u{R|C}_kg;Q-Bf_3qP-J-(|e9KVCCmz=_Q
zOJb}p?FEOLhsxp@_N7&ScR~O4C%n1BiDpvt@?#%Va~{gSU!tiCkEka;A)2o?x;+U*
zPVeKS_Y0?^Ok&DRYM?q3U}aru>#=`%v?G8VY*TjV>ME!d*tolu``1Sjh0elE6nuH!
zvW0D%Pb<zZdJxCkoN0^wG2OCyuQ6Wd+zTIuvzKYJ%GveuQ@Dv4*F8L;yhsWsYu1rh
z*HEGQ9L$|~1##rLbJTBt$mcJ!)&(ye4j7H65B79jCUf*v@OgH|xZ`U+AYf-Ya@umC
z1V4pY>)4C?v1bmkT{zt^4MJ&_QziSYLgx`r-|T}{UWnOGww7JWz9jX#{CxGpBHkmS
ze+T7zGe}zt-wxmJYA9?3T6tkq8T%&g;c)3j6dii(QTtuJ$rxK^E;Ou(unCrixo*(R
zcSLVuwp^8W0S1uFZ|+?iS2D(yLu;qVC!(0=0wG;Gt7eRCWluP~EgT58wd;`4=yo+!
zs}uX0{13jLBuJ}4$^%D~F<$tw%LA_Gy;#^$DU)EHmoZ+N>TOt{alS`-)lsspQm}T&
z(&*Tqupjejv-ZlHk(Fh|5<eEYe^bD{4*Rx`T3tz$>KVEe!u_oJ4SOcBQ?^9ZvV{P3
zun5Hhd8DvoR1N=_?W?tAH=%Tz5tqlPd%9zqS8mfqYieSR3I;|U`%=|3ugTo`nxS2G
zRBxjyPq$S5=s9j``zW}C?~amNlm&Nlcbe)kRDU@XE1jT^T|vGnlXl<lqWM`d&%7Pe
z=)~aumzN^E7XbUr(G4{S(YLy;NUwn0NUgXrNbdD7XRr^6cwR>4T^?J&m;f6S-h;`E
z!90@FY*K%_s9y+;?xGBIu<&tNoz`$*g!8H(*s}R!MpA>*?0Xt{8BZ?&)ag#czUy3f
zVnrjA3s5rMe%&Z)HuD8g6XnM;)YsjklJp7DDMoXH4T7yk)hnQh?~IZ8R<Z{!V|5_k
zAT{c(nL(G$-2A@BGosV=Q3kp-6nA-OzSczjLTDA$vWyf}apsi^Yw_|cq?tEWWI&Q*
zVJ3$CaPxFU)YhLp-N+kkUX&Z_3aJ+avBlyBNs8R>1x{feTuSJY7CEMoy_W6NG1=<Z
zdYO<d7xeTa-&!F?VC24SbXw2jZEYcI!TKyC@0%^mWqC+XHubLcxybN(g$*K}2-3xs
z0uBi5_-@-tcP``rGOZ$5$n6ne<gvYpqnx+FW=6sp4Xw-YZq*ya{MOCCg0PIfPjsU^
z^kGWpZP-}vokwdgT}C<f=cwgGjMAl|f|?LU*{%fPaF+Q%Yw@5BGe(h7o_al~uz!N=
zc`AF5N|sT=$9GPxRL%$$jVhyTx!U1qbJilWR1RBODe03@;*&>_E)0#)xD|`3WxRM3
z+sm>Ds9O}K<`ObWdi0a}IaKg<pwCuY1Z9*5pU-&&AQE~_38Tp<TT*p6&PB5pno$pQ
zYi#DiC|g=}NavmKglSrW)Q*gjPo3GNRBr;B%{P?mSDR5}xYA*|KdWPY=!nI43+19)
zr6_BAcqK|+ebJha6V_?xPP-|yx?6FpNb(Nq7igTIeSP2bHU9J88My?DT$sGbu$r5v
zG|Rf*{EJalUkc%0_)$5vy@luvOGaMj{Tr%*f)kw}<2Z+lEs)Q!82^!UMt%Aij;hC6
z0ks7&o51>@*jU2#;S0>x?|;m&dC(e?QRH;OKY+}cgp7(y`Lk~(vH`IG(2koPO&=Ps
z{DL}nWs4?-|GoO<l_%!lJ1nAZLt#sGnb<rY2ypK(94MR{rXZ&&#x?i6#i3?I{RtPx
zKx9(>1FS~m7>Fxpr~A4FxqDcq1h&!+b@pey6{;<;%m7K0dqSg{e8Mj;S{P9*Mx6b<
zDUALD2zeXmIC1Jqe|yo{$Gn=OnRlGM(@J~j)bb81)akKN#3<nr=KO7d2uHTpmfMyo
z*o%()^6Z6cc7=!ssn4|!{Wa0J#dV~r`Iq{_KR*<k&|oI<1C46ma;R=_EcPPG6R2ws
zYuqW`J76pHC*h~g?%;nqA;l=f=n>(1S2?@!5oXcnGAQZUN&7sq)w5xEztf?2$d=X3
z)_!82Wrus|^94saan=tf-gjc!;ixY>^h3`yA2T1{q0J&1X|xlsD$DZc`FUJBik%!J
zzA2La!wl;nv}F(~fD`3SEpMv34>;}n@H4-@{^c5d)@y99=K4PX00960jGapogfIw1
z*K-@cXzqWp;WfID5N9^-x>BYTI-#L2SEn{I@p_`1Ak1w4dWYtvlbV)`t=uO#U7A7|
z+`2<i3A2edPt$~n7sswAh%S@;<Da42SrlONd9}uE9IY9JcL!!l$*;IGJ`s;z4N-+6
zbEl*#K~8y8n`gpC$Y64vu7s^Sl?l%{jB3?}WDP<MYgpB6qE6b!&QmhMjt$ymMl+i~
zuk<}=C4Yht&@v(dXVpkNXBPO>I4aFyM|;0qZo(PhJQ5)L#yumFPbZ`+af{9B7dPe#
zhs5Ep_v2z1f_!qX7TM<q%{X?8#0p@BL5Uf@|2Q7RwVRr%r;4tY8d;zv7y~YKNC<7C
z^M!v_`vL#}|NoSoO_t;!41?Emo0t#?mzZ^O|BDQtOm!%r=2dUHyg;3oA40HYDSADj
zKGB=s0ULcd8|?ZnU3=j#i!!4Ael$BuJ{CG8^e{59Nn95R^nMs!cc*YD10G+d`#!a+
zwSQY;Y5Cls0=Mg5?CDKYw-xv<hGn)3{L*vrqYW@_8+|zM-a?Oheb=9C(VOrvtY(US
zW5d=(*Dx*xG|wGD0llYx-FRe76{U8V1G8uP6KaqZly$}+ya4o(^D4uINI{*L%f7Hy
zD7YvTm^?3~t0(izZK*2e-Nu*exyc>NL`jqm`j(eT?f5zqM1&#TF=da_BdV{@e*XPJ
zN6#Dptnc+q{elQem2Ab_mQ|Jr&(RJiiCnCvbT^+`UQ1AfAMyH84A$<EMfzSK5)x%a
zAs@@@*UY6ZRB`G`rH+CcMMb-p9X`&G`@4yB|6(HDznVz*w-f39-9);-oJh$FXg4M=
zi3J#y5s|P|emQ3*zyqKq5#5JPzqZU&J;PBqObj=~?$z}SeRC|dcjKdLM8M#(CQuNz
zF@f)o%uOYZv@gXq{n}bn(EG`5UhFKdQPa|cwSr$dUVtOY$dgoo2g_trO0TDYnkTI^
zHE_<UIXfau^6ou`)ENg!he^7`SzvMGnf4H**VzNpk02Q|(<k0YJn9fR(z-&B5Y4mb
z$)<@8+Cz%&+E#b5kW<`ZH{^*Z(Ls<i-aUYD>HfO=sQ2`SP}>;UN)O^%I`apRV-E7d
z&9sX%@on)P(Ud;{oLhkcP1bQE+3)&aGi9Jd&_t;0m1VVRxyAQBlJYvl5m-?O4xSNn
zLQt3*Q#wd?O&0=eSWG#?H0D8?;I9y66YU4ja=sGH<^7I{Zr4N7fp}I8PO%(aJt0l}
z#0R3@B&iB-1pzzr`qP-m=n+AW5y37Mw<e;Hrz`wG>PNL?m`hz=pl_-Ib&gco9^sPq
z1h|j=F%_W7d>1>!LA93v!W6t-U9M5v=M7Ta9;;-TmYWJtY3g+75RhL(3&=|4l|tJC
zATyyp6zVXPGfd^DJnku)R;&=H5}gWmXtixA9rrwMh@{it^Iop1?vFCj5<3KhSIO7H
zWaJo50-kPqhdB4x)TBuonhH--V{(t!Q*iz1ofNF}&^Yt)cI4OV+7NEoBsLp8Sk{p`
zK*Flq0a+V#*E^W{%Q^%uYS9DJA>7#OGk|_=0a9@Fd;jztd+6REJC$sLsP4sWRRV5W
zxeh3l$ZG%vM83LO5dMS6;)47G00960eVs9G!!Qg*=iJ5;EipYq(E=ShXA6)U^!Vi?
zWw#ZhPUesgE4D1t;{Tr!&^TjzRs#M5kdRSzOislsxh&L)=RR&9&BGOXqy;U4RqHfu
z+mVeJoSc@bE<8ypB7U(FCH@y=hfLvt*W6@_f>6>*{UB@%MIyjCPP_-NWIEuPBw03J
zBPq4i;>tOE!J0jX#?S4!MrKV@+z7;Z&QaZ2RdmaE;GqFoYW#w~MfD&Yl=3G*9p`IK
zh({f&?m!F4^xu3|Zm;j~niSK-`{T>W=$EU_q4?%a4rnVJ>xRpssajv2*`O_v>V_6@
zY(^O%d7-VKs9OX9`_W)yel@LtbS{Cpu^-Jgk}Vp-DMOZK&?Yd4&OfTI-$Z_hW+4cu
zbAhTyFT@PrZLs!!u%pd{zJ~@|B#Dqu_y_hd><R^Qy1Lm-!E@RM?o&@_%a{b@KE=!J
z?X#-bx8A^ad=bPB>YHc2Ycn%94x5dDwsYjY!IsvIPP}oCZE2dfQoaG4{Afx~ssI2y
CmhRdB

literal 0
HcmV?d00001

diff --git a/man/add_centromere_locations.Rd b/man/add_centromere_locations.Rd
index 5ebd865..6478d90 100644
--- a/man/add_centromere_locations.Rd
+++ b/man/add_centromere_locations.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/centromere_marking.R
+% Please edit documentation in R/chromosome_layouts.R
 \name{add_centromere_locations}
 \alias{add_centromere_locations}
 \title{add centromere information to reads by chromosome}
diff --git a/man/add_chromosome_length.Rd b/man/add_chromosome_length.Rd
new file mode 100644
index 0000000..5686d9e
--- /dev/null
+++ b/man/add_chromosome_length.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/chromosome_layouts.R
+\name{add_chromosome_length}
+\alias{add_chromosome_length}
+\title{add chromosome lengths to a cn dataframe}
+\usage{
+add_chromosome_length(cn_df, version = c("hg19", "hg38"), chrom_col = "chr")
+}
+\arguments{
+\item{cn_df}{dataframe of copy number information}
+
+\item{version}{string. hg19 (default) or hg38}
+
+\item{chrom_col}{string. name of column with chromosome information.}
+}
+\description{
+Handles chromosomes specified with "chr#" or just #/X/Y.
+}
diff --git a/man/add_telomere_positions.Rd b/man/add_telomere_positions.Rd
new file mode 100644
index 0000000..cff59de
--- /dev/null
+++ b/man/add_telomere_positions.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/chromosome_layouts.R
+\name{add_telomere_positions}
+\alias{add_telomere_positions}
+\title{add telomere positions to a CN df}
+\usage{
+add_telomere_positions(cn_df, version = c("hg19", "hg38"))
+}
+\arguments{
+\item{cn_df}{dataframe of CN information, segments or read bins. Just needs
+a 'chr' column.}
+
+\item{version}{string. hg19 (default) or hg38, not that it makes a
+difference.}
+}
+\value{
+input dataframe with new columns of telomere information
+}
+\description{
+See \code{\link[=import_telos_file]{import_telos_file()}} but really just a convenience function.
+Telomere positions in the UCSC gap file are all just 10Kb, except chr17,
+which I set to 3Kb and 5Kb, again see linked function for details.
+}
diff --git a/man/fill_state_from_neighbours.Rd b/man/fill_state_from_neighbours.Rd
index e6dfd3a..4d3ffd7 100644
--- a/man/fill_state_from_neighbours.Rd
+++ b/man/fill_state_from_neighbours.Rd
@@ -8,7 +8,7 @@ fill_state_from_neighbours(state_df, cols_to_fill = c("state"))
 }
 \arguments{
 \item{state_df}{dataframe of state data with NAs for some bins that need
-to be filled in. E.g., after dlptools::running add_missing_bins_for_cells()}
+to be filled in. E.g., after dlptools::add_missing_bins_for_cells()}
 }
 \description{
 This function fills in NAs of columns with the information from an upstream
diff --git a/man/import_telos_file.Rd b/man/import_telos_file.Rd
new file mode 100644
index 0000000..fed1dca
--- /dev/null
+++ b/man/import_telos_file.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/chromosome_layouts.R
+\name{import_telos_file}
+\alias{import_telos_file}
+\title{load UCSC gap files for telomeres}
+\usage{
+import_telos_file(version = c("hg19", "hg38"))
+}
+\arguments{
+\item{version}{string. hg19 (default) or hg38}
+}
+\value{
+tibble of telomere information
+}
+\description{
+Honestly, this function is sort of pointless. The telomeres in both genome
+versions are marked as 10Kb for p and q on all chromosomes. Telomeres are
+variable and better estimates of their size exist, e.g.,
+https://www.nature.com/articles/s41467-024-48917-7
+}
+\details{
+This function just serves as a way to set up easy marking of where CNs occur
+on their respective chromosome.
+
+The only exception to the 10Kb size is chr17. It is not listed in the gap
+file for hg19. chr17 is known to have small chromosomes. Here, I set the p
+to 3000 Kb and q to 5000 Kb, inferred from the article referenced above.
+}
diff --git a/man/is_chr_used_in_chroms.Rd b/man/is_chr_used_in_chroms.Rd
new file mode 100644
index 0000000..c47a6a6
--- /dev/null
+++ b/man/is_chr_used_in_chroms.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{is_chr_used_in_chroms}
+\alias{is_chr_used_in_chroms}
+\title{detect if chromosomes labels include "chr"}
+\usage{
+is_chr_used_in_chroms(chrom_vec)
+}
+\arguments{
+\item{chrom_vec}{vector of chromosome labels, probably pulled from a
+dataframe of cn information.}
+}
+\description{
+detect if chromosomes labels include "chr"
+}
diff --git a/man/load_chrom_info_file.Rd b/man/load_chrom_info_file.Rd
index a93a162..c809b40 100644
--- a/man/load_chrom_info_file.Rd
+++ b/man/load_chrom_info_file.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/bin_filling.R
+% Please edit documentation in R/chromosome_layouts.R
 \name{load_chrom_info_file}
 \alias{load_chrom_info_file}
 \title{loading UCSC chromosome length files}
diff --git a/man/load_ucsc_centromeres.Rd b/man/load_ucsc_centromeres.Rd
index 03c8d60..6af3c7d 100644
--- a/man/load_ucsc_centromeres.Rd
+++ b/man/load_ucsc_centromeres.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/centromere_marking.R
+% Please edit documentation in R/chromosome_layouts.R
 \name{load_ucsc_centromeres}
 \alias{load_ucsc_centromeres}
 \title{load and prep default centromerefiles from UCSC}
diff --git a/man/mark_bins_overlapping_centromeres.Rd b/man/mark_bins_overlapping_centromeres.Rd
index e0bac20..cb32121 100644
--- a/man/mark_bins_overlapping_centromeres.Rd
+++ b/man/mark_bins_overlapping_centromeres.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/centromere_marking.R
+% Please edit documentation in R/chromosome_layouts.R
 \name{mark_bins_overlapping_centromeres}
 \alias{mark_bins_overlapping_centromeres}
 \title{add boolean if bin overlaps with a centromere.}
diff --git a/man/mark_segs_chromosome_span.Rd b/man/mark_segs_chromosome_span.Rd
new file mode 100644
index 0000000..d27ecbc
--- /dev/null
+++ b/man/mark_segs_chromosome_span.Rd
@@ -0,0 +1,67 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/chromosome_layouts.R
+\name{mark_segs_chromosome_span}
+\alias{mark_segs_chromosome_span}
+\title{label CN segments based on relative chromosomal positions}
+\usage{
+mark_segs_chromosome_span(
+  segs_df,
+  min_bound_distance = 5e+05,
+  min_span_of_chrom = 0.9,
+  min_span_of_arm = 0.9,
+  acro_fix_whole_chrom = FALSE
+)
+}
+\arguments{
+\item{segs_df}{dataframe of CN segments}
+
+\item{min_bound_distance}{integer. Distance to adjacent feature to be
+considered associated with that feature.}
+
+\item{min_span_of_chrom}{float. Proportion of the chromosome to cover to be
+considered a whole chromosome segment.}
+
+\item{min_span_of_arm}{float. Proportion of arm to cover to be considered an
+arm segment.}
+
+\item{acro_fix_whole_chrom.}{boolean. Whether to reset acrocentric
+chromosome CN segments to "whole-chrom" if they span futher than the Q arm.
+Honestly, probably not useful}
+}
+\value{
+input dataframe, annotated with segment scale information. Primary
+column of interest being seg_span_event.
+}
+\description{
+Uses centromere and telomere coordinates to label where a segment sits on a
+chromosome, relative to telomeres and centromeres.
+}
+\details{
+Possible categories are:
+\enumerate{
+\item telomere bound (telo-bound) - segment touches a telomere
+\item centromere bound (centro-bound) - segment touches or crosses the
+centromere
+\item arm (arm) - segment spans a whole are (*with conditions)
+\item whole chromosome (whole-chrom) - segment spans the entire chromosome
+(*with conditions)
+\item intersitial (inert) - occuring within the chromosome, not touching the
+centromere, telomeres, and not big enough to be an entire arm.
+}
+
+You can set a min_bound_distance which reflects how close a feature needs to
+be to be considered "touching". For example, we can considere a segment
+telomere bound if within traditional 1 DLP bin by setting this distance to
+500k (default). This allows for some level of measurement error.
+
+Users can also set the proportion of the arm or whole chromosome a segment
+needs to span to be considered either category. Default is spaning 90\% of
+either feature. Meaning, if the segment (end - start)/arm_length is at least
+90\% of the arm_length, the segment is considered an "arm" spanning segment.
+
+This input dataframe requires several columns to be pregenerated. Users need
+to run \code{\link[=add_chromosome_length]{add_chromosome_length()}},
+\code{\link[=add_centromere_locations]{add_centromere_locations()}}, and
+\code{\link[=add_telomere_positions]{add_telomere_positions()}}. See the the \href{https://molonc.github.io/dlptools/articles/feature_extraction.html}{features vignette} for more
+details
+}
diff --git a/vignettes/feature_extraction.Rmd b/vignettes/feature_extraction.Rmd
index b483501..b6f26fb 100644
--- a/vignettes/feature_extraction.Rmd
+++ b/vignettes/feature_extraction.Rmd
@@ -106,3 +106,25 @@ sig_feats[1:4, 1:4]
 <br>
 <br>
 <br>
+
+## More Features: ploidy informed CN change
+
+CN change relative to ploidy might be interesting to have a feature. Here we can mark 3 types of events for segments:
+
+1. CN is a gain relative to mode ploidy of sample/cell
+2. CN is a loss relative to mode ploidy of sample/cell
+3. CN is a match to mode ploidy
+
+Employs the function dlptools::mode_ploidy() to calculate mode ploidy of each sample/cell.
+
+```{r}
+dlptools::ploidy_relative_cn_state(
+  segs_df,
+  # with option just to annotate the input df
+  # annotate_input = TRUE
+  # or, alternatively, return a count matrix
+  # return_matrix = TRUE
+) |>
+  dplyr::slice_head(n = 6)
+```
+

From c4d3628e2b509cd7d9a8b908a89e4e4d9f961149 Mon Sep 17 00:00:00 2001
From: benjaminfurman <benjamin.ls.furman@gmail.com>
Date: Wed, 27 Aug 2025 16:02:54 -0700
Subject: [PATCH 4/7] feat: count features of segment span types.

---
 NAMESPACE                                     |  4 +-
 R/chromosome_layouts.R                        | 65 +++++++++++--------
 R/cn_features.R                               | 55 ++++++++++++++--
 man/SEG_CHROM_EVENT_LABELS.Rd                 | 16 +++++
 man/add_centromere_locations.Rd               |  4 +-
 ..._state.Rd => extract_ploidy_cn_feature.Rd} | 10 +--
 man/extract_segment_position_feature.Rd       | 37 +++++++++++
 man/extract_wu_features.Rd                    |  5 +-
 man/mark_segs_chromosome_span.Rd              | 10 +--
 vignettes/feature_extraction.Rmd              |  2 +-
 10 files changed, 164 insertions(+), 44 deletions(-)
 create mode 100644 man/SEG_CHROM_EVENT_LABELS.Rd
 rename man/{ploidy_relative_cn_state.Rd => extract_ploidy_cn_feature.Rd} (71%)
 create mode 100644 man/extract_segment_position_feature.Rd

diff --git a/NAMESPACE b/NAMESPACE
index bded12f..03c139b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,6 +7,7 @@ export(BAF_COLORS)
 export(CNV_COLOURS)
 export(DEFAULT_CONTINUOUS_COLOR_RANGE)
 export(RAW_COPY_COLORS)
+export(SEG_CHROM_EVENT_LABELS)
 export(STATE_COLORS)
 export(add_centromere_locations)
 export(add_missing_bins_for_cells)
@@ -25,6 +26,8 @@ export(convert_long_reads_to_wide)
 export(create_expected_bins)
 export(cust_mode)
 export(expand_length_to_bins)
+export(extract_ploidy_cn_feature)
+export(extract_segment_position_feature)
 export(extract_sigminer_wang_features)
 export(extract_wu_features)
 export(factor_column_mixedsort)
@@ -58,7 +61,6 @@ export(mark_mask_regions)
 export(mark_segs_chromosome_span)
 export(medicc_profiles_to_foreground)
 export(pairwise_bin_difference)
-export(ploidy_relative_cn_state)
 export(plot_bg_state_highlight)
 export(plot_fg_state_highlight)
 export(plot_heatmap_of_tip_changes)
diff --git a/R/chromosome_layouts.R b/R/chromosome_layouts.R
index b67aa44..0630ef4 100644
--- a/R/chromosome_layouts.R
+++ b/R/chromosome_layouts.R
@@ -1,3 +1,13 @@
+#' global of event labels for [dlptools::mark_segs_chromosome_span()]
+#'
+#' Set as a global, as useful in other contexts
+#' @export
+SEG_CHROM_EVENT_LABELS <- c(
+  arm = "arm", whole = "whole-chrom", telo = "telo-bound", centro = "centro-bound",
+  inter = "inter"
+)
+
+
 #' loading UCSC chromosome length files
 #' @param version default "hg19", can also load "hg38"
 #' @return tibgble of chromosome, total length, etc.
@@ -54,26 +64,27 @@ add_chromosome_length <- function(cn_df, version = c("hg19", "hg38"), chrom_col
 #'
 #' See dlptools::read_and_prep_ucsg_cenrtomeres() for details of file origins.
 #'
+#' @param cn_df dataframe of cn states for read bins or segments
 #' @param centro_file NULL or string to the path if you download yourself
 #' @param hg19 boolean to target hg19 for loading
 #' @param hg38 boolean to target hg38 for loading
 #' @return input table with centromere information added by chromosome
 #' @export
 add_centromere_locations <- function(
-    reads_df, centro_file = NULL, version = c("hg19", "hg38")) {
+    cn_df, centro_file = NULL, version = c("hg19", "hg38")) {
   version_choice <- match.arg(version)
 
   centros <- load_ucsc_centromeres(
     centro_file = centro_file, version = version_choice
   )
 
-  reads_df <- dplyr::left_join(
-    reads_df,
+  cn_df <- dplyr::left_join(
+    cn_df,
     centros,
     by = dplyr::join_by(chr == chrom)
   )
 
-  return(reads_df)
+  return(cn_df)
 }
 
 #' add boolean if bin overlaps with a centromere.
@@ -316,11 +327,10 @@ add_telomere_positions <- function(cn_df, version = c("hg19", "hg38")) {
 #' either feature. Meaning, if the segment (end - start)/arm_length is at least
 #' 90% of the arm_length, the segment is considered an "arm" spanning segment.
 #'
-#' This input dataframe requires several columns to be pregenerated. Users need
-#' to run [dlptools::add_chromosome_length()],
+#' This function runs several other functions including:
+#' [dlptools::add_chromosome_length()],
 #' [dlptools::add_centromere_locations()], and
-#' [dlptools::add_telomere_positions()]. See the the [features vignette](https://molonc.github.io/dlptools/articles/feature_extraction.html) for more
-#' details
+#' [dlptools::add_telomere_positions()].
 #'
 #' @param segs_df dataframe of CN segments
 #' @param min_bound_distance integer. Distance to adjacent feature to be
@@ -329,6 +339,7 @@ add_telomere_positions <- function(cn_df, version = c("hg19", "hg38")) {
 #' arm segment.
 #' @param min_span_of_chrom float. Proportion of the chromosome to cover to be
 #' considered a whole chromosome segment.
+#' @param version string. hg19 (default) or hg38
 #' @param acro_fix_whole_chrom. boolean. Whether to reset acrocentric
 #' chromosome CN segments to "whole-chrom" if they span futher than the Q arm.
 #' Honestly, probably not useful
@@ -340,20 +351,12 @@ mark_segs_chromosome_span <- function(
     min_bound_distance = 5e5, # given scale of DLP, this should probably be 1 bin width, at least
     min_span_of_chrom = 0.9,
     min_span_of_arm = 0.9,
+    version = c("hg19", "hg38"),
     acro_fix_whole_chrom = FALSE) {
-  required_cols <- c("teloend_p", "telostart_q", "centro_start", "centro_end", "total_length")
-  if (!all(required_cols %in% colnames(segs_df))) {
-    stop(
-      paste(
-        "required columns missing! Need:",
-        paste(required_cols, collapse = ", "),
-        "see functions: dlptools::add_chromosome_length, dlptools::add_centromere_locations, dlptools::add_telomere_positions",
-        sep = "\n"
-      )
-    )
-  }
-
   segs_df <- segs_df |>
+    add_chromosome_length(version = version) |>
+    add_centromere_locations(version = version) |>
+    add_telomere_positions(version = version) |>
     dplyr::mutate(
       # find closest telomere to the CN event
       telo_p_dist = start - teloend_p,
@@ -399,12 +402,12 @@ mark_segs_chromosome_span <- function(
       # trying to be simple and Shih et al. 2023 10.1038/s41586-023-06266-3
       # inspired
       seg_span_event = dplyr::case_when(
-        spans_chrom ~ "whole-chrom",
-        centro_bound & telo_bound & !spans_centro ~ "arm",
-        !telo_bound & !centro_bound & !spans_chrom & spans_arm ~ "arm",
-        telo_bound & !centro_bound & !spans_chrom ~ "telo-bound",
-        centro_bound | spans_centro ~ "centro-bound",
-        .default = "inter"
+        spans_chrom ~ SEG_CHROM_EVENT_LABELS["whole"], # "whole-chrom",
+        centro_bound & telo_bound & !spans_centro ~ SEG_CHROM_EVENT_LABELS["arm"],
+        !telo_bound & !centro_bound & !spans_chrom & spans_arm ~ SEG_CHROM_EVENT_LABELS["arm"],
+        telo_bound & !centro_bound & !spans_chrom ~ SEG_CHROM_EVENT_LABELS["telo"], # "telo-bound",
+        centro_bound | spans_centro ~ SEG_CHROM_EVENT_LABELS["centro"], # "centro-bound"
+        .default = SEG_CHROM_EVENT_LABELS["inter"]
       )
     )
 
@@ -423,11 +426,19 @@ mark_segs_chromosome_span <- function(
         ),
         seg_span_event = dplyr::case_when(
           !(chr %in% acro_chroms) ~ seg_span_event,
-          acro_span >= q_span ~ "whole-chrom",
+          acro_span >= q_span ~ SEG_CHROM_EVENT_LABELS["whole"],
           .default = seg_span_event
         )
       )
   }
 
+  segs_df <- dplyr::mutate(
+    segs_df,
+    seg_span_event = factor(
+      seg_span_event,
+      levels = unname(SEG_CHROM_EVENT_LABELS)
+    )
+  )
+
   return(segs_df)
 }
diff --git a/R/cn_features.R b/R/cn_features.R
index 5dbff35..7d2cbd2 100644
--- a/R/cn_features.R
+++ b/R/cn_features.R
@@ -23,7 +23,8 @@
 #' in their actual code is set to 1. This function follows their code, but can
 #' be altered.
 #'
-#' @param segs_df a dataframe of CN segments
+#' @param segs_df dataframe. CN segments
+#' @param sample_col string. Name of the column with cell_id/other sample name
 #' @param state_bin_max int. Maximum CN to consider for bins. All CNs of this
 #' value and higher are grouped together. Default of 5 follows paper.
 #' @param bin_breaks floats, how to break up segment sizes. Bins will be one
@@ -35,7 +36,7 @@
 #' @param ... can pass change_split_val to alter critical value for AA/BB split
 #' @return default return is a tibble of feature counts for each cell id.
 #' @export
-extract_wu_features <- function(segs_df, state_bin_max = 5, bin_breaks = NA, annotate_input = FALSE, return_matrix = FALSE, ...) {
+extract_wu_features <- function(segs_df, sample_col = "cell_id", state_bin_max = 5, bin_breaks = NA, annotate_input = FALSE, return_matrix = FALSE, ...) {
   # paper code: https://github.com/XSLiuLab/single-cell-CNA-signature/blob/main/code/divide_feature.R
 
   # default segment size breakpoints
@@ -61,7 +62,7 @@ extract_wu_features <- function(segs_df, state_bin_max = 5, bin_breaks = NA, ann
       !dplyr::if_any(c(cn_bin, seg_bin, seg_change, seg_shape), is.na)
     ) |>
     dplyr::group_by(
-      cell_id, cn_bin, seg_bin, seg_change, seg_shape,
+      .data[[sample_col]], cn_bin, seg_bin, seg_change, seg_shape,
       .drop = FALSE
     ) |>
     dplyr::count() |>
@@ -221,12 +222,13 @@ extract_sigminer_wang_features <- function(segs_df) {
 #' Marks CN segments as a gain or loss, relative to the mode ploidy of the
 #' sample.
 #'
-#' @param segs_df a dataframe of CN segments
+#' @param segs_df dataframe. CN segments
+#' @param sample_col string. Name of the column with cell_id/other sample name
 #' @param annotate_input boolean. return input dataframe annotating each
 #' @param return_matrix boolean. Return a cell-by-feature matrix of counts.
 #' @return tibble/dataframe of counts
 #' @export
-ploidy_relative_cn_state <- function(
+extract_ploidy_cn_feature <- function(
     segs_df,
     sample_col = "cell_id",
     annotate_input = FALSE,
@@ -266,3 +268,46 @@ ploidy_relative_cn_state <- function(
 
   return(feat_count)
 }
+
+
+#' count the segment-span-on-chromosome event types.
+#'
+#' Critical to this fuction is [dlptools::mark_segs_chromosome_span()]. It is
+#' important to read and understand that function and its arguments.
+#'
+#' This function basically just calls [dlptools::mark_segs_chromosome_span()]
+#' and summarizes the results. Arguments can be passed to that underlying
+#' function. Passing no arguments means you are happy with the defaults. See
+#' [dlptools::mark_segs_chromosome_span()] to understand what the defaults are.
+#'
+#' @param segs_df dataframe of CN segments
+#' @param sample_col string. Name of the column with cell_id/other sample name
+#' @param annotate_input boolean. return input dataframe annotating each
+#' segment.
+#' @param return_matrix boolean. Return a cell-by-feature matrix of counts.
+#' @return tibble/dataframe of counts
+#' @export
+extract_segment_position_feature <- function(
+    segs_df,
+    sample_col = "cell_id",
+    annotate_input = FALSE,
+    return_matrix = FALSE,
+    ...) {
+  segs_df <- mark_segs_chromosome_span(segs_df, ...)
+
+  if (annotate_input) {
+    return(segs_df)
+  }
+
+  seg_span_counts <- segs_df |>
+    dplyr::group_by(.data[[sample_col]], .drop = FALSE) |>
+    dplyr::count(seg_span_event) |>
+    dplyr::ungroup()
+
+  if (return_matrix) {
+    feat_mtx <- make_cellid_matrix(seg_span_counts, "seg_span_event", "n")
+    return(feat_mtx)
+  }
+
+  return(seg_span_counts)
+}
diff --git a/man/SEG_CHROM_EVENT_LABELS.Rd b/man/SEG_CHROM_EVENT_LABELS.Rd
new file mode 100644
index 0000000..d44bc73
--- /dev/null
+++ b/man/SEG_CHROM_EVENT_LABELS.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/chromosome_layouts.R
+\docType{data}
+\name{SEG_CHROM_EVENT_LABELS}
+\alias{SEG_CHROM_EVENT_LABELS}
+\title{global of event labels for \code{\link[=mark_segs_chromosome_span]{mark_segs_chromosome_span()}}}
+\format{
+An object of class \code{character} of length 5.
+}
+\usage{
+SEG_CHROM_EVENT_LABELS
+}
+\description{
+Set as a global, as useful in other contexts
+}
+\keyword{datasets}
diff --git a/man/add_centromere_locations.Rd b/man/add_centromere_locations.Rd
index 6478d90..c9a9c85 100644
--- a/man/add_centromere_locations.Rd
+++ b/man/add_centromere_locations.Rd
@@ -5,12 +5,14 @@
 \title{add centromere information to reads by chromosome}
 \usage{
 add_centromere_locations(
-  reads_df,
+  cn_df,
   centro_file = NULL,
   version = c("hg19", "hg38")
 )
 }
 \arguments{
+\item{cn_df}{dataframe of cn states for read bins or segments}
+
 \item{centro_file}{NULL or string to the path if you download yourself}
 
 \item{hg19}{boolean to target hg19 for loading}
diff --git a/man/ploidy_relative_cn_state.Rd b/man/extract_ploidy_cn_feature.Rd
similarity index 71%
rename from man/ploidy_relative_cn_state.Rd
rename to man/extract_ploidy_cn_feature.Rd
index 67fd481..ba012f7 100644
--- a/man/ploidy_relative_cn_state.Rd
+++ b/man/extract_ploidy_cn_feature.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cn_features.R
-\name{ploidy_relative_cn_state}
-\alias{ploidy_relative_cn_state}
+\name{extract_ploidy_cn_feature}
+\alias{extract_ploidy_cn_feature}
 \title{Could changes of state relative to ploidy.}
 \usage{
-ploidy_relative_cn_state(
+extract_ploidy_cn_feature(
   segs_df,
   sample_col = "cell_id",
   annotate_input = FALSE,
@@ -12,7 +12,9 @@ ploidy_relative_cn_state(
 )
 }
 \arguments{
-\item{segs_df}{a dataframe of CN segments}
+\item{segs_df}{dataframe. CN segments}
+
+\item{sample_col}{string. Name of the column with cell_id/other sample name}
 
 \item{annotate_input}{boolean. return input dataframe annotating each}
 
diff --git a/man/extract_segment_position_feature.Rd b/man/extract_segment_position_feature.Rd
new file mode 100644
index 0000000..c80553f
--- /dev/null
+++ b/man/extract_segment_position_feature.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cn_features.R
+\name{extract_segment_position_feature}
+\alias{extract_segment_position_feature}
+\title{count the segment-span-on-chromosome event types.}
+\usage{
+extract_segment_position_feature(
+  segs_df,
+  sample_col = "cell_id",
+  annotate_input = FALSE,
+  return_matrix = FALSE,
+  ...
+)
+}
+\arguments{
+\item{segs_df}{dataframe of CN segments}
+
+\item{sample_col}{string. Name of the column with cell_id/other sample name}
+
+\item{annotate_input}{boolean. return input dataframe annotating each
+segment.}
+
+\item{return_matrix}{boolean. Return a cell-by-feature matrix of counts.}
+}
+\value{
+tibble/dataframe of counts
+}
+\description{
+Critical to this fuction is \code{\link[=mark_segs_chromosome_span]{mark_segs_chromosome_span()}}. It is
+important to read and understand that function and its arguments.
+}
+\details{
+This function basically just calls \code{\link[=mark_segs_chromosome_span]{mark_segs_chromosome_span()}}
+and summarizes the results. Arguments can be passed to that underlying
+function. Passing no arguments means you are happy with the defaults. See
+\code{\link[=mark_segs_chromosome_span]{mark_segs_chromosome_span()}} to understand what the defaults are.
+}
diff --git a/man/extract_wu_features.Rd b/man/extract_wu_features.Rd
index 402a04c..fa684d3 100644
--- a/man/extract_wu_features.Rd
+++ b/man/extract_wu_features.Rd
@@ -6,6 +6,7 @@
 \usage{
 extract_wu_features(
   segs_df,
+  sample_col = "cell_id",
   state_bin_max = 5,
   bin_breaks = NA,
   annotate_input = FALSE,
@@ -14,7 +15,9 @@ extract_wu_features(
 )
 }
 \arguments{
-\item{segs_df}{a dataframe of CN segments}
+\item{segs_df}{dataframe. CN segments}
+
+\item{sample_col}{string. Name of the column with cell_id/other sample name}
 
 \item{state_bin_max}{int. Maximum CN to consider for bins. All CNs of this
 value and higher are grouped together. Default of 5 follows paper.}
diff --git a/man/mark_segs_chromosome_span.Rd b/man/mark_segs_chromosome_span.Rd
index d27ecbc..4a0a008 100644
--- a/man/mark_segs_chromosome_span.Rd
+++ b/man/mark_segs_chromosome_span.Rd
@@ -9,6 +9,7 @@ mark_segs_chromosome_span(
   min_bound_distance = 5e+05,
   min_span_of_chrom = 0.9,
   min_span_of_arm = 0.9,
+  version = c("hg19", "hg38"),
   acro_fix_whole_chrom = FALSE
 )
 }
@@ -24,6 +25,8 @@ considered a whole chromosome segment.}
 \item{min_span_of_arm}{float. Proportion of arm to cover to be considered an
 arm segment.}
 
+\item{version}{string. hg19 (default) or hg38}
+
 \item{acro_fix_whole_chrom.}{boolean. Whether to reset acrocentric
 chromosome CN segments to "whole-chrom" if they span futher than the Q arm.
 Honestly, probably not useful}
@@ -59,9 +62,8 @@ needs to span to be considered either category. Default is spaning 90\% of
 either feature. Meaning, if the segment (end - start)/arm_length is at least
 90\% of the arm_length, the segment is considered an "arm" spanning segment.
 
-This input dataframe requires several columns to be pregenerated. Users need
-to run \code{\link[=add_chromosome_length]{add_chromosome_length()}},
+This function runs several other functions including:
+\code{\link[=add_chromosome_length]{add_chromosome_length()}},
 \code{\link[=add_centromere_locations]{add_centromere_locations()}}, and
-\code{\link[=add_telomere_positions]{add_telomere_positions()}}. See the the \href{https://molonc.github.io/dlptools/articles/feature_extraction.html}{features vignette} for more
-details
+\code{\link[=add_telomere_positions]{add_telomere_positions()}}.
 }
diff --git a/vignettes/feature_extraction.Rmd b/vignettes/feature_extraction.Rmd
index b6f26fb..b3ac2b2 100644
--- a/vignettes/feature_extraction.Rmd
+++ b/vignettes/feature_extraction.Rmd
@@ -118,7 +118,7 @@ CN change relative to ploidy might be interesting to have a feature. Here we can
 Employs the function dlptools::mode_ploidy() to calculate mode ploidy of each sample/cell.
 
 ```{r}
-dlptools::ploidy_relative_cn_state(
+dlptools::extract_ploidy_cn_feature(
   segs_df,
   # with option just to annotate the input df
   # annotate_input = TRUE

From 5686d661b061773fddc62daa8e3cfbea734755fd Mon Sep 17 00:00:00 2001
From: benjaminfurman <benjamin.ls.furman@gmail.com>
Date: Thu, 28 Aug 2025 11:35:35 -0700
Subject: [PATCH 5/7] feat: document new CN attribute functions.

Also does a bunch of refactoring to make functions more generally useable.
---
 NAMESPACE                         |   5 +-
 R/chromosome_layouts.R            |  35 ++--
 R/cn_features.R                   |  28 +---
 R/colours.R                       |   2 +
 R/ploidy.R                        |  60 +++++++
 man/SEG_CHROM_EVENT_LABELS.Rd     |  16 --
 man/extract_ploidy_cn_feature.Rd  |   7 +-
 man/mark_cn_relative_to_ploidy.Rd |  37 +++++
 man/mark_segs_chromosome_span.Rd  |   2 +-
 vignettes/feature_extraction.Rmd  | 265 +++++++++++++++++++++++++++++-
 vignettes/various_functions.Rmd   | 197 +++++++++++++++-------
 11 files changed, 534 insertions(+), 120 deletions(-)
 delete mode 100644 man/SEG_CHROM_EVENT_LABELS.Rd
 create mode 100644 man/mark_cn_relative_to_ploidy.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 03c139b..77981cd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,10 +4,11 @@ export("%>%")
 export(ASCN_COLORS)
 export(ASCN_PHASE_COLORS)
 export(BAF_COLORS)
+export(BASE_PLOT_THEME)
 export(CNV_COLOURS)
 export(DEFAULT_CONTINUOUS_COLOR_RANGE)
+export(GEN_PLOT_COLS)
 export(RAW_COPY_COLORS)
-export(SEG_CHROM_EVENT_LABELS)
 export(STATE_COLORS)
 export(add_centromere_locations)
 export(add_missing_bins_for_cells)
@@ -57,9 +58,11 @@ export(make_corrupt_tree_heatmap)
 export(make_tree_plot_obj)
 export(map_states_to_letters)
 export(mark_bins_overlapping_centromeres)
+export(mark_cn_relative_to_ploidy)
 export(mark_mask_regions)
 export(mark_segs_chromosome_span)
 export(medicc_profiles_to_foreground)
+export(mode_ploidy)
 export(pairwise_bin_difference)
 export(plot_bg_state_highlight)
 export(plot_fg_state_highlight)
diff --git a/R/chromosome_layouts.R b/R/chromosome_layouts.R
index 0630ef4..7694179 100644
--- a/R/chromosome_layouts.R
+++ b/R/chromosome_layouts.R
@@ -1,13 +1,3 @@
-#' global of event labels for [dlptools::mark_segs_chromosome_span()]
-#'
-#' Set as a global, as useful in other contexts
-#' @export
-SEG_CHROM_EVENT_LABELS <- c(
-  arm = "arm", whole = "whole-chrom", telo = "telo-bound", centro = "centro-bound",
-  inter = "inter"
-)
-
-
 #' loading UCSC chromosome length files
 #' @param version default "hg19", can also load "hg38"
 #' @return tibgble of chromosome, total length, etc.
@@ -121,7 +111,7 @@ mark_bins_overlapping_centromeres <- function(
       ". If that is not correct, set version to 'hg38')."
     ))
 
-    reads_df <- add_centromere_locations(reads_df = reads_df)
+    reads_df <- add_centromere_locations(cn_df = reads_df)
   }
 
   reads_df <- reads_df |>
@@ -314,7 +304,7 @@ add_telomere_positions <- function(cn_df, version = c("hg19", "hg38")) {
 #' 3. arm (arm) - segment spans a whole are (*with conditions)
 #' 4. whole chromosome (whole-chrom) - segment spans the entire chromosome
 #' (*with conditions)
-#' 5. intersitial (inert) - occuring within the chromosome, not touching the
+#' 5. intersitial (inter) - occuring within the chromosome, not touching the
 #' centromere, telomeres, and not big enough to be an entire arm.
 #'
 #' You can set a min_bound_distance which reflects how close a feature needs to
@@ -353,6 +343,11 @@ mark_segs_chromosome_span <- function(
     min_span_of_arm = 0.9,
     version = c("hg19", "hg38"),
     acro_fix_whole_chrom = FALSE) {
+  event_labels <- c(
+    arm = "arm", whole = "whole-chrom", telo = "telo-bound",
+    centro = "centro-bound", inter = "inter"
+  )
+
   segs_df <- segs_df |>
     add_chromosome_length(version = version) |>
     add_centromere_locations(version = version) |>
@@ -402,12 +397,12 @@ mark_segs_chromosome_span <- function(
       # trying to be simple and Shih et al. 2023 10.1038/s41586-023-06266-3
       # inspired
       seg_span_event = dplyr::case_when(
-        spans_chrom ~ SEG_CHROM_EVENT_LABELS["whole"], # "whole-chrom",
-        centro_bound & telo_bound & !spans_centro ~ SEG_CHROM_EVENT_LABELS["arm"],
-        !telo_bound & !centro_bound & !spans_chrom & spans_arm ~ SEG_CHROM_EVENT_LABELS["arm"],
-        telo_bound & !centro_bound & !spans_chrom ~ SEG_CHROM_EVENT_LABELS["telo"], # "telo-bound",
-        centro_bound | spans_centro ~ SEG_CHROM_EVENT_LABELS["centro"], # "centro-bound"
-        .default = SEG_CHROM_EVENT_LABELS["inter"]
+        spans_chrom ~ event_labels["whole"], # "whole-chrom",
+        centro_bound & telo_bound & !spans_centro ~ event_labels["arm"],
+        !telo_bound & !centro_bound & !spans_chrom & spans_arm ~ event_labels["arm"],
+        telo_bound & !centro_bound & !spans_chrom ~ event_labels["telo"], # "telo-bound",
+        centro_bound | spans_centro ~ event_labels["centro"], # "centro-bound"
+        .default = event_labels["inter"]
       )
     )
 
@@ -426,7 +421,7 @@ mark_segs_chromosome_span <- function(
         ),
         seg_span_event = dplyr::case_when(
           !(chr %in% acro_chroms) ~ seg_span_event,
-          acro_span >= q_span ~ SEG_CHROM_EVENT_LABELS["whole"],
+          acro_span >= q_span ~ event_labels["whole"],
           .default = seg_span_event
         )
       )
@@ -436,7 +431,7 @@ mark_segs_chromosome_span <- function(
     segs_df,
     seg_span_event = factor(
       seg_span_event,
-      levels = unname(SEG_CHROM_EVENT_LABELS)
+      levels = unname(event_labels)
     )
   )
 
diff --git a/R/cn_features.R b/R/cn_features.R
index 7d2cbd2..9ecd4e9 100644
--- a/R/cn_features.R
+++ b/R/cn_features.R
@@ -217,10 +217,11 @@ extract_sigminer_wang_features <- function(segs_df) {
 }
 
 
-#' Could changes of state relative to ploidy.
+#' Count changes of state relative to ploidy.
 #'
 #' Marks CN segments as a gain or loss, relative to the mode ploidy of the
-#' sample.
+#' sample. Internally using [dlptools::mark_cn_relative_to_ploidy]. See that
+#' function for argument details.
 #'
 #' @param segs_df dataframe. CN segments
 #' @param sample_col string. Name of the column with cell_id/other sample name
@@ -229,29 +230,16 @@ extract_sigminer_wang_features <- function(segs_df) {
 #' @return tibble/dataframe of counts
 #' @export
 extract_ploidy_cn_feature <- function(
-    segs_df,
+    segs_df = NA,
     sample_col = "cell_id",
     annotate_input = FALSE,
     return_matrix = FALSE) {
-  sample_mode_ploidy <- segs_df |>
-    segs_to_reads() |>
-    mode_ploidy(sample_col = sample_col)
-
-  segs_df <- dplyr::left_join(
-    segs_df,
-    sample_mode_ploidy,
-    by = sample_col
+  segs_df <- mark_cn_relative_to_ploidy(
+    in_df = segs_df,
+    df_type = "segs",
+    sample_col = sample_col
   )
 
-  segs_df <- segs_df |>
-    dplyr::mutate(
-      cn_v_ploidy = dplyr::case_when(
-        state < mode_ploidy ~ "ploidy-loss",
-        state > mode_ploidy ~ "ploidy-gain",
-        state == mode_ploidy ~ "ploidy-match"
-      )
-    )
-
   if (annotate_input) {
     return(segs_df)
   }
diff --git a/R/colours.R b/R/colours.R
index b918b9f..98351de 100644
--- a/R/colours.R
+++ b/R/colours.R
@@ -72,6 +72,7 @@ RAW_COPY_COLORS <- circlize::colorRamp2(
   CNV_COLOURS[1:(length(CNV_COLOURS) - 1)]
 )
 
+#' @export
 GEN_PLOT_COLS <- c(
   "#4269D0FF", "#3CA951FF", "#FF725CFF",
   "#EFB118FF", "#A463F2FF",
@@ -79,6 +80,7 @@ GEN_PLOT_COLS <- c(
   "#9498A0FF", "#6CC5B0FF"
 )
 
+#' @export
 BASE_PLOT_THEME <- list(
   ggplot2::theme_minimal() +
     ggplot2::theme(
diff --git a/R/ploidy.R b/R/ploidy.R
index 9334b08..54cf561 100644
--- a/R/ploidy.R
+++ b/R/ploidy.R
@@ -54,6 +54,7 @@ weighted_ploidy <- function(
 #' @param cn_col string column name for copy number states
 #' @param chrom_col string column name for chromosomes
 #' @return tibble/dataframe of results by cell_id/sample
+#' @export
 mode_ploidy <- function(
     bin_df,
     sample_col = "cell_id",
@@ -70,3 +71,62 @@ mode_ploidy <- function(
     ) |>
     dplyr::ungroup()
 }
+
+#' Mark if CN states are gains or losses relative to cell ploidy
+#'
+#' CNs > 2 are not necessarily amplifications, and CNs < 2 are not necessarily
+#' the only states of losses. Ploidy of samples may not be diploid. This
+#' function will give you an idea if the CN you see is a gain or loss relative
+#' to the ploidy of a sample. For example, if the sample has a ploidy of 4,
+#' then a CN of 3 is a loss.
+#'
+#' Ploidy is inferred by mode CN state using [dlptools::mode_ploidy()] and
+#' states > ploidy are marked as gains, states < ploidy losses, and states
+#' matching ploidy as matched.
+#'
+#' @param in_df dataframe of CN states.
+#' @param df_type string. "reads" (default) or "segs" for CN segments, which
+#' will internally converted to bin based for mode calculation.
+#' reads in order to infer mode ploidy.
+#' @param sample_col string. Name of the column with cell_id/other sample name
+#' @return input dataframe, with new columns of information.
+#' @export
+mark_cn_relative_to_ploidy <- function(
+    in_df,
+    df_type = c("reads", "segs"),
+    sample_col = "cell_id",
+    ...) {
+  df_type <- match.arg(df_type)
+
+  event_labels <- c(
+    gain = "ploidy-gain",
+    match = "ploidy-match",
+    loss = "ploidy-loss"
+  )
+
+  if (df_type == "segs") {
+    reads_df <- segs_to_reads(in_df, ...)
+  } else if (df_type == "reads") {
+    reads_df <- in_df
+  }
+
+  sample_mode_ploidy <- mode_ploidy(reads_df, sample_col = sample_col, ...)
+
+  in_df <- dplyr::left_join(
+    in_df,
+    sample_mode_ploidy,
+    by = sample_col
+  )
+
+  in_df <- in_df |>
+    dplyr::mutate(
+      cn_v_ploidy = dplyr::case_when(
+        state < mode_ploidy ~ event_labels["loss"],
+        state > mode_ploidy ~ event_labels["gain"],
+        state == mode_ploidy ~ event_labels["match"]
+      ),
+      cn_v_ploidy = factor(cn_v_ploidy, levels = unname(event_labels))
+    )
+
+  return(in_df)
+}
diff --git a/man/SEG_CHROM_EVENT_LABELS.Rd b/man/SEG_CHROM_EVENT_LABELS.Rd
deleted file mode 100644
index d44bc73..0000000
--- a/man/SEG_CHROM_EVENT_LABELS.Rd
+++ /dev/null
@@ -1,16 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/chromosome_layouts.R
-\docType{data}
-\name{SEG_CHROM_EVENT_LABELS}
-\alias{SEG_CHROM_EVENT_LABELS}
-\title{global of event labels for \code{\link[=mark_segs_chromosome_span]{mark_segs_chromosome_span()}}}
-\format{
-An object of class \code{character} of length 5.
-}
-\usage{
-SEG_CHROM_EVENT_LABELS
-}
-\description{
-Set as a global, as useful in other contexts
-}
-\keyword{datasets}
diff --git a/man/extract_ploidy_cn_feature.Rd b/man/extract_ploidy_cn_feature.Rd
index ba012f7..066d86c 100644
--- a/man/extract_ploidy_cn_feature.Rd
+++ b/man/extract_ploidy_cn_feature.Rd
@@ -2,10 +2,10 @@
 % Please edit documentation in R/cn_features.R
 \name{extract_ploidy_cn_feature}
 \alias{extract_ploidy_cn_feature}
-\title{Could changes of state relative to ploidy.}
+\title{Count changes of state relative to ploidy.}
 \usage{
 extract_ploidy_cn_feature(
-  segs_df,
+  segs_df = NA,
   sample_col = "cell_id",
   annotate_input = FALSE,
   return_matrix = FALSE
@@ -25,5 +25,6 @@ tibble/dataframe of counts
 }
 \description{
 Marks CN segments as a gain or loss, relative to the mode ploidy of the
-sample.
+sample. Internally using \link{mark_cn_relative_to_ploidy}. See that
+function for argument details.
 }
diff --git a/man/mark_cn_relative_to_ploidy.Rd b/man/mark_cn_relative_to_ploidy.Rd
new file mode 100644
index 0000000..357bf3a
--- /dev/null
+++ b/man/mark_cn_relative_to_ploidy.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ploidy.R
+\name{mark_cn_relative_to_ploidy}
+\alias{mark_cn_relative_to_ploidy}
+\title{Mark if CN states are gains or losses relative to cell ploidy}
+\usage{
+mark_cn_relative_to_ploidy(
+  in_df,
+  df_type = c("reads", "segs"),
+  sample_col = "cell_id",
+  ...
+)
+}
+\arguments{
+\item{in_df}{dataframe of CN states.}
+
+\item{df_type}{string. "reads" (default) or "segs" for CN segments, which
+will internally converted to bin based for mode calculation.
+reads in order to infer mode ploidy.}
+
+\item{sample_col}{string. Name of the column with cell_id/other sample name}
+}
+\value{
+input dataframe, with new columns of information.
+}
+\description{
+CNs > 2 are not necessarily amplifications, and CNs < 2 are not necessarily
+the only states of losses. Ploidy of samples may not be diploid. This
+function will give you an idea if the CN you see is a gain or loss relative
+to the ploidy of a sample. For example, if the sample has a ploidy of 4,
+then a CN of 3 is a loss.
+}
+\details{
+Ploidy is inferred by mode CN state using \code{\link[=mode_ploidy]{mode_ploidy()}} and
+states > ploidy are marked as gains, states < ploidy losses, and states
+matching ploidy as matched.
+}
diff --git a/man/mark_segs_chromosome_span.Rd b/man/mark_segs_chromosome_span.Rd
index 4a0a008..e1db366 100644
--- a/man/mark_segs_chromosome_span.Rd
+++ b/man/mark_segs_chromosome_span.Rd
@@ -48,7 +48,7 @@ centromere
 \item arm (arm) - segment spans a whole are (*with conditions)
 \item whole chromosome (whole-chrom) - segment spans the entire chromosome
 (*with conditions)
-\item intersitial (inert) - occuring within the chromosome, not touching the
+\item intersitial (inter) - occuring within the chromosome, not touching the
 centromere, telomeres, and not big enough to be an entire arm.
 }
 
diff --git a/vignettes/feature_extraction.Rmd b/vignettes/feature_extraction.Rmd
index b3ac2b2..cc808d3 100644
--- a/vignettes/feature_extraction.Rmd
+++ b/vignettes/feature_extraction.Rmd
@@ -14,11 +14,14 @@ knitr::opts_chunk$set(
 )
 ```
 
-```{r setup}
+```{r setup, message=FALSE, warning=FALSE}
+library(tidyverse)
 library(dlptools)
 ```
 
-dlptools offers two feature extraction types:
+## Features Defined by Other Publications
+
+dlptools offers two externally defined feature extraction types:
 
 1. Wu et al. https://www.biorxiv.org/content/10.1101/2025.03.02.641098v1
 2. Wang et al. https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1009557
@@ -107,7 +110,7 @@ sig_feats[1:4, 1:4]
 <br>
 <br>
 
-## More Features: ploidy informed CN change
+## ploidy informed CN change
 
 CN change relative to ploidy might be interesting to have a feature. Here we can mark 3 types of events for segments:
 
@@ -128,3 +131,259 @@ dlptools::extract_ploidy_cn_feature(
   dplyr::slice_head(n = 6)
 ```
 
+<br>
+<br>
+<br>
+
+## Segment Span Types
+
+It might be useful to know where segments sit on chromosomes and how much of a chromosome they cover. We can label segments based on 5 categories:
+
+1. telomere bound (`telo-bound`) - segment touches a telomere
+2. centromere bound (`centro-bound`) - segment touches or crosses the centromere
+3. arm (`arm`) - segment spans a whole are (*with conditions)
+4. whole chromosome (`whole-chrom`) - segment spans the entire chromosome (*with conditions)
+5. intersitial (`inter`) - occuring within the chromosome, not touching the centromere, telomeres, and not big enough to be an entire arm.
+
+See `?dlptools::mark_segs_chromosome_span` for more details about this marking, as there are various options to tweak these definitions, such as:
+
+* how much of an arm a CN needs to cover to be considered an "arm-level" event
+* how much of a chromosome a segment needs to cover to be a "whole-chromosome" event
+* how close a seg needs to be to a telo/centromere to be considered "touching"
+
+We can mark a dataframe with the information:
+
+```{r, message=FALSE, warning=FALSE}
+dlptools::mark_segs_chromosome_span(
+  segs_df,
+  # see ?dlptools::mark_segs_chromosome_span for more args
+) |>
+  dplyr::relocate(
+    # key added columns
+    seg_span_event, telo_bound, centro_bound, spans_chrom, spans_arm
+  ) |>
+  dplyr::slice_head(n = 5)
+```
+
+<br>
+<br>
+
+More useful in the "feature" context, is to directly extract and count them up. This function bascially just calls the one above, with counting added:
+
+```{r, message=FALSE, warning=FALSE}
+dlptools::extract_segment_position_feature(
+  segs_df
+  # with option just to annotate the input df
+  # annotate_input = TRUE
+  # or, alternatively, return a count matrix
+  # return_matrix = TRUE
+) |>
+  dplyr::slice_head(n = 6)
+```
+
+
+## Feature Combinations
+
+With some of these feature sets, we can combine them into one monster feature set. Is this a good idea? A bad idea? Is the output even interpretable? Are all ov these even useful? You'll have to decide that.
+
+The feature sets of:
+
+1. `dlptools::extract_wu_features`
+2. `dlptools::extract_ploidy_cn_feature`
+3. `dlptools::extract_segment_position_feature`
+
+are all "atomic", in that they describe aspects of individual segments.
+
+First, we can collect labels all all features for the segments:
+
+```{r, message=FALSE, warning=FALSE}
+feat_anno_segs <- segs_df |>
+  dlptools::extract_wu_features(annotate_input = TRUE) |>
+  dlptools::extract_ploidy_cn_feature(annotate_input = TRUE) |>
+  dlptools::extract_segment_position_feature(annotate_input = TRUE) |>
+  dplyr::select(
+    cell_id, chr, start, end,
+    # these are then the feature columns
+    seg_bin, cn_bin, seg_change, seg_shape, cn_v_ploidy, seg_span_event
+  )
+
+
+dplyr::slice_head(feat_anno_segs, n = 5)
+```
+
+
+Then, we can just count each category and leave the counts per cell per feature as our feature set for signature extraction. This would yield a 21 feature set (6 core feature types with: 5, 3, 3, 2, 3, 5 categories across them)
+
+<!-- Seeing NA in AA/BB for this dataset...track down! -->
+
+
+```{r, fig.width=8, fig.height=4, message=FALSE, warning=FALSE}
+feat_cols <- c(
+  "seg_bin", "cn_bin", "seg_change", "seg_shape", "cn_v_ploidy",
+  "seg_span_event"
+)
+
+total_counts <- purrr::map(feat_cols, \(feat_col) {
+  feat_anno_segs |>
+    dplyr::group_by(cell_id, dplyr::pick(feat_col), .drop = FALSE) |>
+    dplyr::count() |>
+    dplyr::mutate(
+      feature = feat_col
+    ) |>
+    dplyr::rename(category = feat_col) |>
+    dplyr::relocate(cell_id, feature, category)
+}) |>
+  purrr::list_rbind()
+
+
+
+ggplot(
+  dplyr::filter(total_counts, cell_id %in% unique(total_counts$cell_id)
+  [1:4]),
+  aes(x = category, y = n, fill = feature)
+) +
+  geom_col(show.legend = FALSE) +
+  facet_grid(
+    cell_id ~ feature,
+    scales = "free",
+    axes = "all_x",
+    switch = "x"
+  ) +
+  theme_classic() +
+  theme(
+    axis.text.x = element_blank(),
+    axis.ticks.x = element_blank(),
+    strip.placement = "outside"
+  )
+```
+
+
+<br>
+<br>
+
+We could opt to cross all of the features into a "mega-feature-set". But, this would be:
+
+* 4 features (containing 3, 5, 2, 3 categories across them) with from `dlptools::extract_wu_features`
+* 1 feature with 3 features categories from `dlptools::extract_ploidy_cn_feature`
+* 1 feature with 5 feature categories from `dlptools::extract_segment_position_feature`
+
+```{r}
+all_feat_labels <- list(
+  levels(feat_anno_segs$seg_bin),
+  levels(feat_anno_segs$cn_bin),
+  levels(feat_anno_segs$seg_change),
+  levels(feat_anno_segs$seg_shape),
+  levels(feat_anno_segs$cn_v_ploidy),
+  levels(feat_anno_segs$seg_span_event)
+)
+
+crossed_labels <- expand.grid(all_feat_labels)
+
+crossed_labels[1:4, ]
+```
+
+
+```{r}
+nrow(crossed_labels)
+```
+
+
+making for a cross feature set with 1350 possible categories. That might be too detailed, and you should check if most categories are 0 for your data, indicating useless features. 
+
+<br>
+
+Also, probably want to ask if crossing the category actually makes sense and adds biological information. For example, here, you can't even have a small sized (< 5Mb) arm event (smallest arm is >10 Mb), so crossing size and chromosome location doesn't actually lead to fully crossed features or any gain in information (and these two are somewhat redundant). Or, does it matter if the segment shape parameter is a ploidy-gain or a ploidy-loss? Does knowing that distinguish processes or teach you anything biologically? (and that's only two parameters being crossed).
+
+<br>
+
+But, forging ahead bravely:
+
+```{r, fig.width=8, fig.height=4}
+# collect all possible feature labels
+
+crossed_counts <- feat_anno_segs |>
+  dplyr::group_by(
+    # sample ID column
+    cell_id,
+    # feature columns
+    seg_bin, cn_bin, seg_change, seg_shape, cn_v_ploidy, seg_span_event,
+    .drop = FALSE
+  ) |>
+  dplyr::count() |>
+  dplyr::mutate(
+    cross_feat = stringr::str_c(
+      seg_bin, cn_bin, seg_change, seg_shape, cn_v_ploidy, seg_span_event,
+      sep = ":"
+    )
+  )
+
+
+ggplot(
+  dplyr::filter(crossed_counts, cell_id %in% unique(total_counts$cell_id)
+  [1:4]),
+  aes(x = cross_feat, y = n)
+) +
+  geom_col() +
+  facet_grid(cell_id ~ ., axes = "all_x") +
+  theme_classic() +
+  theme(
+    axis.text.x = element_blank(),
+    axis.ticks.x = element_blank(),
+  ) +
+  xlab("Crossed Feature")
+```
+
+<br>
+<br>
+
+Or, you could do some sort of combination of cross and uncrossed? I don't know. This whole signature field is the wild west and people do all kinds of things.
+
+```{r, fig.width=8, fig.height=4, message=FALSE, warning=FALSE}
+# count the Wu features as crossed: 90 feature categories
+wu_crossed_counts <- dlptools::extract_wu_features(segs_df) |>
+  dplyr::select(cell_id, feat_cat, n) |>
+  dplyr::mutate(
+    feature_group = "Wu-crossed"
+  )
+
+# do ploidy_cn_change and segment placement as uncrossed?
+ploidy_counts <- dlptools::extract_ploidy_cn_feature(segs_df) |>
+  dplyr::rename(feat_cat = cn_v_ploidy) |>
+  dplyr::mutate(
+    feature_group = "ploidy"
+  )
+
+
+seg_pos_counts <- dlptools::extract_segment_position_feature(segs_df) |>
+  dplyr::rename(feat_cat = seg_span_event) |>
+  dplyr::mutate(
+    feature_group = "position"
+  )
+
+all_feat_counts <- purrr::list_rbind(list(
+  wu_crossed_counts, ploidy_counts, seg_pos_counts
+)) |>
+  dplyr::arrange(cell_id, feature_group) |>
+  dplyr::mutate(
+    feature_group = factor(feature_group, levels = unique(feature_group))
+  )
+
+ggplot(
+  dplyr::filter(all_feat_counts, cell_id %in% unique(total_counts$cell_id)
+  [1:4]),
+  aes(x = feat_cat, y = n, fill = feature_group)
+) +
+  geom_col(show.legend = FALSE) +
+  facet_grid(cell_id ~ feature_group, axes = "all_x", scales = "free_x", space = "free") +
+  scale_fill_manual(values = dlptools::GEN_PLOT_COLS) +
+  theme_classic() +
+  theme(
+    axis.text.x = element_blank(),
+    axis.ticks.x = element_blank(),
+  ) +
+  xlab("Mixed Features")
+```
+
+<br>
+<br>
+<br>
diff --git a/vignettes/various_functions.Rmd b/vignettes/various_functions.Rmd
index 4d11611..a70f603 100644
--- a/vignettes/various_functions.Rmd
+++ b/vignettes/various_functions.Rmd
@@ -17,8 +17,8 @@ knitr::opts_chunk$set(
 ```
 
 
-```{r}
-library(dplyr)
+```{r, warning=FALSE, message=FALSE, echo=FALSE}
+library(tidyverse)
 library(vroom)
 library(dlptools)
 ```
@@ -52,8 +52,10 @@ vroom::vroom(
 )
 ```
 
+<br>
+<br>
 
-## Marking Centromere Locations
+## Marking Centromere and Telomere Locations
 
 Centromeres are problematic for DLP data, the state calls are often inflated and highly inconsistent between adjacent bins. It's a messy region that we often want to omit. The default masking file (above) pretty much captures these regions, but sometimes we may want to be more specific. 
 
@@ -79,7 +81,7 @@ Alternatively, this can be done in parts:
 ```{r}
 # e.g., just adding the centromere information by chromosome
 dlptools::add_centromere_locations(
-  reads_df = ex_reads,
+  cn_df = ex_reads,
   version = "hg19" # default, don't need to specify. Alternative is "hg38"
 ) |>
   dplyr::select(
@@ -91,6 +93,23 @@ dlptools::add_centromere_locations(
 ```
 
 
+Also available is telomere marking, but see `?dlptools::import_telos_file` for why this isn't the most in-depth or accurate thing:
+
+```{r}
+dlptools::add_telomere_positions(
+  ex_reads
+  # version = "hg19" is the default
+) |>
+  dplyr::relocate(
+    # added columns
+    telostart_p, teloend_p, telostart_q, teloend_q
+  ) |>
+  dplyr::slice_head(n = 5)
+```
+
+<br>
+<br>
+
 ## Information From Cell IDs
 
 This is assuming our standard DLP cell ids, e.g., `AT23998-A138956A-R03-C34`.
@@ -125,6 +144,8 @@ ex_reads |>
   dplyr::slice_sample(n = 5)
 ```
 
+<br>
+<br>
 
 ## Inferring Missing Bin Data
 
@@ -235,6 +256,8 @@ dlptools::load_chrom_info_file(
 ```
 
 
+<br>
+<br>
 
 ## Reads to Segments
 
@@ -251,6 +274,9 @@ ex_segs[1:4, ]
 
 *warning:* this function will leave some unexpected gaps when dataframes have been filtered and bins removed. Inspect carfully if you have dropped bins from your dataframe.
 
+<br>
+<br>
+
 ## Segments to Reads
 
 Split segments back into bins of some size.
@@ -284,6 +310,98 @@ c(filt_reads = nrow(filt_reads), input_reads = nrow(ex_reads))
 ```
 
 <br>
+<br>
+
+## Where Segments Occur
+
+Useful to know is where segments occur relative to chromosome features like the centromere and telomeres.
+
+There are a few key arguments to this function, like setting distances for how close segments need to be to telomeres and centromeres to be consider "bound", and minimum spans of arms/chromosomes. 
+
+Please see `?dlptools::mark_segs_chromosome_span` for more information on how this works:
+
+```{r}
+seg_locations <- dlptools::mark_segs_chromosome_span(
+  ex_segs,
+  # version = "hg19" assumed, but hg38 possible too.
+)
+
+dplyr::relocate(
+  seg_locations,
+  # key added columns, more present too
+  seg_span_event, telo_bound, centro_bound, spans_chrom, spans_arm
+)
+```
+
+
+Visually, this is what we're doing:
+
+```{r, echo = FALSE, fig.width=8, fig.height=6, fig.cap="Labels of CN segments on chromosomes. Vertical lines indicate locations of telomeres and centromeres."}
+chr_attrs <- dplyr::left_join(
+  dlptools::load_ucsc_centromeres(),
+  dlptools::import_telos_file(),
+  by = dplyr::join_by(chrom == chr)
+) |>
+  dplyr::rename(
+    chr = chrom
+  ) |>
+  dplyr::select(chr, centro_start, centro_end, dplyr::contains("telo")) |>
+  tidyr::pivot_longer(cols = -chr) |>
+  dplyr::mutate(
+    plot_name = dplyr::case_when(
+      stringr::str_detect(name, "start") ~ "start",
+      stringr::str_detect(name, "end") ~ "end",
+    ),
+    section_id = dplyr::case_when(
+      stringr::str_detect(name, "centro") ~ "centro",
+      stringr::str_detect(name, "telo.*p") ~ "telo-p",
+      stringr::str_detect(name, "telo.*q") ~ "telo-q"
+    )
+  ) |>
+  dplyr::select(chr, section_id, plot_name, value)
+
+
+targ_chrs <- c(1, 2, 8, 13, 14)
+
+some_cells <- unique(seg_locations$cell_id)[1:5]
+
+seg_locations$chr <- factor(
+  seg_locations$chr,
+  levels = gtools::mixedsort(unique(seg_locations$chr))
+)
+
+ggplot(
+  dplyr::filter(seg_locations, cell_id %in% some_cells & chr %in% targ_chrs)
+) +
+  geom_vline(
+    # add genome features. Hm, maybe just do as vlines
+    data = dplyr::filter(chr_attrs, chr %in% targ_chrs),
+    # aes(xmin=start, xmax=end, ymin=-1, ymax=-2, fill=section_id)
+    aes(xintercept = value),
+    color = "black"
+  ) +
+  geom_segment(
+    aes(x = start, xend = end, y = state, color = seg_span_event),
+    linewidth = 3
+  ) +
+  facet_grid(
+    cell_id ~ chr,
+    scales = "free",
+    space = "free_x",
+    switch = "x",
+    axes = "all_x",
+  ) +
+  theme_classic() +
+  theme(
+    axis.text.x = element_blank(),
+    strip.placement = "outside",
+    strip.background = element_blank()
+  ) +
+  scale_color_manual(values = dlptools::GEN_PLOT_COLS)
+```
+
+
+
 <br>
 <br>
 
@@ -313,7 +431,24 @@ dlptools::mode_ploidy(
   dplyr::slice_head(n = 5)
 ```
 
+<br>
+
+Related, we can also infer if a CN change is a gain or loss, relative to ploidy:
 
+```{r}
+dlptools::mark_cn_relative_to_ploidy(
+  ex_reads, # or can pass segments df
+  df_type = "reads" # df_type = "segs"
+) |>
+  dplyr::relocate(
+    # key added columns
+    mode_ploidy, cn_v_ploidy
+  ) |>
+  dplyr::slice_head(n = 5)
+```
+
+<br>
+<br>
 
 ## Long to Wide Reads (or segments)
 
@@ -359,58 +494,8 @@ dlptools::ASCN_PHASE_COLORS
 # dlptools::BAF_COLORS
 ```
 
-## Data Importing
-
-If you've followed a semi-standard approach for downloading such as this:
-
-```bash
-# bash
-cd where/to/save/my_dlp/
-
-ticket="SC-8382"
-azcopy copy https://singlecellresults.blob.core.windows.net/results/${ticket}/results/annotation/ ${ticket} --recursive
-azcopy copy https://singlecellresults.blob.core.windows.net/results/${ticket}/results/hmmcopy/ ${ticket} --recursive
-
-```
-
-which produces a directory with this sort of structure:
-
-```bash
-├── SC-8382
-│   ├── annotation
-│   │   ├── metrics.csv.gz
-│   │   ├── #[...other files ...]
-│   └── hmmcopy
-│   │   ├── #[...other files ...]
-│       ├── reads.csv.gz
-│       ├── segments.csv.gz
-├── SC-8408
-│   ├── annotation
-│   │   ├── metrics.csv.gz
-│   │   ├── #[...other files ...]
-│   └── hmmcopy
-│   │   ├── #[...other files ...]
-│       ├── reads.csv.gz
-│       ├── segments.csv.gz
-├── SC-8650
-```
-
-then there are some functions that can help with loading data from these into a consistent dataframe:
-
-```{r, eval=FALSE}
-dlp_dir <- "/where/to/save/my_dlp/"
-
-# loading metrics
-metrics <- dlptools::import_dlp_files(dlp_dir, "metrics")
-
-# loading reads
-reads_df <- dlptools::import_dlp_files(dlp_dir, "reads")
-
-# loading segments...but generally this isn't a great idea
-# segs_df <- dlptools::import_dlp_files(dlp_dir, "segments")
-```
-
-
+<br>
+<br>
 
 ## Other
 

From a9d12f7b041056a9596277fb1f76883f70fa6e56 Mon Sep 17 00:00:00 2001
From: benjaminfurman <benjamin.ls.furman@gmail.com>
Date: Thu, 28 Aug 2025 11:38:49 -0700
Subject: [PATCH 6/7] chore: version bump

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5d978bb..1d2b7ec 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: dlptools
 Title: DLPTools: Handling DLP+ Data
-Version: 0.3.17
+Version: 0.3.18
 Authors@R: 
     person("Ben", "Furman", , "bfurman@bccrc.ca", role = c("aut", "cre"))
 Description: A collections of functions for basic manipulation and plotting of

From 85fb251962603a0d061d9beea28719535529b6ab Mon Sep 17 00:00:00 2001
From: benjaminfurman <benjamin.ls.furman@gmail.com>
Date: Thu, 28 Aug 2025 11:57:12 -0700
Subject: [PATCH 7/7] bugfix: move to direct imports.

---
 vignettes/feature_extraction.Rmd | 44 +++++++++++++++++++-------------
 vignettes/various_functions.Rmd  | 23 ++++++++---------
 2 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/vignettes/feature_extraction.Rmd b/vignettes/feature_extraction.Rmd
index cc808d3..f2cc62f 100644
--- a/vignettes/feature_extraction.Rmd
+++ b/vignettes/feature_extraction.Rmd
@@ -15,7 +15,7 @@ knitr::opts_chunk$set(
 ```
 
 ```{r setup, message=FALSE, warning=FALSE}
-library(tidyverse)
+library(ggplot2)
 library(dlptools)
 ```
 
@@ -217,7 +217,7 @@ Then, we can just count each category and leave the counts per cell per feature
 <!-- Seeing NA in AA/BB for this dataset...track down! -->
 
 
-```{r, fig.width=8, fig.height=4, message=FALSE, warning=FALSE}
+```{r, message=FALSE, warning=FALSE}
 feat_cols <- c(
   "seg_bin", "cn_bin", "seg_change", "seg_shape", "cn_v_ploidy",
   "seg_span_event"
@@ -234,9 +234,9 @@ total_counts <- purrr::map(feat_cols, \(feat_col) {
     dplyr::relocate(cell_id, feature, category)
 }) |>
   purrr::list_rbind()
+```
 
-
-
+```{r, echo=FALSE, fig.width=8, fig.height=4}
 ggplot(
   dplyr::filter(total_counts, cell_id %in% unique(total_counts$cell_id)
   [1:4]),
@@ -257,7 +257,6 @@ ggplot(
   )
 ```
 
-
 <br>
 <br>
 
@@ -298,7 +297,7 @@ Also, probably want to ask if crossing the category actually makes sense and add
 
 But, forging ahead bravely:
 
-```{r, fig.width=8, fig.height=4}
+```{r}
 # collect all possible feature labels
 
 crossed_counts <- feat_anno_segs |>
@@ -316,8 +315,9 @@ crossed_counts <- feat_anno_segs |>
       sep = ":"
     )
   )
+```
 
-
+```{r, echo=FALSE, fig.width=8, fig.height=4}
 ggplot(
   dplyr::filter(crossed_counts, cell_id %in% unique(total_counts$cell_id)
   [1:4]),
@@ -333,12 +333,13 @@ ggplot(
   xlab("Crossed Feature")
 ```
 
+
 <br>
 <br>
 
 Or, you could do some sort of combination of cross and uncrossed? I don't know. This whole signature field is the wild west and people do all kinds of things.
 
-```{r, fig.width=8, fig.height=4, message=FALSE, warning=FALSE}
+```{r, message=FALSE, warning=FALSE}
 # count the Wu features as crossed: 90 feature categories
 wu_crossed_counts <- dlptools::extract_wu_features(segs_df) |>
   dplyr::select(cell_id, feat_cat, n) |>
@@ -367,23 +368,30 @@ all_feat_counts <- purrr::list_rbind(list(
   dplyr::mutate(
     feature_group = factor(feature_group, levels = unique(feature_group))
   )
+```
 
-ggplot(
+
+```{r, echo=FALSE, fig.width=8, fig.height=4}
+ggplot2::ggplot(
   dplyr::filter(all_feat_counts, cell_id %in% unique(total_counts$cell_id)
   [1:4]),
-  aes(x = feat_cat, y = n, fill = feature_group)
+  ggplot2::aes(x = feat_cat, y = n, fill = feature_group)
 ) +
-  geom_col(show.legend = FALSE) +
-  facet_grid(cell_id ~ feature_group, axes = "all_x", scales = "free_x", space = "free") +
-  scale_fill_manual(values = dlptools::GEN_PLOT_COLS) +
-  theme_classic() +
-  theme(
-    axis.text.x = element_blank(),
-    axis.ticks.x = element_blank(),
+  ggplot2::geom_col(show.legend = FALSE) +
+  ggplot2::facet_grid(
+    cell_id ~ feature_group,
+    axes = "all_x", scales = "free_x", space = "free"
   ) +
-  xlab("Mixed Features")
+  ggplot2::scale_fill_manual(values = dlptools::GEN_PLOT_COLS) +
+  ggplot2::theme_classic() +
+  ggplot2::theme(
+    axis.text.x = ggplot2::element_blank(),
+    axis.ticks.x = ggplot2::element_blank(),
+  ) +
+  ggplot2::xlab("Mixed Features")
 ```
 
+
 <br>
 <br>
 <br>
diff --git a/vignettes/various_functions.Rmd b/vignettes/various_functions.Rmd
index a70f603..b253dba 100644
--- a/vignettes/various_functions.Rmd
+++ b/vignettes/various_functions.Rmd
@@ -18,7 +18,6 @@ knitr::opts_chunk$set(
 
 
 ```{r, warning=FALSE, message=FALSE, echo=FALSE}
-library(tidyverse)
 library(vroom)
 library(dlptools)
 ```
@@ -370,34 +369,34 @@ seg_locations$chr <- factor(
   levels = gtools::mixedsort(unique(seg_locations$chr))
 )
 
-ggplot(
+ggplot2::ggplot(
   dplyr::filter(seg_locations, cell_id %in% some_cells & chr %in% targ_chrs)
 ) +
-  geom_vline(
+  ggplot2::geom_vline(
     # add genome features. Hm, maybe just do as vlines
     data = dplyr::filter(chr_attrs, chr %in% targ_chrs),
     # aes(xmin=start, xmax=end, ymin=-1, ymax=-2, fill=section_id)
-    aes(xintercept = value),
+    ggplot2::aes(xintercept = value),
     color = "black"
   ) +
-  geom_segment(
-    aes(x = start, xend = end, y = state, color = seg_span_event),
+  ggplot2::geom_segment(
+    ggplot2::aes(x = start, xend = end, y = state, color = seg_span_event),
     linewidth = 3
   ) +
-  facet_grid(
+  ggplot2::facet_grid(
     cell_id ~ chr,
     scales = "free",
     space = "free_x",
     switch = "x",
     axes = "all_x",
   ) +
-  theme_classic() +
-  theme(
-    axis.text.x = element_blank(),
+  ggplot2::theme_classic() +
+  ggplot2::theme(
+    axis.text.x = ggplot2::element_blank(),
     strip.placement = "outside",
-    strip.background = element_blank()
+    strip.background = ggplot2::element_blank()
   ) +
-  scale_color_manual(values = dlptools::GEN_PLOT_COLS)
+  ggplot2::scale_color_manual(values = dlptools::GEN_PLOT_COLS)
 ```