Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: dlptools
Title: DLPTools: Handling DLP+ Data
Version: 0.3.18
Version: 0.3.20
Authors@R:
person("Ben", "Furman", , "bfurman@bccrc.ca", role = c("aut", "cre"))
Description: A collection of functions for basic manipulation and plotting of
Expand Down
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,20 @@ export(chip_plot)
export(compute_tip_sibling_distances)
export(convert_dists_to_pairwise)
export(convert_long_reads_to_wide)
export(count_oscillations)
export(create_chrom_arm_intervals)
export(create_chrom_window_intervals)
export(create_expected_bins)
export(cust_mode)
export(expand_length_to_bins)
export(extract_bp_per_arm)
export(extract_bp_per_window)
export(extract_breakpoints)
export(extract_changepoint)
export(extract_oscillations)
export(extract_ploidy_cn_feature)
export(extract_segment_position_feature)
export(extract_segment_sizes)
export(extract_sigminer_wang_features)
export(extract_wu_features)
export(factor_column_mixedsort)
Expand Down
64 changes: 63 additions & 1 deletion R/chromosome_layouts.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
) |>
dplyr::mutate(
chr = stringr::str_replace(chr, "chr", "")
)
) |>
dplyr::select(-misc)

return(chrom_info)
}
Expand All @@ -33,7 +34,7 @@
#' @param cn_df dataframe of copy number information
#' @param version string. hg19 (default) or hg38
#' @param chrom_col string. name of column with chromosome information.
add_chromosome_length <- function(cn_df, version = c("hg19", "hg38"), chrom_col = "chr") {

Check warning on line 37 in R/chromosome_layouts.R

View workflow job for this annotation

GitHub Actions / lint

file=R/chromosome_layouts.R,line=37,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 90 characters.
chrom_info <- load_chrom_info_file(version = version)

if (is_chr_used_in_chroms(cn_df[[chrom_col]])) {
Expand Down Expand Up @@ -95,7 +96,7 @@
#' @return input table, but with a boolean 'within_centro' column added (and
#' potentially other centromere information columns, if needed)
#' @export
mark_bins_overlapping_centromeres <- function(

Check warning on line 99 in R/chromosome_layouts.R

View workflow job for this annotation

GitHub Actions / lint

file=R/chromosome_layouts.R,line=99,col=1,[object_length_linter] Variable and function names should not be longer than 30 characters.
reads_df,
padding = 0,
bin_start_col = "start",
Expand Down Expand Up @@ -221,11 +222,11 @@

# col names by inspection and checking against here by looking up gap table
# data format description
# https://genome.ucsc.edu/cgi-bin/hgTables?hgsid=2018561924_ZAtdZC9CFEJw8BKiYeUjd2ImvhS7&clade=mammal&org=Human&db=hg19&hgta_group=allTables&hgta_track=hg19&hgta_table=gap&hgta_regionType=genome&position=chr7%3A155%2C592%2C223-155%2C605%2C565&hgta_outputType=primaryTable&hgta_outFileName=

Check warning on line 225 in R/chromosome_layouts.R

View workflow job for this annotation

GitHub Actions / lint

file=R/chromosome_layouts.R,line=225,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 291 characters.
telos <- vroom::vroom(
telos_file,
col_names = c(
"bin", "chrom", "telostart", "teloend", "ix", "n", "size", "feat_type", "bridge"

Check warning on line 229 in R/chromosome_layouts.R

View workflow job for this annotation

GitHub Actions / lint

file=R/chromosome_layouts.R,line=229,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 86 characters.
)
) |>
dplyr::filter(feat_type == "telomere") |>
Expand Down Expand Up @@ -338,7 +339,7 @@
#' @export
mark_segs_chromosome_span <- function(
segs_df,
min_bound_distance = 5e5, # given scale of DLP, this should probably be 1 bin width, at least

Check warning on line 342 in R/chromosome_layouts.R

View workflow job for this annotation

GitHub Actions / lint

file=R/chromosome_layouts.R,line=342,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 97 characters.
min_span_of_chrom = 0.9,
min_span_of_arm = 0.9,
version = c("hg19", "hg38"),
Expand Down Expand Up @@ -399,7 +400,7 @@
seg_span_event = dplyr::case_when(
spans_chrom ~ event_labels["whole"], # "whole-chrom",
centro_bound & telo_bound & !spans_centro ~ event_labels["arm"],
!telo_bound & !centro_bound & !spans_chrom & spans_arm ~ event_labels["arm"],

Check warning on line 403 in R/chromosome_layouts.R

View workflow job for this annotation

GitHub Actions / lint

file=R/chromosome_layouts.R,line=403,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 85 characters.
telo_bound & !centro_bound & !spans_chrom ~ event_labels["telo"], # "telo-bound",
centro_bound | spans_centro ~ event_labels["centro"], # "centro-bound"
.default = event_labels["inter"]
Expand Down Expand Up @@ -437,3 +438,64 @@

return(segs_df)
}

#' Break each chromosome up into intervals of a defined window size
#'
#' Loads the chromosome length table for the requested genome build and, for
#' every chromosome, builds the sequence of window break points that starts at
#' position 1 and steps by `window_size`. The sequence is allowed to run up to
#' `total_length + window_size`, so the last break point always lies beyond
#' the end of the chromosome and closes the final (possibly partial) window.
#'
#' @param window_size number. The size of window to split each chromosome
#'   into. Must be a single positive, finite value.
#' @param genome_version string. "hg19" (default) or "hg38"
#' @return list. Named by chromosome, numeric vectors of window break points
#'   (each window start plus one closing break past the chromosome end).
#' @export
create_chrom_window_intervals <- function(
    window_size,
    genome_version = c("hg19", "hg38")) {
  genome_version <- match.arg(genome_version)

  # Fail early with a readable message; otherwise a zero, negative, missing or
  # vector-valued window size only surfaces as an opaque error from seq().
  if (
    !is.numeric(window_size) ||
      length(window_size) != 1L ||
      !is.finite(window_size) ||
      window_size <= 0
  ) {
    stop(
      "`window_size` must be a single positive, finite number.",
      call. = FALSE
    )
  }

  # Warnings raised while loading the chromosome table are silenced, as in
  # the original implementation.
  chr_info <- suppressWarnings(
    load_chrom_info_file(version = genome_version)
  )

  intervals <- purrr::map(chr_info$total_length, \(total_length) {
    # Extending the upper limit by one window guarantees a break point past
    # the chromosome end, so the trailing partial window is not dropped.
    seq(1, total_length + window_size, by = window_size)
  })
  names(intervals) <- chr_info$chr

  return(intervals)
}

#' Create a list of intervals spanning chromosome arms
#'
#' Splits each chromosome at the middle of its centromere. For every
#' chromosome in the chromosome length table this yields three break points:
#' 1, the centromere midpoint, and `total_length + 1`, i.e. the boundaries of
#' the two arms.
#'
#' Chromosome lengths are left-joined to the centromere table, so a
#' chromosome with no matching centromere row keeps its entry but gets `NA`
#' as the middle break point.
#'
#' @param genome_version string. "hg19" (default) or "hg38"
#' @return list. Named by chromosome, vectors of how to break a chromosome
#'   into intervals of arms.
#' @export
create_chrom_arm_intervals <- function(genome_version = c("hg19", "hg38")) {
  # Resolve the default choice vector to a single build before it is passed
  # on, matching what create_chrom_window_intervals() does.
  genome_version <- match.arg(genome_version)

  # Warnings raised by the two loaders are silenced, as in the original
  # implementation.
  chrom_layouts <- suppressWarnings(
    load_ucsc_centromeres(version = genome_version)
  )
  chrom_lengths <- suppressWarnings(
    load_chrom_info_file(version = genome_version)
  )

  chrom_info <- dplyr::left_join(
    chrom_lengths,
    chrom_layouts,
    by = dplyr::join_by("chr" == "chrom")
  )

  intervals <- purrr::pmap(
    dplyr::select(
      chrom_info,
      total_length, centro_start, centro_end
    ),
    \(total_length, centro_start, centro_end) {
      # Midpoint of the centromere, rounded to a whole position.
      middle_centro <- centro_start + round((centro_end - centro_start) / 2)
      c(1, middle_centro, total_length + 1)
    }
  )
  names(intervals) <- chrom_info$chr

  return(intervals)
}
Loading
Loading