Last active
May 11, 2024 11:05
-
-
Save mikelove/d9cdc90dc4fae7c47112e939260cee4e to your computer and use it in GitHub Desktop.
Example code for joining two range sets where the second one is also attached to an SE
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Join a set of query ranges (r1) to a second set of ranges (r2) that is
# attached to a SummarizedExperiment, then reduce the joined SE to a single
# row per query range (here: the matched row with the largest variance).

library(SummarizedExperiment)
library(plyranges)

# Example data ----

# 100 x 6 matrix of random values: one row per r2 range, one column per sample
m <- matrix(rnorm(600), nrow = 100)

# query ranges: 50 ranges of width 5, labelled u001 ... u050
r1 <- data.frame(
  seqnames = 1,
  start = 1:50 * 100 + 2501,
  width = 5,
  id1 = paste0("u", formatC(1:50, width = 3, flag = "0"))
) |>
  as_granges()

# subject ranges: 100 ranges of width 5, labelled v001 ... v100
r2 <- data.frame(
  seqnames = 1,
  start = 1:100 * 100 + 1,
  width = 5,
  id2 = paste0("v", formatC(1:100, width = 3, flag = "0"))
) |>
  as_granges()

rownames(m) <- r2$id2
colnames(m) <- paste0("sample", 1:6)

# this matrix is associated with r2 (and regions id2)
se <- SummarizedExperiment(
  list(counts = m),
  rowRanges = r2,
  colData = DataFrame(z = 1:6)
)

# The function should do something like this ----

# join based on ranges; maxgap = 100 is passed so that ranges separated by a
# small gap are joined as well as ranges that strictly overlap
olap <- r1 |>
  join_overlap_inner(r2, maxgap = 100)

# expand the SE: one row per (id1, id2) match, selected by the id2 row names
olap_se <- se[olap$id2, ] # contains duplicate rows sometimes
rowData(olap_se)$id1 <- olap$id1 # add IDs from the query ranges
# unique key per match; needed because id2 alone can repeat across rows
rowData(olap_se)$combined <- with(
  rowData(olap_se),
  paste(id1, id2, sep = "-")
)

# attached here (after plyranges), as in the original script, so that the
# order in which packages mask each other's functions is unchanged
library(dplyr)

# compute something, e.g. pick the range with top variance in the assay data
idx <- olap_se |>
  rowData() |>
  as_tibble() |>
  mutate(variance = rowVars(assay(olap_se))) |>
  group_by(id1) |>
  slice(which.max(variance)) |> # keep the single top-variance row per id1
  ungroup() |>
  arrange(id1)

# subset to an SE that now matches the size of query ranges
# (match() on the combined key picks exactly the rows chosen in idx)
final_se <- olap_se[match(idx$combined, rowData(olap_se)$combined), ]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Join a set of query ranges (r1) to a second set of ranges (r2) that is
# attached to a SummarizedExperiment, then average the assay values of all
# matched rows within each query range (one output row per id1).

library(SummarizedExperiment)
library(plyranges)

# Example data ----

# 100 x 6 matrix of random values: one row per r2 range, one column per sample
m <- matrix(rnorm(600), nrow = 100)

# query ranges: 50 ranges of width 5, labelled u001 ... u050
r1 <- data.frame(
  seqnames = 1,
  start = 1:50 * 100 + 2501,
  width = 5,
  id1 = paste0("u", formatC(1:50, width = 3, flag = "0"))
) |>
  as_granges()

# subject ranges: 100 ranges of width 5, labelled v001 ... v100
r2 <- data.frame(
  seqnames = 1,
  start = 1:100 * 100 + 1,
  width = 5,
  id2 = paste0("v", formatC(1:100, width = 3, flag = "0"))
) |>
  as_granges()

rownames(m) <- r2$id2
colnames(m) <- paste0("sample", 1:6)

# this matrix is associated with r2 (and regions id2)
se <- SummarizedExperiment(
  list(counts = m),
  rowRanges = r2,
  colData = DataFrame(z = 1:6)
)

# The function should do something like this ----

# join based on ranges; maxgap = 100 is passed so that ranges separated by a
# small gap are joined as well as ranges that strictly overlap
olap <- r1 |>
  join_overlap_inner(r2, maxgap = 100)

# expand the SE: one row per (id1, id2) match, selected by the id2 row names
olap_se <- se[olap$id2, ] # contains duplicate rows sometimes
rowData(olap_se)$id1 <- olap$id1 # add IDs from the query ranges
# unique key per match; needed because id2 alone can repeat across rows
rowData(olap_se)$combined <- with(
  rowData(olap_se),
  paste(id1, id2, sep = "-")
)

# attached here (after plyranges), as in the original script, so that the
# order in which packages mask each other's functions is unchanged
library(dplyr)
library(tidyr)
library(purrr)

# one row per id1, with the assay rows matched to that id1 nested in `data`
# and their per-sample (column) means stored in the list-column `col_ave`
nested <- rowData(olap_se) |>
  as_tibble() |>
  select(id1) |>
  bind_cols(assay(olap_se)) |>
  nest(data = -id1) |>
  mutate(col_ave = map(data, \(mat) {
    colMeans(mat)
  }))

# stack the per-id1 mean vectors into an (id1 x sample) matrix
ave_by_id1 <- nested |>
  pull(col_ave) |>
  bind_rows() |>
  as.matrix()

# label each row with its query range so the averages can be traced back
rownames(ave_by_id1) <- nested$id1

# print the result, as the original pipeline did
ave_by_id1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment