mikelove · May 11, 2024 11:05
diff --git a/join_se.R b/join_se.R
 library(SummarizedExperiment)
 library(plyranges)

 # Example data ----
 # Assay values: 600 standard-normal draws laid out as 100 rows x 6 columns.
 m <- matrix(rnorm(600), nrow = 100)

 # Query ranges: 50 width-5 ranges on seqname 1 starting at 2601, 2701, ...,
 # labelled u001 ... u050.
 r1 <- as_granges(
   data.frame(
     seqnames = 1,
     start = 2501 + 100 * 1:50,
     width = 5,
     id1 = sprintf("u%03d", 1:50)
   )
 )

 # Subject ranges: 100 width-5 ranges on seqname 1 starting at 101, 201, ...,
 # labelled v001 ... v100.
 r2 <- as_granges(
   data.frame(
     seqnames = 1,
     start = 1 + 100 * 1:100,
     width = 5,
     id2 = sprintf("v%03d", 1:100)
   )
 )

 # The matrix is associated with r2: rows are named by id2, columns by sample.
 dimnames(m) <- list(r2$id2, paste0("sample", 1:6))
 se <- SummarizedExperiment(
   assays = list(counts = m),
   rowRanges = r2,
   colData = DataFrame(z = 1:6)
 )

 # The function should do something like this:

 # Join based on ranges: r1 is the query, r2 the subject, and maxgap = 100 is
 # passed through to the overlap join.
 olap <- join_overlap_inner(r1, r2, maxgap = 100)

 # Expand the SE to one row per joined pair. Rows are picked by id2, so the
 # same row of `se` can appear more than once.
 olap_se <- se[olap$id2, ]
 # Record which query range each expanded row came from.
 rowData(olap_se)$id1 <- olap$id1
 # Key that identifies a (query, subject) pair, e.g. "u001-v025".
 rowData(olap_se)$combined <- paste(
   rowData(olap_se)$id1,
   rowData(olap_se)$id2,
   sep = "-"
 )

 library(dplyr)
 # Compute something per query range; here: keep, for every id1, the expanded
 # row whose assay values have the largest variance across samples.
 idx <- as_tibble(rowData(olap_se)) |>
   mutate(variance = rowVars(assay(olap_se))) |>
   group_by(id1) |>
   slice(which.max(variance)) |>
   ungroup() |>
   arrange(id1)

 # Subset to an SE with one row per selected query range, located through the
 # "combined" key of each selected row.
 final_se <- olap_se[
   match(idx$combined, rowData(olap_se)$combined),
 ]
diff --git a/join_se_rows.R b/join_se_rows.R
 library(SummarizedExperiment)
 library(plyranges)

 # Example data ----
 # Assay values: 600 standard-normal draws laid out as 100 rows x 6 columns.
 m <- matrix(rnorm(600), nrow = 100)

 # Query ranges: 50 width-5 ranges on seqname 1 starting at 2601, 2701, ...,
 # labelled u001 ... u050.
 r1 <- as_granges(
   data.frame(
     seqnames = 1,
     start = 2501 + 100 * 1:50,
     width = 5,
     id1 = sprintf("u%03d", 1:50)
   )
 )

 # Subject ranges: 100 width-5 ranges on seqname 1 starting at 101, 201, ...,
 # labelled v001 ... v100.
 r2 <- as_granges(
   data.frame(
     seqnames = 1,
     start = 1 + 100 * 1:100,
     width = 5,
     id2 = sprintf("v%03d", 1:100)
   )
 )

 # The matrix is associated with r2: rows are named by id2, columns by sample.
 dimnames(m) <- list(r2$id2, paste0("sample", 1:6))
 se <- SummarizedExperiment(
   assays = list(counts = m),
   rowRanges = r2,
   colData = DataFrame(z = 1:6)
 )

 # The function should do something like this:

 # Join based on ranges: r1 is the query, r2 the subject, and maxgap = 100 is
 # passed through to the overlap join.
 olap <- join_overlap_inner(r1, r2, maxgap = 100)

 # Expand the SE to one row per joined pair. Rows are picked by id2, so the
 # same row of `se` can appear more than once.
 olap_se <- se[olap$id2, ]
 # Record which query range each expanded row came from.
 rowData(olap_se)$id1 <- olap$id1
 # Key that identifies a (query, subject) pair, e.g. "u001-v025".
 rowData(olap_se)$combined <- paste(
   rowData(olap_se)$id1,
   rowData(olap_se)$id2,
   sep = "-"
 )

 library(dplyr)
 library(tidyr)
 library(purrr)

 # Average the expanded assay rows that share a query id (id1).
 # Steps: attach id1 to the assay columns, nest the assay columns per id1,
 # take the column means of each nested table, then stack the per-id1 mean
 # vectors into a matrix with one row per id1 and one column per sample.
 as_tibble(rowData(olap_se)) |>
   select(id1) |>
   bind_cols(assay(olap_se)) |>
   nest(data = -id1) |>
   pull(data) |>
   map(colMeans) |>
   bind_rows() |>
   as.matrix()
	library(SummarizedExperiment)
	library(plyranges)

	# Example data ----
	# Assay values: 600 standard-normal draws laid out as 100 rows x 6 columns.
	m <- matrix(rnorm(600), nrow = 100)
	# Query ranges (id1 = u001 ... u050).
	r1 <- data.frame(
	  seqnames = 1, start = 1:50 * 100 + 2501,
	  width = 5, id1 = paste0("u", formatC(1:50, width = 3, flag = "0"))
	) |>
	  as_granges()
	# Subject ranges (id2 = v001 ... v100).
	r2 <- data.frame(
	  seqnames = 1, start = 1:100 * 100 + 1,
	  width = 5, id2 = paste0("v", formatC(1:100, width = 3, flag = "0"))
	) |>
	  as_granges()
	rownames(m) <- r2$id2
	colnames(m) <- paste0("sample", 1:6)
	# This matrix is associated with r2 (and region ids id2).
	se <- SummarizedExperiment(
	  list(counts = m),
	  rowRanges = r2,
	  colData = DataFrame(z = 1:6)
	)

	# The function should do something like this:

	# Join based on ranges: r1 is the query, r2 the subject.
	olap <- r1 |>
	  join_overlap_inner(r2, maxgap = 100)

	# Expand the SE to one row per joined pair. Rows are picked by id2, so the
	# same row of `se` can appear more than once.
	olap_se <- se[olap$id2, ]
	# Add the ids of the query ranges.
	rowData(olap_se)$id1 <- olap$id1
	# Key that identifies a (query, subject) pair.
	rowData(olap_se)$combined <- with(
	  rowData(olap_se),
	  paste(id1, id2, sep = "-")
	)

	library(dplyr)
	# Compute something, e.g. for every id1 pick the expanded row with the
	# largest variance in the assay data.
	idx <- olap_se |>
	  rowData() |>
	  as_tibble() |>
	  mutate(variance = rowVars(assay(olap_se))) |>
	  group_by(id1) |>
	  slice(which.max(variance)) |>
	  ungroup() |>
	  arrange(id1)

	# Subset to an SE with one row per selected query range, located through
	# the "combined" key.
	final_se <- olap_se[match(idx$combined, rowData(olap_se)$combined), ]