Skip to content

Instantly share code, notes, and snippets.

View dBenedek's full-sized avatar

Benedek Dankó dBenedek

View GitHub Profile
@dBenedek
dBenedek / my_pdf_splitter.sh
Created March 10, 2024 19:48
Split a PDF into multiple PDFs by a user-specified page range
#!/bin/bash
###############################################################################
# Split PDF to the preferred PDFs by the specified pages #
###############################################################################
# Benedek Danko
# Variables:
input_file="$1"
@dBenedek
dBenedek / plot_km_survival.R
Created June 27, 2022 12:54
[KM plot] Plotting a KM survival plot #km #plot #survival
library("tidyverse")
library("survival")
library("survminer")
# Fit model on survival data:
fit01 <- survfit(Surv(time=Time_survival,
event=Status_survival) ~ as.factor(group),
data = clin_dat)
fit01_cph <- coxph(Surv(time=Time_survival,
@dBenedek
dBenedek / pheatmap_heatmap.R
Created June 27, 2022 09:27
[Create heatmap] Create heatmap with the pheatmap package #pheatmap #heatmap #plot
library("tidyverse")
library("pheatmap")
library("fgsea")
# Load gene set data:
kegg_gs <- gmtPathways("/home/rstudio/san1/BD/datasets/kegg_metabolic_human_20211026.gmt")
pathways <- data.frame(name=names(kegg_gs)) %>%
mutate(category=str_replace(as.character(name), "^\\d+\\s", ""),
category=str_replace(as.character(category), "\\s-\\s\\d+.+", "")) %>%
@dBenedek
dBenedek / run_gsva.R
Created June 27, 2022 09:21
[Run GSVA] Run GSVA on gene sets #GSVA #enrichment
library("GSVA")
library("fgsea")
library("tidyverse")
# Perform DESeq2 normalization on count data:
# Build a DESeqDataSet from `counts` (rounded to whole numbers and coerced to a
# matrix) with `col_dat` as the column data. `design = ~ 1` is an
# intercept-only formula, i.e. no grouping variable enters the design.
# NOTE(review): `counts` and `col_dat` are not defined in the visible lines --
# confirm they are created earlier.
# NOTE(review): DESeqDataSetFromMatrix() and estimateSizeFactors() are DESeq2
# functions, but only GSVA, fgsea and tidyverse are loaded above -- confirm
# library("DESeq2") is attached before this point.
dds <- DESeqDataSetFromMatrix(countData = as.matrix(round(counts)),
colData = col_dat,
design = ~ 1)
# Estimate size factors and store them back on the same `dds` object.
dds <- estimateSizeFactors(dds)
@dBenedek
dBenedek / DE_DESeq2.R
Created June 27, 2022 09:14
[DE DESeq2] Basic differential expression with DESeq2 #DE #DESeq2 #R
library("tidyverse")
library("DESeq2")
# Create DESeq2 object:
# `count_data` is passed positionally as the first argument (the count matrix);
# `col_data` is the column data. The design formula `~ group` refers to a
# variable named `group` -- presumably a column of `col_data`; verify.
# NOTE(review): `count_data` and `col_data` are not defined in the visible
# lines -- confirm they are created earlier.
dds <- DESeqDataSetFromMatrix(count_data,
colData = col_data,
design = ~ group)
# Run DE:
# DESeq() is applied to the object built above and the result overwrites `dds`.
dds <- DESeq(dds)
@dBenedek
dBenedek / nonparametric_hypothesis_tests.py
Created July 13, 2021 11:51
[Nonparametric statistical hypothesis tests in Python] #python #nonparametric #hypothesis #tests #statistics
# Example of the Mann-Whitney U Test
# Compares two independent samples without assuming a particular distribution.
from scipy.stats import mannwhitneyu

# Hard-coded demo samples (10 observations each).
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]

# `stat` is the U statistic, `p` the associated p-value.
stat, p = mannwhitneyu(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))

# Interpret at the 5% level: p above 0.05 means no evidence of a difference.
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
@dBenedek
dBenedek / parametric_hypothesis_tests.py
Created July 13, 2021 11:48
[Parametric statistical hypothesis tests in Python] #python #parametric #hypothesis #tests #statistics
# Example of the Student's t-test
# Compares the means of two independent samples.
from scipy.stats import ttest_ind

# Hard-coded demo samples (10 observations each).
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]

# `stat` is the t statistic, `p` the associated p-value.
stat, p = ttest_ind(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))

# Interpret at the 5% level: p above 0.05 means no evidence of a difference.
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
@dBenedek
dBenedek / stationary_tests.py
Created July 13, 2021 11:46
[Stationary tests in Python] #python #tests #stationary #statistics
# Example of the Augmented Dickey-Fuller unit root test
# Checks a series for a unit root; a unit root suggests non-stationarity.
from statsmodels.tsa.stattools import adfuller

# Hard-coded demo series: a strictly increasing ramp of 10 values.
data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# adfuller() is unpacked into six values here: test statistic, p-value, number
# of lags used, number of observations used, critical values, and (bound to
# `t`) the sixth element, which the statsmodels documentation describes as the
# maximised information criterion when automatic lag selection is on.
stat, p, lags, obs, crit, t = adfuller(data)
print('stat=%.3f, p=%.3f' % (stat, p))

# Interpret at the 5% level: p above 0.05 means the unit root is not rejected.
if p > 0.05:
    print('Probably not Stationary')
else:
    print('Probably Stationary')
@dBenedek
dBenedek / correlation_tests.py
Created July 13, 2021 11:45
[Correlation tests in Python] #python #tests #correlation #statistics
# Example of the Pearson's Correlation test
# Measures the linear correlation between two paired samples.
from scipy.stats import pearsonr

# Hard-coded demo samples (10 paired observations).
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [0.353, 3.517, 0.125, -7.545, -0.555, -1.536, 3.350, -1.578, -3.537, -1.579]

# `stat` is the correlation coefficient, `p` the associated p-value.
stat, p = pearsonr(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))

# Interpret at the 5% level: p above 0.05 means no evidence of correlation.
if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')
@dBenedek
dBenedek / normality_tests.py
Created July 13, 2021 11:43
[Normality tests in Python] #python #statistics #tests #normality #scipy
# Example of the Shapiro-Wilk Normality Test
# Checks whether a sample is consistent with a normal distribution.
from scipy.stats import shapiro

# Hard-coded demo sample (10 observations).
data = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]

# `stat` is the W statistic, `p` the associated p-value.
stat, p = shapiro(data)
print('stat=%.3f, p=%.3f' % (stat, p))

# Interpret at the 5% level: p above 0.05 means normality is not rejected.
if p > 0.05:
    print('Probably Gaussian')
else:
    print('Probably not Gaussian')