Skip to content

Instantly share code, notes, and snippets.

@agricolamz
Created December 3, 2024 22:27
Show Gist options
  • Save agricolamz/a2b8e847af9f4c2ef4be2efced17c687 to your computer and use it in GitHub Desktop.
library(tidyverse)
# Heatmap of how widely each settlement's dialect values are shared.
#
# For every settlement, the distinct (feature, lexeme, value) combinations
# attested there are binned by the number of settlements that attest the same
# combination. Each tile shows how many combinations fall into that bin.

df <- read_csv("https://raw.githubusercontent.com/LingConLab/rutul_dialectology/master/data/database.csv")

# Placeholder codes that do not stand for an attested value. The original
# compared against "\\?" (the two characters backslash + "?"), which can never
# equal a literal "?" in an equality test.
placeholder_values <- c("NO DATA", "OTHER", "?")

# Settlements left out of the comparison.
excluded_settlements <- c("Tsudik", "Borch")

# Largest "shared by N settlements" bin shown on the plot.
# NOTE(review): presumably one less than the number of remaining settlements,
# so that combinations attested everywhere are hidden -- confirm against data.
max_shared <- 11L

df |>
  select(feature_title, feature_lexeme, value, settlement) |>
  filter(!(settlement %in% excluded_settlements)) |>
  # A cell may hold several ";"-separated values: make one row per value.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  # Filter only after splitting and trimming, so placeholders and empty
  # pieces inside multi-value cells are dropped as well.
  filter(
    !is.na(value),
    value != "",
    !(value %in% placeholder_values)
  ) |>
  distinct(settlement, feature_title, feature_lexeme, value) |>
  # After distinct() every row is one settlement attesting one combination,
  # so the group size is the number of settlements sharing it. Counting by
  # the columns themselves avoids the collisions a separator-less pasted key
  # can produce; a missing lexeme (NA) simply forms its own group.
  add_count(
    feature_title, feature_lexeme, value,
    name = "shared_with_n_villages"
  ) |>
  count(settlement, shared_with_n_villages) |>
  filter(shared_with_n_villages <= max_shared) |>
  # The running total below depends on row order, so make it explicit.
  arrange(settlement, shared_with_n_villages) |>
  group_by(settlement) |>
  mutate(
    running_total = cumsum(n),
    half_total = max(running_total) / 2,
    # Only used to choose between two text colours (legend is hidden): marks
    # the bins at which the running total has reached half of the settlement's
    # total.
    label_group = if_else(
      running_total >= half_total, "less or equal", "more"
    )
  ) |>
  ungroup() |>
  ggplot(aes(shared_with_n_villages, settlement)) +
  geom_tile(aes(fill = n), colour = "white") +
  geom_text(aes(label = n, color = label_group), show.legend = FALSE) +
  scale_fill_gradient(low = "lightblue", high = "navy") +
  coord_fixed() +
  labs(x = NULL, y = NULL) +
  scale_x_continuous(breaks = seq_len(max_shared)) +
  theme_minimal()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment