Skip to content

Instantly share code, notes, and snippets.

@agricolamz
Last active August 27, 2025 11:49
Show Gist options
  • Save agricolamz/96a610fd3e029350b2f814c13f92823d to your computer and use it in GitHub Desktop.
Save agricolamz/96a610fd3e029350b2f814c13f92823d to your computer and use it in GitHub Desktop.
# This code is licensed under the terms of the MIT license
# Author: George Moroz
# Date: 2025-08-14
# Update: 2025-08-27
# see the official docs: https://ruscorpora.github.io/public-api/
# Packages used by the whole script: tidyverse for the data helpers
# (map, tibble, mutate, pivot_wider, str_glue, ...), httr2 for the HTTP requests.
library(tidyverse)
library(httr2)

# Settings shared by every request below.
# my_token is sent as the Bearer credential in the Authorization header.
my_token <- "put your token here"
# lemma_for_search is the lemma passed in the body of each word-portrait request.
lemma_for_search <- "накачать"
# check authentication ----------------------------------------------------
# Performs a request to the check-authenticated endpoint with the Bearer token
# and prints the response object (the pipeline result is not assigned).
request("https://ruscorpora.ru/api/v1/auth/check-authenticated/") |>
  req_headers(Authorization = str_glue("Bearer {my_token}")) |>
  req_perform()
# Sample of the printed response:
# <httr2_response>
# GET https://ruscorpora.ru/api/v1/auth/check-authenticated/
# Status: 200 OK
# Content-Type: application/json
# Body: In memory (4 bytes)
# PORTRAIT_WORD_INFO ------------------------------------------------------
# Request the PORTRAIT_WORD_INFO result for the lemma in the MAIN corpus and
# keep the parsed JSON body in `result`.
result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
  req_headers(Authorization = str_glue("Bearer {my_token}"),
              accept = "application/json",
              `Content-Type` = "application/json") |>
  req_body_json(list(lemma = lemma_for_search,
                     corpus = list(type = "MAIN"),
                     resultType = list("PORTRAIT_WORD_INFO"))) |>
  req_perform() |>
  resp_body_json()

# One row per item. The parsing fields are taken by position and labelled
# text, lex, gr and (when a fourth field is present) sem; every field's values
# are joined with "; ". Items with any other number of fields yield NULL and
# therefore contribute no row.
result$propsData$items |>
  map(function(item){
    fields <- item$parsingFields
    n_fields <- length(fields)
    # joins all values of the k-th parsing field into a single string
    collapse_value <- function(k) paste0(unlist(fields[[k]]$value), collapse = "; ")
    if(n_fields == 3 || n_fields == 4){
      row <- tibble(text = collapse_value(1),
                    lex = collapse_value(2),
                    gr = collapse_value(3))
      if(n_fields == 4) row$sem <- collapse_value(4)
      row
    }
  }) |>
  list_rbind()
# PORTRAIT_CONCORDANCE ----------------------------------------------------
# Request the PORTRAIT_CONCORDANCE result for the lemma in the MAIN corpus and
# keep the parsed JSON body in `result`.
result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
  req_headers(Authorization = str_glue("Bearer {my_token}"),
              accept = "application/json",
              `Content-Type` = "application/json") |>
  req_body_json(list(lemma = lemma_for_search,
                     corpus = list(type = "MAIN"),
                     resultType = list("PORTRAIT_CONCORDANCE"))) |>
  req_perform() |>
  resp_body_json()

# One row per group, built from the group's first document only: the document's
# parsing fields become columns (named by each field's `name`), followed by the
# title, the language and the text of the first snippet's first sequence.
result$concordanceData$groups |>
  map(function(group){
    first_doc <- group$docs[[1]]
    first_snippet <- first_doc$snippetGroups[[1]]$snippets[[1]]
    parsing_fields <- first_doc$info$docExplainInfo$items[[1]]$parsingFields
    tibble(field = map_chr(parsing_fields, "name"),
           # Exactly one string per field: several values are joined with "; "
           # and an empty value becomes NA, so `value` always has the same
           # length as `field` (a plain unlist() breaks that for multi-valued
           # or empty fields and makes tibble() fail).
           value = map_chr(parsing_fields, function(entry){
             entry_values <- unlist(entry$value)
             if(length(entry_values) == 0) NA_character_ else paste0(entry_values, collapse = "; ")
           })) |>
      pivot_wider(names_from = "field", values_from = "value") |>
      mutate(title = first_doc$info$title,
             language = first_snippet$langInfo$lang,
             # the word texts are concatenated without a separator and the
             # whitespace is then normalised
             text = first_snippet$sequences[[1]]$words |>
               map_chr("text") |>
               str_c(collapse = "") |>
               str_squish())
  }) |>
  list_rbind() |>
  mutate(lemma = lemma_for_search) |>
  # final column order starts with lemma, text, title
  relocate(lemma, text, title, .before = 1)
# PORTRAIT_STATS ----------------------------------------------------
# Request the PORTRAIT_STATS result for the lemma in the MAIN corpus and keep
# the parsed JSON body in `result`.
result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
  req_headers(Authorization = str_glue("Bearer {my_token}"),
              accept = "application/json",
              `Content-Type` = "application/json") |>
  req_body_json(list(lemma = lemma_for_search,
                     corpus = list(type = "MAIN"),
                     resultType = list("PORTRAIT_STATS"))) |>
  req_perform() |>
  resp_body_json()

# Long table: one row per (field, value) pair with the counters reported for
# that value; the field name is placed in the first column.
result$statsData$fieldStats |>
  map(function(field_stat){
    field_stat$values |>
      map(function(stat_value){
        tibble(value = stat_value$key$valString$v,
               count = stat_value$count,
               docCount = stat_value$docCount,
               totalCount = stat_value$totalCount,
               totalDocCount = stat_value$totalDocCount)
      }) |>
      list_rbind() |>
      mutate(field = field_stat$field) |>
      relocate(field, .before = 1)
  }) |>
  list_rbind()
# PORTRAIT_SKETCH ----------------------------------------------------
# Request the PORTRAIT_SKETCH result for the lemma in the MAIN corpus and keep
# the parsed JSON body in `result`.
result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
  req_headers(Authorization = str_glue("Bearer {my_token}"),
              accept = "application/json",
              `Content-Type` = "application/json") |>
  req_body_json(list(lemma = lemma_for_search,
                     corpus = list(type = "MAIN"),
                     resultType = list("PORTRAIT_SKETCH"))) |>
  req_perform() |>
  resp_body_json()

# One row per collocation, tagged with the syntactic relation of the entry it
# came from and with the searched lemma.
result$sketchData$collocates |>
  map(function(relation){
    relation$collocations |>
      map(function(collocation){
        tibble(collocate = collocation$collocate$valString$v,
               # value of the first metric only
               # NOTE(review): assumes the first metric is the Dice score - confirm against the API docs
               dice = collocation$metrics[[1]]$value)
      }) |>
      list_rbind() |>
      mutate(syntactic_relation = relation[["sketchSynRelation"]])
  }) |>
  list_rbind() |>
  mutate(lemma = lemma_for_search)
# PORTRAIT_FREQUENCY ----------------------------------------------------
# Request the PORTRAIT_FREQUENCY result for the lemma in the MAIN corpus and
# keep the parsed JSON body in `result`.
result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
  req_headers(Authorization = str_glue("Bearer {my_token}"),
              accept = "application/json",
              `Content-Type` = "application/json") |>
  req_body_json(list(lemma = lemma_for_search,
                     corpus = list(type = "MAIN"),
                     resultType = list("PORTRAIT_FREQUENCY"))) |>
  req_perform() |>
  resp_body_json()

# Print the `ipm` element of the frequency data.
result$frequencyData$ipm
# PORTRAIT_SIMILAR ----------------------------------------------------
# Request the PORTRAIT_SIMILAR result for the lemma in the MAIN corpus and keep
# the parsed JSON body in `result`.
result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
  req_headers(Authorization = str_glue("Bearer {my_token}"),
              accept = "application/json",
              `Content-Type` = "application/json") |>
  req_body_json(list(lemma = lemma_for_search,
                     corpus = list(type = "MAIN"),
                     resultType = list("PORTRAIT_SIMILAR"))) |>
  req_perform() |>
  resp_body_json()

# One row per entry of the first similarData element: the similar word and the
# numeric `weight` reported for it (stored in the `metrics` column).
tibble(word = result$similarData[[1]]$values |> map_chr("word"),
       metrics = result$similarData[[1]]$values |> map_dbl("weight"))
# PORTRAIT_MORPHEME ----------------------------------------------------
# Request the PORTRAIT_MORPHEME result for the lemma in the MAIN corpus and
# keep the parsed JSON body in `result`.
result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
  req_headers(Authorization = str_glue("Bearer {my_token}"),
              accept = "application/json",
              `Content-Type` = "application/json") |>
  req_body_json(list(lemma = lemma_for_search,
                     corpus = list(type = "MAIN"),
                     resultType = list("PORTRAIT_MORPHEME"))) |>
  req_perform() |>
  resp_body_json()

# One row per morpheme: its `value` and its `type`.
tibble(
  glossed = result$morphemeData$morphemes |> map_chr("value"),
  morph_type = result$morphemeData$morphemes |> map_chr("type")
)
# PORTRAIT_WORDFORMS ----------------------------------------------------
# Request the PORTRAIT_WORDFORMS result for the lemma in the MAIN corpus and
# keep the parsed JSON body in `result`.
result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
  req_headers(Authorization = str_glue("Bearer {my_token}"),
              accept = "application/json",
              `Content-Type` = "application/json") |>
  req_body_json(list(lemma = lemma_for_search,
                     corpus = list(type = "MAIN"),
                     resultType = list("PORTRAIT_WORDFORMS"))) |>
  req_perform() |>
  resp_body_json()

# One row per word-form cell: its row label, column label, the form itself and
# the form's frequency information.
result$wordformsData$values |>
  map(function(cell){
    tibble(case = cell$rowLabel$v,
           number = cell$columnLabel$v,
           form = cell$wfValue$value,
           ipm = cell$wfValue$freq$ipm,
           # docs: the category ranges from 1 to 3, where 1 is the most frequent form and 3 the least frequent.
           category = cell$wfValue$freq$category)
  }) |>
  list_rbind()
# PORTRAIT_COGNATES ----------------------------------------------------
# Request the PORTRAIT_COGNATES result for the lemma in the MAIN corpus and
# keep the parsed JSON body in `result`. The response is only stored here;
# no table is built from it.
result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
  req_headers(Authorization = str_glue("Bearer {my_token}"),
              accept = "application/json",
              `Content-Type` = "application/json") |>
  req_body_json(list(lemma = lemma_for_search,
                     corpus = list(type = "MAIN"),
                     resultType = list("PORTRAIT_COGNATES"))) |>
  req_perform() |>
  resp_body_json()
# does not work yet
# PORTRAIT_FIRST_MENTION ----------------------------------------------------
# Request the PORTRAIT_FIRST_MENTION result for the lemma in the MAIN corpus
# and keep the parsed JSON body in `result`.
result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
  req_headers(Authorization = str_glue("Bearer {my_token}"),
              accept = "application/json",
              `Content-Type` = "application/json") |>
  req_body_json(list(lemma = lemma_for_search,
                     corpus = list(type = "MAIN"),
                     resultType = list("PORTRAIT_FIRST_MENTION"))) |>
  req_perform() |>
  resp_body_json()

# Single-row table: the parsing fields of the first info item become columns
# (named by each field's `name`), plus the redirect lemma and corpus type.
result$firstMentionData$info$items[[1]]$parsingFields |>
  map(function(entry){
    entry_values <- unlist(entry$value)
    tibble(field = entry$name,
           # Exactly one row per field: several values are joined with "; "
           # (otherwise pivot_wider() would get duplicated field names and
           # return list-columns); an empty value stays NA.
           value = if(length(entry_values) == 0) NA_character_ else paste0(entry_values, collapse = "; "))
  }) |>
  list_rbind() |>
  pivot_wider(names_from = field, values_from = value) |>
  mutate(redirect_lemma = result$firstMentionData$redirectLemma,
         redirect_corpus = result$firstMentionData$redirectCorpus$type)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment