agricolamz · August 27, 2025 11:49
diff --git a/ruscorpora.ru_API.R b/ruscorpora.ru_API.R
 # This code is licensed under the terms of the MIT license
 # Author: George Moroz
 # Date: 2025-08-14
 # Update: 2025-08-27
 # see the official docs: https://ruscorpora.github.io/public-api/

 # Packages: tidyverse for data wrangling, httr2 for the HTTP requests.
 library(tidyverse)
 library(httr2)

 # API token and the lemma that every query below searches for.
 my_token <- "put your token here"
 lemma_for_search <- "накачать"

 # check authentication ----------------------------------------------------
 # Send the bearer token to the check-authenticated endpoint and return the
 # raw response object.
 req_perform(
   req_headers(
     request("https://ruscorpora.ru/api/v1/auth/check-authenticated/"),
     "Authorization" = str_glue("Bearer {my_token}")
   )
 )

 # <httr2_response>
 # GET https://ruscorpora.ru/api/v1/auth/check-authenticated/
 # Status: 200 OK
 # Content-Type: application/json
 # Body: In memory (4 bytes)

 # PORTRAIT_WORD_INFO ------------------------------------------------------

 # Query the word-portrait endpoint for the PORTRAIT_WORD_INFO part of the
 # lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
 result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
   req_headers("Authorization" = str_glue("Bearer {my_token}"),
               "accept" = "application/json",
               "Content-Type" = "application/json") |>
   req_body_json(list(lemma = lemma_for_search,
                      corpus = list(type = "MAIN"),
                      resultType = list("PORTRAIT_WORD_INFO"))) |>
   req_perform() |>
   resp_body_json()

 # One row per item; the parsing fields are mapped by position to the
 # columns text, lex, gr and (when a fourth field exists) sem.
 result$propsData$items |>
   map(function(item){
     fields <- item$parsingFields
     # join all values of the k-th parsing field into one "; "-separated string
     joined <- function(k) paste0(unlist(fields[[k]]$value), collapse = "; ")
     n_fields <- length(fields)
     if(n_fields == 4){
       tibble(text = joined(1), lex = joined(2), gr = joined(3), sem = joined(4))
     } else if(n_fields == 3){
       tibble(text = joined(1), lex = joined(2), gr = joined(3))
     }
   }) |>
   list_rbind()

 # PORTRAIT_CONCORDANCE ----------------------------------------------------

 # Query the word-portrait endpoint for the PORTRAIT_CONCORDANCE part of the
 # lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
 result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
   req_headers("Authorization" = str_glue("Bearer {my_token}"),
               "accept" = "application/json",
               "Content-Type" = "application/json") |>
   req_body_json(list(lemma = lemma_for_search,
                      corpus = list(type = "MAIN"),
                      resultType = list("PORTRAIT_CONCORDANCE"))) |>
   req_perform() |>
   resp_body_json()

 # One row per concordance group, built from the first document of the group
 # and the first snippet of that document. The document's parsing fields
 # become columns named after the fields.
 result$concordanceData$groups |>
   map(function(group){
     first_doc <- group$docs[[1]]
     first_snippet <- first_doc$snippetGroups[[1]]$snippets[[1]]
     fields <- first_doc$info$docExplainInfo$items[[1]]$parsingFields
     tibble(field = map_chr(fields, "name"),
            # a field may carry zero or several values; joining them per field
            # keeps `value` the same length as `field`
            value = map_chr(fields, \(f) paste0(unlist(f$value), collapse = "; "))) |>
       pivot_wider(names_from = "field", values_from = "value") |>
       mutate(title = first_doc$info$title,
              language = first_snippet$langInfo$lang,
              # glue the word tokens of the first sequence back into one string
              text = first_snippet$sequences[[1]]$words |>
                map_chr("text") |>
                str_c(collapse = "") |>
                str_squish())
   }) |>
   list_rbind() |>
   mutate(lemma = lemma_for_search) |>
   # lemma, text and title lead the table, in that order
   relocate(lemma, text, title)

 # PORTRAIT_STATS ----------------------------------------------------

 # Query the word-portrait endpoint for the PORTRAIT_STATS part of the
 # lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
 result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
   req_headers("Authorization" = str_glue("Bearer {my_token}"),
               "accept" = "application/json",
               "Content-Type" = "application/json") |>
   req_body_json(list(lemma = lemma_for_search,
                      corpus = list(type = "MAIN"),
                      resultType = list("PORTRAIT_STATS"))) |>
   req_perform() |>
   resp_body_json()

 # One row per (field, value) pair with its counters; `field` is the first
 # column.
 result$statsData$fieldStats |>
   map(function(field_stat){
     field_stat$values |>
       map(function(entry){
         tibble(value = entry$key$valString$v,
                count = entry$count,
                docCount = entry$docCount,
                totalCount = entry$totalCount,
                totalDocCount = entry$totalDocCount)
       }) |>
       list_rbind() |>
       mutate(field = field_stat$field) |>
       relocate(field)
   }) |>
   list_rbind()

 # PORTRAIT_SKETCH ----------------------------------------------------

 # Query the word-portrait endpoint for the PORTRAIT_SKETCH part of the
 # lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
 result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
   req_headers("Authorization" = str_glue("Bearer {my_token}"),
               "accept" = "application/json",
               "Content-Type" = "application/json") |>
   req_body_json(list(lemma = lemma_for_search,
                      corpus = list(type = "MAIN"),
                      resultType = list("PORTRAIT_SKETCH"))) |>
   req_perform() |>
   resp_body_json()

 # One row per collocation: the collocate, the value of its first metric
 # (stored as `dice`), the syntactic relation it belongs to and the lemma.
 result$sketchData$collocates |>
   map(function(relation){
     relation$collocations |>
       map(function(pair){
         tibble(collocate = pair$collocate$valString$v,
                dice = pair$metrics[[1]]$value)
       }) |>
       list_rbind() |>
       mutate(syntactic_relation = relation[["sketchSynRelation"]])
   }) |>
   list_rbind() |>
   mutate(lemma = lemma_for_search)

 # PORTRAIT_FREQUENCY ----------------------------------------------------

 # Query the word-portrait endpoint for the PORTRAIT_FREQUENCY part of the
 # lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
 result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
   req_headers("Authorization" = str_glue("Bearer {my_token}"),
               "accept" = "application/json",
               "Content-Type" = "application/json") |>
   req_body_json(list(lemma = lemma_for_search,
                      corpus = list(type = "MAIN"),
                      resultType = list("PORTRAIT_FREQUENCY"))) |>
   req_perform() |>
   resp_body_json()

 # Show the `ipm` element of the frequency data.
 result$frequencyData$ipm

 # PORTRAIT_SIMILAR ----------------------------------------------------

 # Query the word-portrait endpoint for the PORTRAIT_SIMILAR part of the
 # lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
 result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
   req_headers("Authorization" = str_glue("Bearer {my_token}"),
               "accept" = "application/json",
               "Content-Type" = "application/json") |>
   req_body_json(list(lemma = lemma_for_search,
                      corpus = list(type = "MAIN"),
                      resultType = list("PORTRAIT_SIMILAR"))) |>
   req_perform() |>
   resp_body_json()

 # Words from the first similarData entry together with their weights.
 tibble(word = map_chr(result$similarData[[1]]$values, "word"),
        metrics = map_dbl(result$similarData[[1]]$values, "weight"))

 # PORTRAIT_MORPHEME ----------------------------------------------------

 # Query the word-portrait endpoint for the PORTRAIT_MORPHEME part of the
 # lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
 result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
   req_headers("Authorization" = str_glue("Bearer {my_token}"),
               "accept" = "application/json",
               "Content-Type" = "application/json") |>
   req_body_json(list(lemma = lemma_for_search,
                      corpus = list(type = "MAIN"),
                      resultType = list("PORTRAIT_MORPHEME"))) |>
   req_perform() |>
   resp_body_json()

 # One row per morpheme: its `value` and its `type`.
 tibble(glossed = map_chr(result$morphemeData$morphemes, "value"),
        morph_type = map_chr(result$morphemeData$morphemes, "type"))

 # PORTRAIT_WORDFORMS ----------------------------------------------------

 # Query the word-portrait endpoint for the PORTRAIT_WORDFORMS part of the
 # lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
 result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
   req_headers("Authorization" = str_glue("Bearer {my_token}"),
               "accept" = "application/json",
               "Content-Type" = "application/json") |>
   req_body_json(list(lemma = lemma_for_search,
                      corpus = list(type = "MAIN"),
                      resultType = list("PORTRAIT_WORDFORMS"))) |>
   req_perform() |>
   resp_body_json()

 # One row per word form: row label, column label, the form itself and its
 # frequency information.
 result$wordformsData$values |>
   map(function(cell){
     frequency <- cell$wfValue$freq
     tibble(case = cell$rowLabel$v,
            number = cell$columnLabel$v,
            form = cell$wfValue$value,
            ipm = frequency$ipm,
            # docs: category ranges from 1 to 3, where 1 marks the most
            # frequent form and 3 the least frequent one
            category = frequency$category)
   }) |>
   list_rbind()

 # PORTRAIT_COGNATES ----------------------------------------------------

 # Query the word-portrait endpoint for the PORTRAIT_COGNATES part of the
 # lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
 result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
   req_headers("Authorization" = str_glue("Bearer {my_token}"),
               "accept" = "application/json",
               "Content-Type" = "application/json") |>
   req_body_json(list(lemma = lemma_for_search,
                      corpus = list(type = "MAIN"),
                      resultType = list("PORTRAIT_COGNATES"))) |>
   req_perform() |>
   resp_body_json()

 # does not work yet

 # PORTRAIT_FIRST_MENTION ----------------------------------------------------

 # Query the word-portrait endpoint for the PORTRAIT_FIRST_MENTION part of the
 # lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
 result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
   req_headers("Authorization" = str_glue("Bearer {my_token}"),
               "accept" = "application/json",
               "Content-Type" = "application/json") |>
   req_body_json(list(lemma = lemma_for_search,
                      corpus = list(type = "MAIN"),
                      resultType = list("PORTRAIT_FIRST_MENTION"))) |>
   req_perform() |>
   resp_body_json()

 # Turn the parsing fields of the first info item into one wide row (one
 # column per field name) and append the redirect lemma and corpus type.
 result$firstMentionData$info$items[[1]]$parsingFields |>
   map(function(parsing_field){
     tibble(field = parsing_field$name,
            value = unlist(parsing_field$value))
   }) |>
   list_rbind() |>
   pivot_wider(names_from = field, values_from = value) |>
   mutate(redirect_lemma = result$firstMentionData$redirectLemma,
          redirect_corpus = result$firstMentionData$redirectCorpus$type)
	# This code is licensed under the terms of the MIT license
	# Author: George Moroz
	# Date: 2025-08-14
	# Update: 2025-08-27
	# see the official docs: https://ruscorpora.github.io/public-api/

	# Packages: tidyverse for data wrangling, httr2 for the HTTP requests.
	library(tidyverse)
	library(httr2)

	# API token and the lemma that every query below searches for.
	my_token <- "put your token here"
	lemma_for_search <- "накачать"

	# check authentication ----------------------------------------------------
	# Send the bearer token to the check-authenticated endpoint and return the
	# raw response object.
	request("https://ruscorpora.ru/api/v1/auth/check-authenticated/") |>
	  req_headers("Authorization" = str_glue("Bearer {my_token}")) |>
	  req_perform()

	# <httr2_response>
	# GET https://ruscorpora.ru/api/v1/auth/check-authenticated/
	# Status: 200 OK
	# Content-Type: application/json
	# Body: In memory (4 bytes)

	# PORTRAIT_WORD_INFO ------------------------------------------------------

	# Query the word-portrait endpoint for the PORTRAIT_WORD_INFO part of the
	# lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
	result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
	  req_headers("Authorization" = str_glue("Bearer {my_token}"),
	              "accept" = "application/json",
	              "Content-Type" = "application/json") |>
	  req_body_json(list(lemma = lemma_for_search,
	                     corpus = list(type = "MAIN"),
	                     resultType = list("PORTRAIT_WORD_INFO"))) |>
	  req_perform() |>
	  resp_body_json()

	# One row per item; the parsing fields are mapped by position to the
	# columns text, lex, gr and (when a fourth field exists) sem.
	result$propsData$items |>
	  map(function(item){
	    fields <- item$parsingFields
	    # join all values of the k-th parsing field into one "; "-separated string
	    joined <- function(k) paste0(unlist(fields[[k]]$value), collapse = "; ")
	    n_fields <- length(fields)
	    if(n_fields == 4){
	      tibble(text = joined(1), lex = joined(2), gr = joined(3), sem = joined(4))
	    } else if(n_fields == 3){
	      tibble(text = joined(1), lex = joined(2), gr = joined(3))
	    }
	  }) |>
	  list_rbind()

	# PORTRAIT_CONCORDANCE ----------------------------------------------------

	# Query the word-portrait endpoint for the PORTRAIT_CONCORDANCE part of the
	# lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
	result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
	  req_headers("Authorization" = str_glue("Bearer {my_token}"),
	              "accept" = "application/json",
	              "Content-Type" = "application/json") |>
	  req_body_json(list(lemma = lemma_for_search,
	                     corpus = list(type = "MAIN"),
	                     resultType = list("PORTRAIT_CONCORDANCE"))) |>
	  req_perform() |>
	  resp_body_json()

	# One row per concordance group, built from the first document of the group
	# and the first snippet of that document. The document's parsing fields
	# become columns named after the fields.
	result$concordanceData$groups |>
	  map(function(group){
	    first_doc <- group$docs[[1]]
	    first_snippet <- first_doc$snippetGroups[[1]]$snippets[[1]]
	    fields <- first_doc$info$docExplainInfo$items[[1]]$parsingFields
	    tibble(field = map_chr(fields, "name"),
	           # a field may carry zero or several values; joining them per field
	           # keeps `value` the same length as `field`
	           value = map_chr(fields, \(f) paste0(unlist(f$value), collapse = "; "))) |>
	      pivot_wider(names_from = "field", values_from = "value") |>
	      mutate(title = first_doc$info$title,
	             language = first_snippet$langInfo$lang,
	             # glue the word tokens of the first sequence back into one string
	             text = first_snippet$sequences[[1]]$words |>
	               map_chr("text") |>
	               str_c(collapse = "") |>
	               str_squish())
	  }) |>
	  list_rbind() |>
	  mutate(lemma = lemma_for_search) |>
	  # lemma, text and title lead the table, in that order
	  relocate(lemma, text, title)

	# PORTRAIT_STATS ----------------------------------------------------

	# Query the word-portrait endpoint for the PORTRAIT_STATS part of the
	# lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
	result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
	  req_headers("Authorization" = str_glue("Bearer {my_token}"),
	              "accept" = "application/json",
	              "Content-Type" = "application/json") |>
	  req_body_json(list(lemma = lemma_for_search,
	                     corpus = list(type = "MAIN"),
	                     resultType = list("PORTRAIT_STATS"))) |>
	  req_perform() |>
	  resp_body_json()

	# One row per (field, value) pair with its counters; `field` is the first
	# column.
	result$statsData$fieldStats |>
	  map(function(field_stat){
	    field_stat$values |>
	      map(function(entry){
	        tibble(value = entry$key$valString$v,
	               count = entry$count,
	               docCount = entry$docCount,
	               totalCount = entry$totalCount,
	               totalDocCount = entry$totalDocCount)
	      }) |>
	      list_rbind() |>
	      mutate(field = field_stat$field) |>
	      relocate(field)
	  }) |>
	  list_rbind()

	# PORTRAIT_SKETCH ----------------------------------------------------

	# Query the word-portrait endpoint for the PORTRAIT_SKETCH part of the
	# lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
	result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
	  req_headers("Authorization" = str_glue("Bearer {my_token}"),
	              "accept" = "application/json",
	              "Content-Type" = "application/json") |>
	  req_body_json(list(lemma = lemma_for_search,
	                     corpus = list(type = "MAIN"),
	                     resultType = list("PORTRAIT_SKETCH"))) |>
	  req_perform() |>
	  resp_body_json()

	# One row per collocation: the collocate, the value of its first metric
	# (stored as `dice`), the syntactic relation it belongs to and the lemma.
	result$sketchData$collocates |>
	  map(function(relation){
	    relation$collocations |>
	      map(function(pair){
	        tibble(collocate = pair$collocate$valString$v,
	               dice = pair$metrics[[1]]$value)
	      }) |>
	      list_rbind() |>
	      mutate(syntactic_relation = relation[["sketchSynRelation"]])
	  }) |>
	  list_rbind() |>
	  mutate(lemma = lemma_for_search)

	# PORTRAIT_FREQUENCY ----------------------------------------------------

	# Query the word-portrait endpoint for the PORTRAIT_FREQUENCY part of the
	# lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
	result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
	  req_headers("Authorization" = str_glue("Bearer {my_token}"),
	              "accept" = "application/json",
	              "Content-Type" = "application/json") |>
	  req_body_json(list(lemma = lemma_for_search,
	                     corpus = list(type = "MAIN"),
	                     resultType = list("PORTRAIT_FREQUENCY"))) |>
	  req_perform() |>
	  resp_body_json()

	# Show the `ipm` element of the frequency data.
	result$frequencyData$ipm

	# PORTRAIT_SIMILAR ----------------------------------------------------

	# Query the word-portrait endpoint for the PORTRAIT_SIMILAR part of the
	# lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
	result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
	  req_headers("Authorization" = str_glue("Bearer {my_token}"),
	              "accept" = "application/json",
	              "Content-Type" = "application/json") |>
	  req_body_json(list(lemma = lemma_for_search,
	                     corpus = list(type = "MAIN"),
	                     resultType = list("PORTRAIT_SIMILAR"))) |>
	  req_perform() |>
	  resp_body_json()

	# Words from the first similarData entry together with their weights.
	tibble(word = map_chr(result$similarData[[1]]$values, "word"),
	       metrics = map_dbl(result$similarData[[1]]$values, "weight"))

	# PORTRAIT_MORPHEME ----------------------------------------------------

	# Query the word-portrait endpoint for the PORTRAIT_MORPHEME part of the
	# lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
	result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
	  req_headers("Authorization" = str_glue("Bearer {my_token}"),
	              "accept" = "application/json",
	              "Content-Type" = "application/json") |>
	  req_body_json(list(lemma = lemma_for_search,
	                     corpus = list(type = "MAIN"),
	                     resultType = list("PORTRAIT_MORPHEME"))) |>
	  req_perform() |>
	  resp_body_json()

	# One row per morpheme: its `value` and its `type`.
	tibble(glossed = map_chr(result$morphemeData$morphemes, "value"),
	       morph_type = map_chr(result$morphemeData$morphemes, "type"))

	# PORTRAIT_WORDFORMS ----------------------------------------------------

	# Query the word-portrait endpoint for the PORTRAIT_WORDFORMS part of the
	# lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
	result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
	  req_headers("Authorization" = str_glue("Bearer {my_token}"),
	              "accept" = "application/json",
	              "Content-Type" = "application/json") |>
	  req_body_json(list(lemma = lemma_for_search,
	                     corpus = list(type = "MAIN"),
	                     resultType = list("PORTRAIT_WORDFORMS"))) |>
	  req_perform() |>
	  resp_body_json()

	# One row per word form: row label, column label, the form itself and its
	# frequency information.
	result$wordformsData$values |>
	  map(function(cell){
	    frequency <- cell$wfValue$freq
	    tibble(case = cell$rowLabel$v,
	           number = cell$columnLabel$v,
	           form = cell$wfValue$value,
	           ipm = frequency$ipm,
	           # docs: category ranges from 1 to 3, where 1 marks the most
	           # frequent form and 3 the least frequent one
	           category = frequency$category)
	  }) |>
	  list_rbind()

	# PORTRAIT_COGNATES ----------------------------------------------------

	# Query the word-portrait endpoint for the PORTRAIT_COGNATES part of the
	# lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
	result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
	  req_headers("Authorization" = str_glue("Bearer {my_token}"),
	              "accept" = "application/json",
	              "Content-Type" = "application/json") |>
	  req_body_json(list(lemma = lemma_for_search,
	                     corpus = list(type = "MAIN"),
	                     resultType = list("PORTRAIT_COGNATES"))) |>
	  req_perform() |>
	  resp_body_json()

	# does not work yet

	# PORTRAIT_FIRST_MENTION ----------------------------------------------------

	# Query the word-portrait endpoint for the PORTRAIT_FIRST_MENTION part of the
	# lemma's portrait in the MAIN corpus; the parsed JSON body goes to `result`.
	result <- request("https://ruscorpora.ru/api/v1/word-portrait/") |>
	  req_headers("Authorization" = str_glue("Bearer {my_token}"),
	              "accept" = "application/json",
	              "Content-Type" = "application/json") |>
	  req_body_json(list(lemma = lemma_for_search,
	                     corpus = list(type = "MAIN"),
	                     resultType = list("PORTRAIT_FIRST_MENTION"))) |>
	  req_perform() |>
	  resp_body_json()

	# Turn the parsing fields of the first info item into one wide row (one
	# column per field name) and append the redirect lemma and corpus type.
	result$firstMentionData$info$items[[1]]$parsingFields |>
	  map(function(parsing_field){
	    tibble(field = parsing_field$name,
	           value = unlist(parsing_field$value))
	  }) |>
	  list_rbind() |>
	  pivot_wider(names_from = field, values_from = value) |>
	  mutate(redirect_lemma = result$firstMentionData$redirectLemma,
	         redirect_corpus = result$firstMentionData$redirectCorpus$type)