Skip to content

Instantly share code, notes, and snippets.

@debruine
Created January 20, 2026 11:46
Show Gist options
  • Select an option

  • Save debruine/d35616d423a69237992a44ba34dbe017 to your computer and use it in GitHub Desktop.

Select an option

Save debruine/d35616d423a69237992a44ba34dbe017 to your computer and use it in GitHub Desktop.
Ellmer structured data problem
# Reference: https://ellmer.tidyverse.org/articles/structured-data.html

# Variant that does not work ----
# Build the type by hand: load power.json as the item type and wrap it in
# an array field named power_analyses.
power <- ellmer::type_from_schema(
  path = "https://scienceverse.org/schema/power.json"
)
type_classification <- ellmer::type_object(
  .description = "A collection of power analyses reported in a document.",
  power_analyses = ellmer::type_array(power, required = TRUE),
  .required = TRUE,
  .additional_properties = FALSE
)

# Variant that works ----
# Load power_array.json directly as the complete type. This assignment
# replaces the type_classification built by hand above.
type_classification <- ellmer::type_from_schema(
  path = "https://scienceverse.org/schema/power_array.json"
)
# Model identifier and system prompt; both are reused for each chat session
# created in this script.
model <- "openai/gpt-oss-20b"
prompt <- "Identify and classify power analyses from exerpts of scientific manuscripts. Use null when information is missing, do not invent values. Only use 'other' if a value not in the enumerated options can be identified. There may be no power analysis in the text."

# Input excerpt reporting two a priori power analyses (a t-test and an ANOVA).
text <- "An a priori power analysis was conducted to estimate the sample size required to achieve 80% power to detect a Cohen's d of 0.2 using an unpaired t-test at an alpha level of 0.05. This required a total sample size of 300 participants. A second a priori power analysis was conducted to estimate the required sample size for a secondary outcome. To achieve 80% power to detect a Cohen's f of 0.1 using a one-way ANOVA, a sample size of 350 was required. The a priori power analyses were conducted with G*Power."

# Create the chat via ellmer::chat_groq() and request a structured reply
# constrained to type_classification.
chat <- ellmer::chat_groq(model = model, system_prompt = prompt)
data <- chat$chat_structured(text, type = type_classification)
library(testthat)
# Expected fields for the first analysis in the excerpt (unpaired t-test).
# list() keeps NULL entries, so fields expected to be missing are compared
# explicitly rather than dropped.
exp1 <- list(
  power_type = "apriori",
  statistical_test = "unpaired t-test",
  statistical_test_other = NULL,
  sample_size = 300L,
  alpha_level = 0.05,
  power = 0.8,
  effect_size = 0.2,
  effect_size_metric = "Cohen's d",
  effect_size_metric_other = NULL,
  software = "G*Power"
)

# Expected fields for the second analysis (one-way ANOVA). The excerpt states
# no alpha level for this analysis, so alpha_level is expected to be NULL.
exp2 <- list(
  power_type = "apriori",
  statistical_test = "1-way ANOVA",
  statistical_test_other = NULL,
  sample_size = 350L,
  alpha_level = NULL,
  power = 0.8,
  effect_size = 0.1,
  effect_size_metric = "Cohen's f",
  effect_size_metric_other = NULL,
  software = "G*Power"
)

# Check the number of returned analyses before indexing into them, so a
# short result fails with a clear expectation message instead of a
# "subscript out of bounds" error. This mirrors the count check in the
# single-analysis case later in the script.
expect_length(data$power_analyses, 2L)
expect_equal(data$power_analyses[[1]], exp1)
expect_equal(data$power_analyses[[2]], exp2)
# Second case: a minimal excerpt that reports only per-group sample size and
# power, so most fields are expected to come back as NULL.
text <- "A power analysis showed that 242 participants in each of 2 groups was required for 80% power."

# Use a fresh chat so this request does not share a conversation with the
# earlier one.
chat <- ellmer::chat_groq(system_prompt = prompt, model = model)
data <- chat$chat_structured(text, type = type_classification)

# Expected fields. sample_size is the total across groups (242 * 2 = 484).
exp <- list(
  power_type = "apriori",
  statistical_test = NULL,
  statistical_test_other = NULL,
  sample_size = 484L,
  alpha_level = NULL,
  power = 0.8,
  effect_size = NULL,
  effect_size_metric = NULL,
  effect_size_metric_other = NULL,
  software = NULL
)

# Exactly one analysis should be extracted; check the count before indexing.
expect_length(data$power_analyses, 1L)
expect_equal(data$power_analyses[[1]], exp)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment