debruine · January 20, 2026 11:46
diff --git a/ellmer-structured-reprex.R b/ellmer-structured-reprex.R
 # Reprex for structured data extraction with ellmer; background reading:
 # https://ellmer.tidyverse.org/articles/structured-data.html

 # does not work ----
 # Build a type from the power.json schema, then wrap it by hand in an object
 # whose `power_analyses` field is an array of that type.
 power <- ellmer::type_from_schema(
   path = "https://scienceverse.org/schema/power.json"
 )

 type_classification <- ellmer::type_object(
   .description = "A collection of power analyses reported in a document.",
   power_analyses = ellmer::type_array(power, required = TRUE),
   .additional_properties = FALSE,
   .required = TRUE
 )

 # works ----
 # Build the type directly from the power_array.json schema instead. This
 # assignment overwrites the hand-built `type_classification` from above.
 type_classification <- ellmer::type_from_schema(
   path = "https://scienceverse.org/schema/power_array.json"
 )

 # System prompt and model identifier handed to the chat constructor below.
 prompt <- "Identify and classify power analyses from exerpts of scientific manuscripts. Use null when information is missing, do not invent values. Only use 'other' if a value not in the enumerated options can be identified. There may be no power analysis in the text."
 model <- "openai/gpt-oss-20b"

 # Input excerpt: reports two a priori power analyses.
 text <- "An a priori power analysis was conducted to estimate the sample size required to achieve 80% power to detect a Cohen's d of 0.2 using an unpaired t-test at an alpha level of 0.05. This required a total sample size of 300 participants. A second a priori power analysis was conducted to estimate the required sample size for a secondary outcome. To achieve 80% power to detect a Cohen's f of 0.1 using a one-way ANOVA, a sample size of 350 was required. The a priori power analyses were conducted with G*Power."

 # Create the chat with ellmer::chat_groq() and request output constrained to
 # `type_classification`.
 chat <- ellmer::chat_groq(model = model, system_prompt = prompt)
 data <- chat$chat_structured(text, type = type_classification)

 library(testthat)

 # Expected extraction for the first analysis in the text (unpaired t-test).
 # list() keeps entries assigned NULL, so unreported fields remain as named
 # NULL slots.
 exp1 <- list(
   power_type = "apriori",
   statistical_test = "unpaired t-test",
   statistical_test_other = NULL,
   sample_size = 300L,
   alpha_level = 0.05,
   power = 0.8,
   effect_size = 0.2,
   effect_size_metric = "Cohen's d",
   effect_size_metric_other = NULL,
   software = "G*Power"
 )

 # Expected extraction for the second analysis (one-way ANOVA); the text gives
 # no alpha level for it, hence NULL.
 exp2 <- list(
   power_type = "apriori",
   statistical_test = "1-way ANOVA",
   statistical_test_other = NULL,
   sample_size = 350L,
   alpha_level = NULL,
   power = 0.8,
   effect_size = 0.1,
   effect_size_metric = "Cohen's f",
   effect_size_metric_other = NULL,
   software = "G*Power"
 )

 # Compare each extracted analysis, in order, with its expected list.
 expect_equal(data$power_analyses[[1]], expected = exp1)
 expect_equal(data$power_analyses[[2]], expected = exp2)


 # Second case: one power analysis, with most details missing from the text.
 text <- "A power analysis showed that 242 participants in each of 2 groups was required for 80% power."

 # A new chat object is created for this request
 # (presumably so earlier turns are not carried over; confirm against ellmer).
 chat <- ellmer::chat_groq(system_prompt = prompt, model = model)
 data <- chat$chat_structured(text, type = type_classification)

 # Expected extraction: 242 participants in each of 2 groups gives a total
 # sample size of 484; fields the text does not report are NULL.
 exp <- list(
   power_type = "apriori",
   statistical_test = NULL,
   statistical_test_other = NULL,
   sample_size = 484L,
   alpha_level = NULL,
   power = 0.8,
   effect_size = NULL,
   effect_size_metric = NULL,
   effect_size_metric_other = NULL,
   software = NULL
 )

 # Exactly one analysis should come back; expect_length() reports the actual
 # length on failure, unlike comparing length() through expect_equal().
 expect_length(data$power_analyses, 1)
 expect_equal(data$power_analyses[[1]], exp)
	# Reprex for structured data extraction with ellmer; background reading:
	# https://ellmer.tidyverse.org/articles/structured-data.html

	# does not work ----
	# Build a type from the power.json schema, then wrap it by hand in an object
	# whose `power_analyses` field is an array of that type.
	power <- ellmer::type_from_schema(
	  path = "https://scienceverse.org/schema/power.json"
	)

	type_classification <- ellmer::type_object(
	  .description = "A collection of power analyses reported in a document.",
	  power_analyses = ellmer::type_array(power, required = TRUE),
	  .additional_properties = FALSE,
	  .required = TRUE
	)

	# works ----
	# Build the type directly from the power_array.json schema instead. This
	# assignment overwrites the hand-built `type_classification` from above.
	type_classification <- ellmer::type_from_schema(
	  path = "https://scienceverse.org/schema/power_array.json"
	)

	# System prompt and model identifier handed to the chat constructor below.
	prompt <- "Identify and classify power analyses from exerpts of scientific manuscripts. Use null when information is missing, do not invent values. Only use 'other' if a value not in the enumerated options can be identified. There may be no power analysis in the text."
	model <- "openai/gpt-oss-20b"

	# Input excerpt: reports two a priori power analyses.
	text <- "An a priori power analysis was conducted to estimate the sample size required to achieve 80% power to detect a Cohen's d of 0.2 using an unpaired t-test at an alpha level of 0.05. This required a total sample size of 300 participants. A second a priori power analysis was conducted to estimate the required sample size for a secondary outcome. To achieve 80% power to detect a Cohen's f of 0.1 using a one-way ANOVA, a sample size of 350 was required. The a priori power analyses were conducted with G*Power."

	# Create the chat with ellmer::chat_groq() and request output constrained to
	# `type_classification`.
	chat <- ellmer::chat_groq(model = model, system_prompt = prompt)
	data <- chat$chat_structured(text, type = type_classification)

	library(testthat)

	# Expected extraction for the first analysis in the text (unpaired t-test).
	# list() keeps entries assigned NULL, so unreported fields remain as named
	# NULL slots.
	exp1 <- list(
	  power_type = "apriori",
	  statistical_test = "unpaired t-test",
	  statistical_test_other = NULL,
	  sample_size = 300L,
	  alpha_level = 0.05,
	  power = 0.8,
	  effect_size = 0.2,
	  effect_size_metric = "Cohen's d",
	  effect_size_metric_other = NULL,
	  software = "G*Power"
	)

	# Expected extraction for the second analysis (one-way ANOVA); the text gives
	# no alpha level for it, hence NULL.
	exp2 <- list(
	  power_type = "apriori",
	  statistical_test = "1-way ANOVA",
	  statistical_test_other = NULL,
	  sample_size = 350L,
	  alpha_level = NULL,
	  power = 0.8,
	  effect_size = 0.1,
	  effect_size_metric = "Cohen's f",
	  effect_size_metric_other = NULL,
	  software = "G*Power"
	)

	# Compare each extracted analysis, in order, with its expected list.
	expect_equal(data$power_analyses[[1]], expected = exp1)
	expect_equal(data$power_analyses[[2]], expected = exp2)


	# Second case: one power analysis, with most details missing from the text.
	text <- "A power analysis showed that 242 participants in each of 2 groups was required for 80% power."

	# A new chat object is created for this request
	# (presumably so earlier turns are not carried over; confirm against ellmer).
	chat <- ellmer::chat_groq(system_prompt = prompt, model = model)
	data <- chat$chat_structured(text, type = type_classification)

	# Expected extraction: 242 participants in each of 2 groups gives a total
	# sample size of 484; fields the text does not report are NULL.
	exp <- list(
	  power_type = "apriori",
	  statistical_test = NULL,
	  statistical_test_other = NULL,
	  sample_size = 484L,
	  alpha_level = NULL,
	  power = 0.8,
	  effect_size = NULL,
	  effect_size_metric = NULL,
	  effect_size_metric_other = NULL,
	  software = NULL
	)

	# Exactly one analysis should come back; expect_length() reports the actual
	# length on failure, unlike comparing length() through expect_equal().
	expect_length(data$power_analyses, 1)
	expect_equal(data$power_analyses[[1]], exp)
No results found