Skip to content

Instantly share code, notes, and snippets.

@larsvilhuber
Last active April 29, 2025 16:18
Show Gist options
  • Save larsvilhuber/6b5048a22904cffd59583db95f9f0542 to your computer and use it in GitHub Desktop.
Save larsvilhuber/6b5048a22904cffd59583db95f9f0542 to your computer and use it in GitHub Desktop.
Downloading from Zenodo (2025)
# Download the files attached to a Zenodo record using the Zenodo REST API.
# This has worked as of 2024-03-01.
library(jsonlite)
library(dplyr)
library(tidyr)
library(stringr)

# Specific record ID - resolves to a fixed version
zenodo.id <- "6568295"
# We will recover the rest from the Zenodo API
zenodo.api <- "https://zenodo.org/api/records/"

# Where to save
dataloc <- here::here("data", "downloaded")
if (!dir.exists(dataloc)) {
  dir.create(dataloc, recursive = TRUE)
}

# Fetch the record's metadata (JSON) and keep a copy next to the data
metadata.file <- file.path(dataloc, "metadata.json")
download.file(paste0(zenodo.api, zenodo.id), destfile = metadata.file)
latest <- fromJSON(metadata.file)

print(paste0("DOI: ", latest$links$doi))
print(paste0("Current: ", latest$links$html))
print(paste0("Latest: ", latest$links$latest_html))

# We list all the files! (Implement some selection if only interested in
# specific files.) One row per link, with columns `key` (link type) and
# `value` (URL).
file.list <- as.data.frame(latest$files$links) %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value")

for (value in file.list$value) {
  print(value)
  # Only strip a trailing "/content" so the file name is the last path element
  file.name <- basename(str_remove(value, "/content$"))
  message(paste0("Downloading... ", file.name))
  # The actual download is disabled; uncomment the next line to fetch files.
  # mode = "wb" keeps binary files (e.g. .gz) intact on Windows.
  # download.file(value, destfile = file.path(dataloc, file.name), mode = "wb")
}
# Shell alternative: achieves the same thing as the R and Stata code in this
# gist, using the zenodo_get command-line tool.
pip install zenodo_get

# Target directory for the downloaded files; mkdir -p is a no-op if it exists.
dataloc="data/downloaded"
mkdir -p "$dataloc"

# Fetch every file of the record into the target directory.
record_id=6568295
zenodo_get "$record_id" -o "$dataloc"
// Download files from Zenodo in Stata
// Loops over every year/month/day combination, builds the file name
// sucursales_YYYY-MM-DD.csv.gz and copies it from the Zenodo API into the
// current working directory.
global zenodoid "6568295"
global zenodoapi "https://zenodo.org/api/records/"
global zenodosfx "/content"
foreach year of numlist 2018/2020 {
    foreach dmon of numlist 1/12 {
        // zero-pad the month to two digits
        local month = string(`dmon', "%02.0f")
        foreach dval of numlist 1/31 {
            // zero-pad the day to two digits
            local day = string(`dval', "%02.0f")
            // now download
            local filename "sucursales_`year'-`month'-`day'.csv.gz"
            // files live under <api><record id>/files/<file name>/content
            local url "${zenodoapi}${zenodoid}/files/`filename'${zenodosfx}"
            // The loop also produces dates that do not exist (e.g. Feb 30) or
            // for which the record has no file; capture the error so that one
            // failed copy does not abort the whole run.
            capture copy "`url'" "`filename'"
            local rc = _rc
            if `rc' {
                display as text "Skipped `filename' (return code `rc')"
            }
        } // end day
    } // end month
} // end year
// Alternatively, just hard-code the known dates once!
// Each date maps to one file, sucursales_<date>.csv.gz, which is copied from
// the record's files endpoint into the current working directory.
local base "https://zenodo.org/api/records/6568295/files"
foreach date in 2018-02-27 2018-02-28 2018-03-01 2018-03-02 2018-03-03 ///
                2018-03-04 2018-03-05 2018-03-06 2018-03-07 2018-03-08 ///
                2018-03-09 2018-03-10 2018-03-11 {
    local filename "sucursales_`date'.csv.gz"
    copy `base'/`filename'/content `filename'
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment