In R just do
arrow::read_parquet("https://github.com/mdsumner/dryrun/raw/refs/heads/main/data-raw/noaa_oi_025_degree_daily_sst_avhrr.parquet")$url
In python, I understand GDAL
from osgeo import ogr
In R just do
arrow::read_parquet("https://github.com/mdsumner/dryrun/raw/refs/heads/main/data-raw/noaa_oi_025_degree_daily_sst_avhrr.parquet")$url
In python, I understand GDAL
from osgeo import ogr
import os
import rasterio
dsn = "/vsis3/idea-10.7289-v5sq8xb5/www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/198109/oisst-avhrr-v02r01.19810901.nc"
os.environ["AWS_S3_ENDPOINT"] = "projects.pawsey.org.au"
os.environ["AWS_NO_SIGN_REQUEST"] = "YES"
os.environ["AWS_VIRTUAL_HOSTING"] = "YES"
All working from here https://virtualizarr.readthedocs.io/en/stable/usage.html (with HTTP tab)
## on the docker image
## reticulate::use_python("/workenv/bin/python3.12")
library(bowerbird)
bl <- bb_source(
Install sooty
#install.packages("remotes")
remotes::install_cran("sooty")
Read the latest ice data we have for antarctica-amsr2-asi-s3125
.
See how the target doesn't have the "/%Y%m/" component:
Sun Aug 24 22:59:11 2025
Synchronizing dataset: NOAA OI 1/4 Degree Daily SST AVHRR
Source URL https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/
--------------------------------------------------------------------------------------------
this dataset path is: /perm_storage/home/data/r_tmp/Rtmp7EG26o/bowerbird_files/www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr
visiting https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/ ...
cnts <- rnaturalearth::ne_countries()
n <- nrow(cnts)
sf::sf_use_s2(FALSE)
pfun <-function(x) {
x <- x[1, ]
#expecting a sf cnt
op <- options(warn = -1)
on.exit(options(op))
From this URL, we remove the sidebar, and full-screen screenshot to this local PNG file, then georef from the url
https://tasmap.org/#9/-42.8830/147.0026
The file read here with rast() can be read directly with this, or itis attached below.
r <- rast("/vsicurl/https://private-user-images.githubusercontent.com/4107631/479547609-2127f648-d117-457b-ba01-6b181cdd5843.png?jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NTU2MTI0NzIsIm5iZiI6MTc1NTYxMjE3MiwicGF0aCI6Ii80MTA3NjMxLzQ3OTU0NzYwOS0yMTI3ZjY0OC1kMTE3LTQ1N2ItYmEwMS02YjE4MWNkZDU4NDMucG5nP1gtQW16LUFsZ29yaXRobT1BV1M0LUhNQUMtU0hBMjU2JlgtQW16LUNyZWRlbnRpYWw9QUtJQVZDT0RZTFNBNTNQUUs0WkElMkYyMDI1MDgxOSUyRnVzLWVhc3QtMSUyRnMzJTJGYXdzNF9yZXF1ZXN0JlgtQW16LURhdGU9MjAyNTA4MTlUMTQwMjUyWiZYLUFtei1FeHBpcmVzPTMwMCZYLUFtei1TaWduYXR1cmU9MTQ0NWJmNDk0M2NhNDUzMGI3Njg3YWQ3OWZiYmExMGYzNjY4Y2E4ZjM5OTU0YTNhNTk3YmMxZGE2NTZiMmQyYSZYLUFtei1TaWduZWRIZWFkZXJzPWhvc3QifQ.0TjLYya2nBjCNiO9po3uq
parcels <- function(address, distance = 200) {
parcel_dsn <- "/vsizip//vsicurl/listdata.thelist.tas.gov.au/opendata/data/LIST_PARCELS_HOBART.zip/list_parcels_hobart.shp"
pt <- tidygeocoder::geo(address, quiet = TRUE)
parcel_ds <- new(gdalraster::GDALVector, parcel_dsn)
on.exit(parcel_ds$close(), add = TRUE)
prj <- parcel_ds$getSpatialRef()
pp <- gdalraster::transform_xy(cbind(pt$long, pt$lat), srs_to = prj, srs_from = "EPSG:4326")
ex <- rep(pp, each = 2) + c(-1, 1, -1, 1) * distance
sf <- gdalraster::bbox_to_wkt(ex[c(1, 3, 2, 4)])
In this thread it's actually not well defined, essentially if a vertex is used twice it shouldn't be in the output. (But should we normalize on edge or vertex, or full shared boundaries?). Should islands stay in the set? (I don't think so)
With silicate, simplest case is
library(silicate)
sc <- SC(polygon)
library(dplyr)
sc$object_link_edge <- sc$object_link_edge |>
group_by(edge_) |>
do you need 500 slightly overlapping Zarr datasets?
src <- "/vsicurl/https://projects.pawsey.org.au/idea-gebco-tif/GEBCO_2024.tif"
src <- "https://projects.pawsey.org.au/idea-gebco-tif/GEBCO_2024.tif"
library(purrr) ## purrr CRAN
library(mirai) ## mirai CRAN
if (!file.exists(basename(src))) {
curl::curl_download(src, basename(src)) ## curl CRAN
}