Last active
August 29, 2015 14:07
-
-
Save raymondben/b5c4154d13ffc09c43bd to your computer and use it in GitHub Desktop.
Mapping eucalypt heights across Australia using ALA4R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## see http://untan.gl/articles/2014/10/08_using-ala4r-mapping-eucalypt-height.html | |
## load the ALA4R package | |
library(ALA4R) | |
## check that it's version 1.04 or later: the "guid" attribute on the sites_by_species data was added in that version | |
if (as.numeric(packageDescription("ALA4R",fields="Version"))<1.04) | |
stop("reinstall ALA4R to get v1.04 or later") | |
ala_config(cache_directory=file.path("~","data","ala_cache")) | |
## some other packages that we'll use | |
library(stringr) | |
library(plyr) | |
library(ggplot2) | |
## define the text searches (regular expressions) that we use to extract height information from a species profile | |
htre=data.frame(re=c('(growing to|grows to|heights of up to)[[:space:]]+([\\.[:digit:]]+)[[:space:]]*(m|metres)'),col_index=c(3),type=c("maximum"),stringsAsFactors=FALSE) | |
htre=rbind(htre,list('(tree|shrub)[[:punct:][:space:]]+(up to|to)[[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)',4,"maximum")) | |
htre=rbind(htre,list('(tree|trees|shrub|shrubs|grows|grow|growing)[[:space:]]+to[[:space:]]+(roughly|about|around|over)[[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)',4,"maximum")) ## text like "tree to about 15 metres, grows to about 10m" | |
htre=rbind(htre,list('(tree|trees|shrub|shrubs)[[:space:]]+(about|around|roughly|over)[[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)[[:space:]]+(high|tall|in height)',4,"maximum")) ## text like "tree about 4 m high" | |
htre=rbind(htre,list('(to|may reach|can reach|up to|reaching|shrub|tree)[[:space:]]+([\\.[:digit:]]+)[[:space:]]*(m|metres)[[:space:]]+(high|tall|in height)',3,"maximum")) | |
htre=rbind(htre,list('(to|may reach|can reach|up to|reaching|shrub|tree)[[:space:]]+([\\.[:digit:]]+)[[:space:]]*(m|metres)[[:space:]]+or more (high|tall|in height)',3,"maximum")) ## e.g. "reaching 80m or more in height" | |
htre=rbind(htre,list('(to|may reach|can reach|up to|reaching|growing to|grows to)[[:space:]]+a[[:space:]]+height[[:space:]]+of[[:space:]]+([\\.[:digit:]]+)[[:space:]]*(m|metres)',3,"maximum")) | |
htre=rbind(htre,list('(to|may reach|can reach|up to|reaching|growing to|grows to)[[:space:]]+a[[:space:]]+maximum[[:space:]]+height[[:space:]]+of[[:space:]]+([\\.[:digit:]]+)[[:space:]]*(m|metres)',3,"maximum")) | |
htre=rbind(htre,list('(to|may reach|can reach|up to|reaching|growing to|shrub|shrubs|tree|trees|typically|generally|usually|commonly|about)[[:space:][:punct:]]+[\\.[:digit:]]+[[:space:]]*[–-][[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)[[:space:]]+(high|tall|in height)',3,"maximum")) | |
htre=rbind(htre,list('(to|may reach|can reach|reaches|up to|reaching|growing to|shrub|shrubs|tree|trees)[[:space:][:punct:]]+(typically|generally|usually|commonly|about|of about)?[[:space:]]*[\\.[:digit:]]+[[:space:]]*[–-][[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)[[:space:]]+(high|tall|in height)',4,"maximum")) ## e.g. "shrub, typically 2-3 metres high", "tree of about 6-9 metres in height", "reaches about 5-7 metres in height" | |
htre=rbind(htre,list('(height|shrub|tree|shrubs|trees)[[:space:][:punct:]]+(can range|ranging|mostly seen)[[:space:]]+between[[:space:]]*[\\.[:digit:]]+[[:space:]]*(–|and|-)[[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)',5,"maximum")) ## "height can range between 2 and 25 metres", "tree ranging between 10 and 20 metres" | |
htre=rbind(htre,list('(grows to|grow to|growing to|grow)[[:space:]]+between[[:space:]]*[\\.[:digit:]]+[[:space:]]*[(m|metres)]?[[:space:]]*(–|and|-)[[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)',4,"maximum")) | |
htre=rbind(htre,list('(occasionally reaching)[[:space:]]+([\\.[:digit:]]+)[[:space:]]*(m|metres)',3,"exceptional maximum")) ## e.g. "occasionally reaching 50 m" | |
## a function to apply those regular expressions | |
extract_heights=function(x){ | |
x=x$simpleProperties$value | |
x=str_replace_all(x,"\\([[:digit:][:space:]–-]+(in|inches|ft|feet)\\)","") ## some descriptions give heights in metres and then repeat in imperial units - remove these | |
out=data.frame() | |
processed_x=x | |
for (k in 1:nrow(htre)) { ## iterate through each regular expression in turn | |
this_match=str_match_all(processed_x,ignore.case(htre[k,]$re)) | |
this_match=ldply(this_match,function(z)if (is.null(nrow(z))) NULL else data.frame(type=htre[k,]$type,matched=z[,1],value=as.numeric(z[,htre[k,]$col_index]))) | |
if (nrow(this_match)>0) { | |
out=rbind(out,this_match) | |
} | |
processed_x=str_replace_all(processed_x,ignore.case(htre[k,]$re),"") ## remove matches as we go, so that we don't match multiple times on the same text | |
} | |
out | |
} | |
## download our sites-by-species matrix, using a 0.5-degree grid cell size | |
ss=sites_by_species("genus:Eucalyptus",wkt="POLYGON((110 -45,155 -45,155 -10,110 -10,110 -45))",gridsize=0.5) | |
## convert coordinates to cell centres | |
ss$longitude=ss$longitude+0.25 | |
ss$latitude=ss$latitude+0.25 | |
all_guids=attr(ss,"guid") ## the identifier of each eucalypt taxon in the ss dataframe | |
guid_heights=rep(NA,length(all_guids)) | |
for (k in 1:length(all_guids)) { | |
## iterate through each identifier and extract the mean tree height | |
x=species_info(guid=all_guids[k]) | |
temp=extract_heights(x) | |
guid_heights[k]=mean(temp$value[temp$type=="maximum"]) | |
} | |
## now, for each column in our sites-by-species matrix, calculate the average height of the species present, weighted by their prevalences | |
cellheight=apply(ss[,3:ncol(ss)],1,function(z)sum(z*guid_heights,na.rm=TRUE)/sum(z,na.rm=TRUE)) | |
## there are a few cells that lie over the ocean (errors in record location) --- almost all of these have only a single species, so we'll remove any cell with only one species (this also removes some cells over land) | |
celln=rowSums(ss[,3:ncol(ss)]>0) | |
cellheight[celln<2]=NA | |
## plot the result | |
(ggplot(data.frame(lon=ss$longitude,lat=ss$latitude,height=cellheight),aes(x=lon,y=lat))+geom_tile(mapping=aes(fill=height))+scale_fill_gradientn(limit=c(0,50),na.value="transparent",colours=c("#709959","#A3C77D","#D6ED9A","#F0E48D","#E6BA85","#C9957F","#D6AD94","#F5E1C4","#FAF1E3"))) | |
## Part 2: exploring the patterns | |
## pick some environmental layers. See ala_fields("layers") for a full list | |
env_layers=c("Precipitation - annual","Radiation - annual mean (Bio20)","Elevation ","Temperature - annual max mean","Wind speed - annual mean 9am or 3pm") | |
working_data=na.omit(xh) | |
## get the value of each of those environmental layers at each of our sample points | |
ep=intersect_points(working_data[,c("lat","lon")],env_layers) | |
working_data=na.omit(cbind(working_data,ep)) | |
## fit a random forest regression model | |
library(randomForest) | |
fit=randomForest(height~precipitationAnnual+radiationAnnualMeanBio20+elevation+temperatureAnnualMaxMean+windSpeedAnnualMean9amOr3pm,data=working_data,importance=TRUE) | |
## plot the partial effects | |
imp=importance(fit) | |
op=par(mfrow=c(3,2)) | |
for (i in sort(imp[,1],index.return=TRUE,decreasing=TRUE)$ix) { | |
thisvar=rownames(imp)[i] | |
partialPlot(fit,working_data,names(working_data)[names(working_data)==thisvar],xlab=sprintf("%s (importance %.1f)",thisvar,imp[i,1]),main="") | |
} | |
par(op) | |
## use the model to predict across Australia - first construct a lon/lat grid | |
grid_data=expand.grid(lon=seq(110,155,by=0.2),lat=seq(-45,-10,by=0.1)) | |
grid_data=cbind(grid_data,intersect_points(grid_data[,c("lat","lon")],env_layers)) ## look up environmental data | |
grid_data$height=predict(fit,newdata=grid_data) ## use model to predict height | |
(ggplot(grid_data,aes(x=lon,y=lat))+geom_tile(mapping=aes(fill=height))+scale_fill_gradientn(limit=c(0,50),na.value="transparent",colours=c("#709959","#A3C77D","#D6ED9A","#F0E48D","#E6BA85","#C9957F","#D6AD94","#F5E1C4","#FAF1E3"))) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment