1. Prepare Data Layers

Thrush & Elevation Data

# Read in Thrush Presence/Absence Point Data
point.data <- read.csv(here::here("Data", "Fletcher_Fortin-2018-Supporting_Files", "data", "vath_2004.csv"), header=TRUE)
## Warning in readLines(f, n): line 1 appears to contain an embedded nul
## Warning in readLines(f, n): incomplete final line found on 'D:/Mac Documents
## 02-11-24/School/GIST/Spring
## 2022/697DR/spatial_data_in_r/._spatial_data_in_r.Rproj'
# Read in Elevation DEM
elev = raster(here::here("Data", "Fletcher_Fortin-2018-Supporting_Files", "data", "elev.gri"))

Calculate Terrain

Terrain from Raster takes an elevation layer (e.g., DEM) and returns raster layers that are calculated from elevation: including slope, aspect, topographic position index, terrain ruggedness index (TRI), roughness, and flow direction.

Stack from Raster will merge multiple raster layers together!

# 1. Create slope, aspect layers from elevation
elev.terr <- terrain(elev, opt=c("slope", "aspect"))
  
# 2. Prepare Layers for Extraction
layers        <- stack(elev, elev.terr)
names(layers) <- c("elev", "slope", "aspect")

# 3. Extract GIS data at sampling points
coords   <- cbind(point.data$EASTING, point.data$NORTHING)
land.cov <- extract(x=layers, y=coords)

# 4. Append Spatial Data to Thrush Dataset
point.data <- cbind(point.data,land.cov)

Inspect Data Layers

# 1. Subset Raster Brick to access Slope
slope_spdf = rasterToPoints(layers$slope)

# 2. Convert rasterToPoints to data.frame for GGPLOT
slope_df = data.frame(slope_spdf)

# 3. Convert Points to SF
point_sf = st_as_sf(point.data, coords = c("EASTING", "NORTHING"))

# 4. GGPLOT
ggplot() +
  geom_raster(data = slope_df, mapping = aes(x = x, y = y, fill = slope)) +
  geom_sf(data = point_sf)  +
  labs(title = "Thrush Presence/Absence and Slope", 
       fill = "Slope",
       color = "Thrush\nPresence/Absence",
       x = "EASTING", 
       y = "NORTHING") +
  scale_fill_gradientn(colours = wes_palette("IsleofDogs2", type = "continuous"))

2. Accounting for Autocorrelation in Models that Ignore Spatial Dependence

Logistic Regression Ignoring Spatial Dependence

Creating the Regression

# Check Correlations among Predictor Variables
cor(point.data[,c("elev","slope","aspect")], method="pearson")
##               elev      slope      aspect
## elev    1.00000000 0.15910701 -0.08920279
## slope   0.15910701 1.00000000  0.03533303
## aspect -0.08920279 0.03533303  1.00000000
pairs(point.data[,c("elev","slope","aspect")])

Scale from Raster allows you to scale and center rasters for modeling!

# Center and Scale Predictor Variables for Modeling
point.data$elevs   <- scale(point.data$elev,   center = T, scale = T)
point.data$slopes  <- scale(point.data$slope,  center = T, scale = T)
point.data$aspects <- scale(point.data$aspect, center = T, scale = T)

To specify a quadratic term in R, we write I(elev^2).

*This could also be accomplished through the poly()

# 1. Logistic Regression on Elevation
VATH.elev  <- glm(VATH ~ elev, family="binomial", data=point.data)

# 2. Logistic Regression with All Additive Factors
VATH.all   <- glm(VATH ~ elevs + slopes + aspects, family="binomial", data=point.data)

# 3. Logistic Regression with Elevation as a Quadratic (nonlinear) Effect
VATH.elev2 <- glm(VATH ~ elev + I(elev^2),family="binomial", data=point.data)

Regression Model Comparison with AIC

summary(VATH.elev)
## 
## Call:
## glm(formula = VATH ~ elev, family = "binomial", data = point.data)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  -1.0889     0.4140   -2.63  0.00853 **
## elev         -0.7219     0.3208   -2.25  0.02444 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 584.34  on 804  degrees of freedom
## Residual deviance: 579.10  on 803  degrees of freedom
## AIC: 583.1
## 
## Number of Fisher Scoring iterations: 5
summary(VATH.all)
## 
## Call:
## glm(formula = VATH ~ elevs + slopes + aspects, family = "binomial", 
##     data = point.data)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.04628    0.11269 -18.158   <2e-16 ***
## elevs       -0.25493    0.11606  -2.197    0.028 *  
## slopes      -0.08421    0.11172  -0.754    0.451    
## aspects     -0.13852    0.10973  -1.262    0.207    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 584.34  on 804  degrees of freedom
## Residual deviance: 576.83  on 801  degrees of freedom
## AIC: 584.83
## 
## Number of Fisher Scoring iterations: 5
summary(VATH.elev2)
## 
## Call:
## glm(formula = VATH ~ elev + I(elev^2), family = "binomial", data = point.data)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -7.984      1.990  -4.012 6.01e-05 ***
## elev          10.698      3.227   3.316 0.000915 ***
## I(elev^2)     -4.476      1.281  -3.494 0.000475 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 584.34  on 804  degrees of freedom
## Residual deviance: 560.54  on 802  degrees of freedom
## AIC: 566.54
## 
## Number of Fisher Scoring iterations: 6
# Contrast Models with AIC
AIC(VATH.elev,VATH.all,VATH.elev2)
##            df      AIC
## VATH.elev   2 583.1021
## VATH.all    4 584.8349
## VATH.elev2  3 566.5389
# Extract Coefficients and their SEs from the Best Model
glm.summary <- c(summary(VATH.elev2)$coef[2,1],
                 summary(VATH.elev2)$coef[2,2],
                 summary(VATH.elev2)$coef[3,1],
                 summary(VATH.elev2)$coef[3,2])

# Inspect
summary(VATH.elev2)$coef
##              Estimate Std. Error   z value     Pr(>|z|)
## (Intercept) -7.983676   1.989748 -4.012406 6.010296e-05
## elev        10.698244   3.226676  3.315562 9.145911e-04
## I(elev^2)   -4.476405   1.281045 -3.494340 4.752357e-04
glm.summary
## [1] 10.698244  3.226676 -4.476405  1.281045

VATH.elev2 has the lowest AIC, meaning that it is the best model fit!

Plot Predicted Variable Relationships

predict() from Raster makes a Raster object with predictions from a fitted model object (for example, obtained with lm, glm).

plogist does a random generation for the logistic distribution given a model

# Create a Template Dataset
Elev <- seq(min(point.data$elev), max(point.data$elev), length=15)
newdata <- data.frame(elev=Elev)

# Predict onto Template
glm.pred <- predict(VATH.elev2, newdata=newdata, type= "link", se=T)
ucl <- glm.pred$fit + 1.96*glm.pred$se.fit 
lcl <- glm.pred$fit - 1.96*glm.pred$se.fit

# Back-Transform from Link to Probability Scale
glm.newdata <- data.frame(newdata, 
                          pred=plogis(glm.pred$fit), 
                          lcl=plogis(lcl), # upper confidence level
                          ucl=plogis(ucl)) # lower confidence level

We multiply by 1.96 because 1.96 is the approximate value of the 97.5 percentile point of the standard normal distribution. 95% of the area under a normal curve lies within roughly 1.96 standard deviations of the mean, and due to the central limit theorem, this number is therefore used in the construction of approximate 95% confidence intervals.

# 1. Plot Elevation vs. Probability of Thrush Occurrence 
par(mfrow = c(1,2), oma=c(0,0,0,4))
plot(glm.newdata$elev, glm.newdata$pred, ylim=c(0,0.5), xlab="Elevation", ylab="Prob. Occurrence")
lines(glm.newdata$elev, glm.newdata$lcl)
lines(glm.newdata$elev, glm.newdata$ucl)

# 2. Map the model
glm.raster <- predict(model=VATH.elev2, object=layers,  type="response")
plot(glm.raster, xlab = "Longitude", ylab = "Latitude")

Inspect spatial dependence of response variable

# 1. Create Indicator Correlogram Custom Function
icorrelogram <- function(locations,z, binsize, maxdist){
  distbin <- seq(0,maxdist,by=binsize)
  Nbin <- length(distbin)-1
  moran.results <- data.frame("dist"= rep(NA,Nbin), "Morans.i"=NA,"null.lcl"=NA, "null.ucl"=NA)
  
  for (i in 1:Nbin){
    d.start<-distbin[i] 
    d.end<-distbin[i+1]
    neigh <- dnearneigh(x=locations, d1=d.start, d.end, longlat=F) # Identifies neighbors between points for different distance classes
    wts <- nb2listw(neighbours=neigh, style='B', zero.policy=T)    # Creates weights based off of neighbors
    mor.i <- moran.mc(x=z, listw=wts, nsim=200, alternative="greater", zero.policy=T)  
    # Note alternative is for P-value, so only 'significant if positive autocorrelation
    
    moran.results[i, "dist"]<-(d.end+d.start)/2 
    moran.results[i, "Morans.i"]<-mor.i$statistic                                            # Observed Moran's I
    moran.results[i, "null.lcl"]<-quantile(mor.i$res, probs = 0.025,na.rm = T) # 95% null envelope  
    moran.results[i, "null.ucl"]<-quantile(mor.i$res, probs = 0.975,na.rm = T) # 95% null envelope
  }
  return(moran.results)
}
# 2. Run Indicator Correlogram Custom Function
VATH.cor <- icorrelogram(locations=coords,     # contains EASTING and NORTHING (X/Y) of the point data
                         z=point.data$VATH,    # response variable?
                         binsize=1000, maxdist=15000)

# 3. Inspect Indicator Correlogram
round(head(VATH.cor,3),2)
##   dist Morans.i null.lcl null.ucl
## 1  500     0.34    -0.04     0.06
## 2 1500     0.10    -0.03     0.04
## 3 2500     0.01    -0.02     0.02
# 4. Plot Indicator Correlogram
plot(VATH.cor$dist, VATH.cor$Morans.i, ylim = c(-0.5, 0.5), main = "VATH Correlogram")
abline(h=0, lty = "dashed")
lines(VATH.cor$dist, VATH.cor$null.lcl, col = 2)
lines(VATH.cor$dist, VATH.cor$null.ucl, col = 2)

Inspect Spatial Dependence of GLM Residuals

# 1. Extract Residuals from Quadratic Elevation Model
VATH.elev2.res <- residuals(VATH.elev2, type="deviance")

# 2. Create Indicator Correlogram on Residuals of the Quadratic Elevation Model
corr.res <- icorrelogram(locations=coords, z=VATH.elev2.res, binsize=1000, maxdist=15000)

# 3. Plot Residual Indicator Correlogram
plot(corr.res$dist, corr.res$Morans.i, ylim = c(-0.5, 0.5), main = "VATH Residual Correlogram")
abline(h=0, lty = "dashed")
lines(corr.res$dist, corr.res$null.lcl)
lines(corr.res$dist, corr.res$null.ucl)

# 4, Contrast Results from Raw to Residuals with Mean
VATH.int <- glm(VATH ~ 1,family="binomial", data=point.data)
VATH.int.res <- residuals(VATH.int, type="deviance")

Note that rather than using correlograms, we could have used semivariograms on the residuals to interpret spatial autocorrelation in the residuals.

If we fit an intercept-only (mean) model and contrast correlograms from the raw data and the residuals of the mean model, we find that the Moran’s I is identical (r = 1). This illustrates the equivalence of considering residuals from regression models in correlograms when no predictors are considered to that of the raw data.

# 5. Create correlogram on residuals of mean model (intercept model)
corr.int.res <- icorrelogram(locations=coords, z=VATH.int.res, binsize=1000, maxdist=15000)

# 6. Determine Correlation
cor(VATH.cor$Morans.i, corr.int.res$Morans.i)
## [1] 1

Subset Data to Account for Autocorrelation

Because of the spatial dependence in the residuals, we consider either subsetting the data based on the approximate range of spatial autocorrelation or regression-like models that attempt to account for spatial autocorrelation.

# 1. Randomly Shuffle Data by Transect & Create a Shuffled Rank Vector
# Note: This essentially picks one random point from each transect
rand.vector <- with(
  point.data,
  ave(
    POINT,
    as.factor(TRANSECT),
    FUN=function(x) {sample(length(x))}))

# 2. Pick One Random Point on Transect and Remove the Rest
point.datasub <- point.data[rand.vector<=1,]

# 3. Subset Coordinates
coords.sub <- cbind(point.datasub$NORTHING, point.datasub$NORTHING)
# 4. Refit the Logistic Regression Model with New Subset
VATH.sub <- glm(VATH~elev+I(elev^2), family="binomial", data=point.datasub)

# 5. Extract Model Coefficients
glmsub.summary<-c(summary(VATH.sub)$coef[2,1],
                  summary(VATH.sub)$coef[2,2],
                  summary(VATH.sub)$coef[3,1],
                  summary(VATH.sub)$coef[3,2])
glmsub.summary
## [1]  6.951529  7.606159 -3.366618  3.115780

We lost a lot of samples, so SE went up and statistical support for elevation effects was lost!

Depending on the type of model you have, the type of residuals you want to calculate may vary! The deviance residuals are potentially more useful in GLMs in comparison to others because they are directly related to the overall deviance (and likelihood) of the model, where the sum of the deviance residuals equals the deviance of the model (-2log-likelihood).

# 1. Extract Model Residuals
VATH.sub.res <- residuals(VATH.sub, type="deviance")

# 2. Create Indicator Correlogram on Residuals of the New Subset Model
# Note that for this subset we need to use a larger lag distance than 1-km because we no longer have data points <1 km 
# Alternatively, one could just increase the first few bin sizes
corr.sub.res <- icorrelogram(locations=coords.sub, z=VATH.sub.res, binsize=2000, maxdist=15000)

# 3. Plot Residual Indicator Correlogram
plot(corr.sub.res$dist, corr.sub.res$Morans.i, ylim = c(-0.5, 0.5), main = "Subsitute Residual Correlogram")
abline(h=0, lty = "dashed")
lines(corr.sub.res$dist, corr.sub.res$null.lcl)
lines(corr.sub.res$dist, corr.sub.res$null.ucl)

3. Trend Surface Models

Trend surface analysis is the most widely used global surface-fitting procedure. The mapped data are approximated by a polynomial expansion of the geographic coordinates of the control points, and the coefficients of the polynomial function are found by the method of least squares. Each original observation is considered to be the sum of a deterministic polynomial function of the geographic coordinates plus a random error.

Polynomial Trend Surface Model

To do a Polynomial Trend Surface Model, you want to add the linear, quadratic, and cubic terms–which you can do manually or with poly(). poly(EASTING,3) and poly(NORTHING,3) would add the linear, quadratic and cubic terms instead of adding them all yourself. Note that the poly function also standardizes polynomials to be orthogonal, removing the correlation between terms (which in many situations would be preferred).

# 1. Create a Polynomial Trend Surface Model
VATH.trend <- glm(VATH~elev+I(elev^2)+EASTING+NORTHING+I(EASTING^2)+
                    I(EASTING^3)+I(NORTHING^2)+I(NORTHING^3), family="binomial", data=point.data)
# Note: This is equivalent to
VATH.trend <- glm(VATH~elev+I(elev^2)+poly(EASTING,3)+poly(NORTHING,3), family="binomial", data=point.data)

# 2. Extract Model Coefficients
trend.summary <- c(summary(VATH.trend)$coef[2,1],
                   summary(VATH.trend)$coef[2,2],
                   summary(VATH.trend)$coef[3,1],
                   summary(VATH.trend)$coef[3,2])

# 3. Extract Model Residuals
VATH.trend.res <- residuals(VATH.trend, type="deviance")

# 4. Create Indicator Correlogram on Residuals of the Polynomial Trend Surface Model
cor.trend.res <- icorrelogram(locations=coords, z=VATH.trend.res, binsize=1000, maxdist=15000)

# 5. Plot Residual Indicator Correlogram
plot(cor.trend.res$dist, cor.trend.res$Morans.i, ylim = c(-0.5, 0.5), main = "Polynomial Trend Surface Residual Correlogram")
abline(h=0, lty = "dashed")
lines(cor.trend.res$dist, cor.trend.res$null.lcl)
lines(cor.trend.res$dist, cor.trend.res$null.ucl)

GAM Trend Surface Model

The Polynomial Trend Surface Model is limited in the spatial variation in can capture. An alternative to this model is to consider a generalized additive model (GAM), where we allow spline functions to capture spatial variation. The mgcv package provides a means to automate the selection of spline variation through the use of generalized cross-validation procedures

Splines are considered for both Easting (x) and Northing ( y) coordinates with the s() command!

*This syntax defaults to automated selection of the number of knots being considered

# 1. Generate GAM Trend Surface Model
VATH.gam <- gam(VATH~elev+I(elev^2)+s(EASTING,NORTHING), family="binomial", data=point.data)

# 2. Inspect the Model
summary(VATH.gam)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## VATH ~ elev + I(elev^2) + s(EASTING, NORTHING)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -26.794      5.857  -4.575 4.77e-06 ***
## elev           9.364      4.811   1.947   0.0516 .  
## I(elev^2)     -2.752      1.898  -1.449   0.1472    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                       edf Ref.df Chi.sq  p-value    
## s(EASTING,NORTHING) 28.68  28.96  73.63 8.76e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.222   Deviance explained = 30.9%
## UBRE = -0.41984  Scale est. = 1         n = 805
# 3. Extract Model Coefficients
gam.summary <- c(summary(VATH.gam)$p.coeff[2],
                 summary(VATH.gam)$se[2],
                 summary(VATH.gam)$p.coeff[3],
                 summary(VATH.gam)$se[3])

# 4. Extract Model Residuals
VATH.gam.res <- residuals(VATH.gam, type="deviance")

# 5. Create Indicator Correlogram on Residuals of the GAM Trend Surface Model
cor.gam.res <- icorrelogram(locations=coords, z=VATH.gam.res, binsize=1000, maxdist=15000)

# 6. Plot Residual Indicator Correlogram
plot(cor.gam.res$dist, cor.gam.res$Morans.i, ylim = c(-0.5, 0.5), main = "GAM Residual Correlogram")
abline(h=0, lty = "dashed")
lines(cor.gam.res$dist, cor.gam.res$null.lcl)
lines(cor.gam.res$dist, cor.gam.res$null.ucl)

4. Eigenvector Mapping

Moran Eigenvector (ME) is intended to remove spatial autocorrelation from the residuals of generalised linear models.

Eigenvector Creation

We want to calculate a neighborhood weights matrix by using the maximum distance needed for a minimum spanning tree—the minimum set of connections needed to fully connect points across the landscape. The distance needed for a minimum spanning tree can be determined with the vegan package using the spantree function. Note that this distance could also be determined using the pcnm function and finding the threshold

# 1. Determine Threshold Distance Based on Minimum Spanning Tree
spantree.em <- spantree(dist(coords), toolong = 0)
max(spantree.em$dist)
## [1] 41351.09
# 2. Create a Neighbhorhood Matrix
dnn<- dnearneigh(coords, 0, max(spantree.em$dist))                   # List of links to neighboring points within distance range
dnn_dists <- nbdists(dnn, coords)                                    # List of distances (based on links in dnn)
dnn_sims <- lapply(dnn_dists, function(x) (1-((x/4)^2)))             # Scale distances as recommended by Dormann et al. (2007) 
ME.weight <- nb2listw(dnn, glist=dnn_sims, style="B", zero.policy=T) # Create spatial weights matrix (in list form)

# 3. Determine Eigenvector Model Selection
VATH.ME <- spatialreg::ME(VATH~elev+I(elev^2), listw=ME.weight, family="binomial", data=point.data)

Selected Eigenvector Summaries

# 1. Inspect Eigenvector Model
summary(VATH.ME)
##           Length Class  Mode   
## selection   12   -none- numeric
## vectors   2415   -none- numeric
# 2. View Selected Eigenvectors
head(VATH.ME$selection)
##   Eigenvector ZI pr(ZI)
## 0          NA NA   0.01
## 1         796 NA   0.01
## 2         804 NA   0.02
## 3         805 NA   0.14
head(fitted(VATH.ME),2)
##            vec796       vec804     vec805
## [1,] -0.003042641 -0.008629250 0.01187249
## [2,] -0.003088077 -0.008737222 0.01196633
head(VATH.ME$vectors)
##            vec796       vec804     vec805
## [1,] -0.003042641 -0.008629250 0.01187249
## [2,] -0.003088077 -0.008737222 0.01196633
## [3,] -0.002422438 -0.007061317 0.01058924
## [4,] -0.002220150 -0.006558714 0.01018355
## [5,] -0.002271660 -0.006875627 0.01031748
## [6,] -0.002232756 -0.006779663 0.01023578

Eigenvector Modeling

# 1. Create New GLM with Eigenvector Adjusted Covariates
VATH.evm <- glm(VATH~elev+I(elev^2)+fitted(VATH.ME), family="binomial", data=point.data)

# 2. Inspect Eigenvector GLM Model
summary(VATH.evm)
## 
## Call:
## glm(formula = VATH ~ elev + I(elev^2) + fitted(VATH.ME), family = "binomial", 
##     data = point.data)
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -8.401      1.948  -4.312 1.62e-05 ***
## elev                     8.776      3.029   2.898  0.00376 ** 
## I(elev^2)               -3.112      1.168  -2.664  0.00773 ** 
## fitted(VATH.ME)vec796   14.742      3.198   4.610 4.03e-06 ***
## fitted(VATH.ME)vec804   -8.644      3.242  -2.666  0.00767 ** 
## fitted(VATH.ME)vec805   38.110      8.789   4.336 1.45e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 584.34  on 804  degrees of freedom
## Residual deviance: 499.73  on 799  degrees of freedom
## AIC: 511.73
## 
## Number of Fisher Scoring iterations: 7
# 3. Extract coefficients
evm.summary <- c(summary(VATH.evm)$coef[2,1],
                 summary(VATH.evm)$coef[2,2],
                 summary(VATH.evm)$coef[3,1],
                 summary(VATH.evm)$coef[3,2])
evm.summary
## [1]  8.776152  3.028835 -3.112276  1.168448
# 4. Extract Model Residuals
VATH.evm.res <- residuals(VATH.evm, type="deviance")

# 5. Create Indicator Correlogram on Residuals of the Eigenvector GLM Model
cor.evm.res <- icorrelogram(locations=coords, z=VATH.evm.res, binsize=1000, maxdist=15000)

# 6. Plot Residual Indicator Correlogram
plot(cor.evm.res$dist, cor.evm.res$Morans.i, ylim = c(-0.5, 0.5), main = "Eigenvector Residual Correlogram")
abline(h=0, lty = "dashed")
lines(cor.evm.res$dist, cor.evm.res$null.lcl)
lines(cor.evm.res$dist, cor.evm.res$null.ucl)

# 7. Plot Eigenvectors & Predicted Map
plot(glm.raster, xlab = "Longitude", ylab = "Latitude")
points(point.data[,c("EASTING","NORTHING")], pch=21, col="black", cex=2,lwd = 0.5,
       bg=topo.colors(6)[cut(fitted(VATH.ME)[,1],breaks = 6)])

5. Autocovariate Logistic Regression

To fit autocovariate models, we calculate new autocovariates and then use these covariates in a standard logistic regression model The type provides the weighting scheme.

The style describes how the covariate will be calculated

# 1. Create Alternative Autocovariates with 1km radius
auto1km_inv <- autocov_dist(point.data$VATH, coords, nbs = 1000, type= "inverse",       # Inverse of distance weights
                            zero.policy=T)    
## Warning in autocov_dist(point.data$VATH, coords, nbs = 1000, type = "inverse",
## : With value 1000 some points have no neighbours
auto1km     <- autocov_dist(point.data$VATH, coords, nbs = 1000, type= "one",style="B", # Binary weights
                            zero.policy=T)  
## Warning in autocov_dist(point.data$VATH, coords, nbs = 1000, type = "one", :
## With value 1000 some points have no neighbours
# 2. Contrast Autocovariates with 1km radius
cor(auto1km, auto1km_inv)
## [1] 0.9474509
# 3. Plot Autocovariates
par(mfrow = c(1,2))
plot(auto1km, auto1km_inv)
plot(jitter(auto1km, factor=0.4), auto1km_inv) # x-axis jittered to better see points

# 4. Create an Autocovariate Model
VATH.auto1km <- glm(VATH~elev+I(elev^2)+auto1km, family="binomial", data=point.data)

# 5. Inspect Autocovariate Model
summary(VATH.auto1km)
## 
## Call:
## glm(formula = VATH ~ elev + I(elev^2) + auto1km, family = "binomial", 
##     data = point.data)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -6.6518     1.9660  -3.383 0.000716 ***
## elev          6.9046     3.1222   2.211 0.027006 *  
## I(elev^2)    -2.8061     1.2106  -2.318 0.020450 *  
## auto1km       0.8665     0.1008   8.596  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 584.34  on 804  degrees of freedom
## Residual deviance: 470.99  on 801  degrees of freedom
## AIC: 478.99
## 
## Number of Fisher Scoring iterations: 6
# 6. Extract Model Coefficients
auto.summary <- c(summary(VATH.auto1km)$coef[2,1],
                  summary(VATH.auto1km)$coef[2,2],
                  summary(VATH.auto1km)$coef[3,1],
                  summary(VATH.auto1km)$coef[3,2])

# 7. Extract Model Residuals
VATH.auto.res <- residuals(VATH.auto1km, type="deviance")

# 8. Create Indicator Correlogram on Residuals of the Autocovariate Model
cor.auto.res <- icorrelogram(locations=coords, z=VATH.auto.res, binsize=1000, maxdist=15000)

# 9. Plot Residual Indicator Correlogram
plot(cor.auto.res$dist, cor.auto.res$Morans.i, ylim = c(-0.5, 0.5), main = "Autocovariate Logistic Regressive Model Residual Correlogram")
abline(h=0, lty = "dashed")
lines(cor.auto.res$dist, cor.auto.res$null.lcl)
lines(cor.auto.res$dist, cor.auto.res$null.ucl)

6. Multi-level Model

A simple multilevel model can also be fit to these data by considering transects as a random effect in the regression model. In doing so, we effectively “block” with transects, treating points within transects has having potential spatial dependence. Because this structure is not spatially explicit, we effectively assume that dependence is constant within transects (e.g., neighboring points have the same dependence as points located along the ends of the transects).

# 1. Ensure Random Effects are Factors
point.data$TRANSECT <- as.factor(point.data$TRANSECT)

# 2. Create a GLMM
VATH.glmm <- glmer(VATH~elev+I(elev^2)+(1|TRANSECT), family="binomial", data=point.data)

# 3. Inspect the GLMM
summary(VATH.glmm)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: VATH ~ elev + I(elev^2) + (1 | TRANSECT)
##    Data: point.data
## 
##      AIC      BIC   logLik deviance df.resid 
##    498.4    517.2   -245.2    490.4      801 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3520 -0.1755 -0.1541 -0.1129  5.7688 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  TRANSECT (Intercept) 4.456    2.111   
## Number of obs: 805, groups:  TRANSECT, 167
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   -8.470      3.258  -2.600  0.00934 **
## elev           9.459      5.149   1.837  0.06621 . 
## I(elev^2)     -4.043      1.977  -2.045  0.04083 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr) elev  
## elev      -0.981       
## I(elev^2)  0.946 -0.988
# 4. Extract Model Coefficients
glmm.summary <- c(summary(VATH.glmm)$coef[2,1],
                  summary(VATH.glmm)$coef[2,2],
                  summary(VATH.glmm)$coef[3,1],
                  summary(VATH.glmm)$coef[3,2])
glmm.summary
## [1]  9.458996  5.149122 -4.042973  1.976795
# 5. Extract Model Residuals
VATH.glmm.res <- resid(VATH.glmm)

# 6. Create Indicator Correlogram on Residuals of the GLMM
cor.glmm.res <- icorrelogram(locations=coords, z=VATH.glmm.res, binsize=1000, maxdist=15000)

# 7. Plot Residual Indicator Correlogram
plot(cor.glmm.res$dist, cor.glmm.res$Morans.i, ylim = c(-0.5, 0.5), main = "Multilevel Model Residual Correlogram")
abline(h=0, lty = "dashed")
lines(cor.glmm.res$dist, cor.glmm.res$null.lcl)
lines(cor.glmm.res$dist, cor.glmm.res$null.ucl)

7. Model Selection & RMSE

rmse = function(fit) return (sqrt(mean(residuals(fit)^2)))
fits_rmse = data.frame(
  model = c("Logistic Regression Ignoring Spatial Dependence", 
            "Substituted Logistic Regression Ignoring Spatial Dependence", 
            "Polynomial Trend Surface Model",
            "GAM Trend Surface Model",
            "Eigenvector Model",
            "Autocovariate Logistic Regression",
            "Multi-level Model"),
  rmse = c(
    rmse(VATH.elev2),
    rmse(VATH.sub),
    rmse(VATH.trend),
    rmse(VATH.gam),
    rmse(VATH.evm),
    rmse(VATH.auto1km),
    rmse(VATH.glmm)
  ))
fits_rmse
##                                                         model      rmse
## 1             Logistic Regression Ignoring Spatial Dependence 0.8344589
## 2 Substituted Logistic Regression Ignoring Spatial Dependence 0.7789392
## 3                              Polynomial Trend Surface Model 0.7777082
## 4                                     GAM Trend Surface Model 0.7081294
## 5                                           Eigenvector Model 0.7879001
## 6                           Autocovariate Logistic Regression 0.7649045
## 7                                           Multi-level Model 0.6214516

The lower the RMSE, the better a given model is able to fit a dataset–meaning that the Multi-level and GAM Trend Surface Model are the best models to choose from. The GAM Trend Surface Model did not successfully remove spatial dependence in the residuals while the Multi-level model did. The Multi-level model is the obvious choice, sporting both a low RMSE value and a removal of spatial dependence in the residuals.

Autocovariate and multilevel models did remove spatial dependence in the residuals by appropriately capturing the spatial scale of dependence in the data.