# Kristian Skrede Gleditsch 29 October 2007 # ksg@essex.ac.uk # # This file contains code to replicate all the examples with R code # apparing in the book An Introduction to Spatial Regression # Models in the Social Sciences by Michael D. Ward # and Kristian Skrede Gleditsch # # # This code is "enhanced" and does not always match exactly what # is given in the book In particular, some comments that do not # appear in the book have been added to provide additional # explanations, and some formatting has been changed # # The codeassumes that the relevant input files are in # the working directory. # # Last revised on 11 September 2008 # Set a working directory # Use Unix-style / or \\ for directories in R dd <- c("C:\\work\\spatialbook\\") setwd(dd) ### Replicates code reproduced in Chapter 1 # Ex 1: Plotting map with centroids and capitals # Load required libraries # These can be downloaded from the packages menu within R under Windows library(RColorBrewer) library(maptools) library(spdep) library(sp) library(rgdal) # Read a Robinson projection map from an ESRI shapefile rob.shp <- read.shape("wg2002worldmap.shp") # Indicate the id codes for each polygon/country rob.map <- Map2poly(rob.shp,region.id = unique(as.character(rob.shp$att.data$FIPS_CNTRY))) # Indicate the map projection tr <- readShapePoly("wg2002worldmap", IDvar="FIPS_CNTRY", proj4string=CRS("+proj=robin +lon 0=0")) # Extract the relevant variables and exclude missing data ct <- na.omit(rob.shp$att.data[,c(1, 18:20)]) # Assign relevant variable/column names colnames(ct) <- c("ID","x", "y", "City_POP") ct$x <- as.numeric(as.character(ct$x)) ct$y <- as.numeric(as.character(ct$y)) # Add coordinates coordinates(ct) <- c("x", "y") proj4string(ct) <- CRS("+proj=longlat +datum=WGS84") # Tranform the coordinates to the robinson projection ct_rb <- spTransform(ct, CRS=CRS("+proj=robin +lon0=0")) # Replot the map itself without a bounding box plot(rob.map, border="Grey", forcefill=T, xaxt="n", yaxt="n", bty="n", lwd=.000000000125, las=1, ylab="", main="Centroids and Capitals", xlab="") # Add the centroids points(coordinates(tr), pch=19,cex=.5, col="grey") # Add the capitals points(coordinates(ct_rb), pch=19, cex=.5, col="black") # Add segments between centroids and capitals tr_or <- coordinates(tr) rownames(tr_or) <- as.character((attributes(tr)$data)$FIPS_CNTRY) ct_rb_or <- coordinates(ct_rb) rownames(ct_rb_or) <- as.character(ct_rb$ID) # Delete Kiribati (91), as longitude extrends across international day line coor_dif <- cbind(tr_or[-91,], ct_rb_or[rownames(tr_or),][-91,]) x1 <- coor_dif[,1] x2 <- coor_dif[,3] y1 <- coor_dif[,2] y2 <- coor_dif[,4] segments(x1, y1, x2, y2,col="slategray4") # Ex 2. Spatial correlation test for democracy and development example # Load data # # This archive contains four objects: # # sldv contains the data on democracy, gdp per capita, and population # mdd2 contains the minimum distance data # nblist is a list of connectivities based on a 200 km threshold # (see below for code to generate this) # wmat is a row normalized connectivty matrix based on a 200 km threshold # (see below for code to generate this) source("chapter1data.R") # Estimate OLS ols1.fit <- glm(democracy ~ log(gdp.2002/population), data=sldv) # Load spdep library for moran.test() library(spdep) # Moran's I test under randomization moran.test(resid(ols1.fit),nb2listw(nblist)) # Moran's I test for residuals from a regression model # based on weighted residuals lm.morantest(ols1.fit,nb2listw(nblist)) # Ex. 3 Create a plot of standardized democracy against its spatial lag # with regression line and rug added # Print to a PDF file # pdffilename <- c("file name and path") # pdf(file=pdffilename, width = 5.0, height = 5.0,family="Times") # Extract residuals from OLS regresion dem <- (resid(ols1.fit)) # Create a standardized democracy variable ds <- (dem-mean(dem))/sqrt(var(dem)) # Create a spatial lag ds.slag <- as.vector(wmat%*%ds) # Standardize spatial lag ds.slag <- (ds.slag-mean(ds.slag))/sqrt(var(ds.slag)) # Plot democracy against its spatial lag plot(ds, ds.slag, xlim=c(-3,3), ylim=c(-3,3), pch=20, las=1, xlab="standardized democracy", ylab="spatial lag of standardized democracy", bty="n") # Rehression of spatial lag of democracy on democracy - slope is the Moran's I reg1 <- lm(ds.slag~ds) # Establish a grid for a confidence interval xgrid <- seq(-3,1.5,length.out=158) x0 <- list(ds=xgrid) pred.out <- predict(reg1, x0, interval="confidence") # Put 1 and 2 sd boxes on the plot lines( c(-2,-2,+2,+2,-2), c(-2,+2,+2,-2,-2) ) lines( c(-1,-1,+1,+1,-1), c(-1,+1,+1,-1,-1) ) lines( c(-2,+2), c(0,0) ) lines( c(0,0), c(-2,+2) ) # Add some text for context text(-2.5,3,"(low,high)"); text(2.5,3,"(high,high)") text(-2.5,-3,"(low,low)"); text(2.5,-3,"(high,low)") polygon(x=c(-1,0,0,-1), y=c(-1,-1,0,0), col = "slategray3") polygon(x=c(0,1,1,0), y=c(0,0,1,1), col = "slategray3") # Plot the c.i. region polygon(x=c(xgrid, rev(xgrid)), y=c(pred.out[,3], rev(pred.out[,2])), col="slategray3", border=T) # Put the data on the plot points(ds, ds.slag, pch=20) # Add densities sldensity <- density(ds.slag) lines(sldensity$y+2, sldensity$x, lty=2, col="slategray4") ddensity <- density(ds) lines(ddensity$x, ddensity$y+2, lty=2, col="slategray4",xlim=c(-2,2)) # Add regression line to the plot lines(xgrid, pred.out[,1], type="l", lty=2, col="gray80", lwd=2) # Add rugs for the data densities on the two sides rug(jitter(ds, factor=2), col="slategray3") rug(ds.slag, side=2, col="slategray3") # Label a group of points explicitly text(-2., -2.3, "Oil Exporters", col="slategray4") # Turn off the device, i.e., close PDF file # dev.off() # P. 33 # sldv is the data.frame # mdd2 is the minimum distance data.frame nblist <- vector(mode="list",length=dim(sldv)[1]) attr(nblist,"region.id") <- sldv$tla attr(nblist,"class") <- "nb" nbnms <- data.frame(sldv$tla,c(1:dim(sldv)[1])) names(nbnms) <-c ("acr","nm") min200 <- mdd2[mdd2$mindist<=200,] # Create a index of the isolates nodata <- setdiff(sldv$tla,unique(c(min200$ida,min200$idb))) # Find neighbors for each row in the sldv for(i in 1:dim(sldv)[1]){ temp <- min200[min200$ida==sldv$tla[i] | min200$idb==sldv$tla[i],] cty <- unique(c(temp$ida,temp$idb)) cty <- setdiff(cty,sldv$tla[i]) nblist[[i]] <- nbnms[match(cty,nbnms$acr),"nm"] } # wmat is the row standardized weights matrix wmat <- matrix(0,ncol=dim(sldv)[1],nrow=dim(sldv)[1]) rownames(wmat) <- colnames(wmat) <- sldv$tla for (i in 1:dim(min200)[1]){ wmat[min200$ida[i],min200$idb[i]] <- 1 } wmat<-wmat/rowSums(wmat) # calculate the spatial lag of democracy democracy.spatial.lag <- as.vector(wmat%*%sldv$democracy) #### Chapter 2 source("chapter2adata.R") # P. 47 sldv.fit <- lagsarlm(democracy ~ log(gdp.2002/population), data=sldv, nb2listw(nblist), method="eigen", quiet=FALSE) summary(sldv.fit) moran.test(resid(sldv.fit),nb2listw(nblist)) # P. 50-52 # Code to calculate equilibrium effect of changes in GDP per capita # Create vector to store the estimate for each states ee.est <- rep(NA,dim(sldv)[1]) # Assign the country name labels names(ee.est) <- sldv$tla # Create a null vector to use in loop svec <- rep(0,dim(sldv)[1]) # Create a N x N identity matrix eye <- matrix(0,nrow=dim(sldv)[1],ncol=dim(sldv)[1]) diag(eye) <- 1 # Loop over 1:n states and store effect of change in # each state i in ee.est[i] for(i in 1:length(ee.est)){ cvec <- svec cvec[i] <- 1 res <- solve(eye - 0.56315 * wmat) %*% cvec * 0.99877 ee.est[i] <- res[i] } # Russia example of impact on other states (observation 120) cvec <- rep(0,dim(sldv)[1]) cvec[120] <- 1 # Store estimates for impact of change in Russia in rus.est eye <- matrix(0,nrow=dim(sldv)[1],ncol=dim(sldv)[1]) diag(eye) <- 1 rus.est <- solve(eye - 0.56315 * wmat) %*% cvec*0.99877 # Find ten highest values of rus.est vector rus.est <- round(rus.est,3) rus.est <- data.frame(sldv$tla,rus.est) rus.est[rev(order(rus.est$rus.est)),][1:10,] # P. 53 # Impact of change in $y$ to 10 in China R code # China is observation 32 cvec <- rep(0,dim(sldv)[1]) cvec[32] <- 10 # Store estimates of change in China in chn.est chn.est <- c(cbind (0, 0, wmat%*%cvec) %*% c(summary(sldv.fit)$Coef[,1],summary(sldv.fit)$rho)) chn.est <- round(chn.est,3) # Find all states where non-zero impact chn.est <- data.frame(sldv$tla,chn.est) # P. 56-7 shin <- read.csv("italyturnout.csv",sep=",",header=T) tr <- readShapePoly("turnout", IDvar="FID_1", proj4string=CRS("+proj=robin +lon 0=0")) dnn50km <- dnearneigh(coordinates(tr), 0, 50000) summary(dnn50km) # wmat is the weights matrix wmat <- nb2mat(dnn50km,style="W") sldv.fit <- lagsarlm(turnout ~ log(gdpcap), data=shin, nb2listw(dnn50km), method="eigen", quiet=FALSE) summary(sldv.fit) # p. 58-9 # Extract estimated rho rho <- coef(sldv.fit)[3] # Extract estimated beta beta <- coef(sldv.fit)[1:2] # Create a X matrix X <- cbind(1,log(shin$gdpcap)) # Create an alternative X matrix, changing value for # Reggio Calabria-Sbarre (obs 432) Xs <- X Xs[432]<- log(35) # Create an identity matrix I <- diag(length(shin$gdpcap)) # Find equilibrium effect by looking at # the difference in expected value for the # the two X matrices Ey <- solve(I - rho*wmat)%*%(X%*%beta) EyS <- solve(I - rho*wmat)%*%(Xs%*%beta) dif <- EyS-Ey # P. 61 library(foreign) library(maptools) library(network) library(spdep) library(sp) library(rgdal) setwd(dd) # read in 2004 presidential votes presvote <- read.table("2004presvote.csv",sep=",",header=T) # read in shape files for 48 US States plus District of Columbia # will create a MAP OBJECT usa.shp <- read.shape("48_states.shp") # use equal area projection (Robinson) usaall <- merge(usa.shp$att.data, presvote, by.x = "STATE_NAME", by.y = "State", sort = F) # Create a distance matrix from original polygon shape file tr <- readShapePoly("48_states.shp", IDvar="ObjectID", proj4string=CRS("+proj=robin +lon 0=0")) centroids <- coordinates(tr) # Create Polygons in a SPATIAL OBJECT us48polys <- Map2poly(usa.shp, region.id = as.character(usa.shp$att.data$STATE_NAME)) # Create neighbors, list, and matrix objects from polygon centroids us48.nb <- poly2nb(us48polys, row.names = as.character(usa.shp$att.data$STATE_NAME)) us48.listw <- nb2listw(us48.nb, style = "B") us48.mat <- (nb2mat(us48.nb, style="B")) # plots the network among the centroids colnames(us48.mat) <- rownames(us48.mat) <- usa.shp$att.dat$STATE_ABBR usa <- network(us48.mat,directed=F) set.seed(123) # plot network first; then add state boundaries plot.network(usa, displayisolates=T, displaylabels=F, boxed.labels=F, coord=centroids, label.col="gray20", usearrows=F, edge.col=rep("gray60",190), vertex.col="gray30", edge.lty=1) plot(us48polys,bty="n", border="slategray3" ,forcefill =TRUE,xaxt="n",yaxt="n", lwd=.000000000125,las=1,ylab="",xlab="",add=T) # P. 62 library(RColorBrewer) # now plot the Bush:Kerry vote ratio bk <- usaall$Bush/usaall$Kerry # set up five categories and assign colors breaks <-round(quantile((bk), seq(0,1,1/5), na.rm=TRUE),1) # cols <- brewer.pal(length(breaks), "Greys") cols <- brewer.pal(length(breaks), "Greys") # use findInterval to color states by bk variable ### KSG again, this will need to be filled out plot(us48polys, bty="n", border="slategray3", forcefill=TRUE, xaxt="n", yaxt="n",lwd=.000000000125,las=1,ylab="",xlab="") plot(us48polys, bty="n",col=cols[findInterval(bk, breaks, all.inside=T)], forcefill=T, add=T) # P. 72 # data and variables as employed in chapter 2. sem.fit <- errorsarlm(democracy ~ log(gdp.2002/population), data=sldv, nb2listw(nblist), method="eigen", quiet=FALSE) summary(sem.fit) logLik(sem.fit) # P. 78 source("chapter3data.R") tab3.sem <- errorsarlm(logtrade ~ logdem + logapop + logbpop + logargdppc + logbrgdppc + logs + logdist + logmid, data=logdat98,na.action=na.omit, nb2listw(dlist,style="W"), method="eigen") summary(tab3.sem) logLik(tab3.sem)