############################################################################################################################################# ## Geographically Weighted Path Analysis (GWPath) ## ## Author: Elisa B. Pereira (elisabpereira@gmail.com) ## ## Arguments ## - data_spatial ## An object of the classes SpatialPolygonsDataFrame, RasterStack or data.frame containing the variables. If the object is ## a data.frame if should contain long-lat coordinates ## ## - regressions ## A list containing all the regression formulas to be used in the path model ## e.g. reg1 <- A ~ B + C + D ## reg2 <- C ~ E + D ## reg3 <- E ~ B ## regression <- c(reg1, reg2, reg3) ## ATENTION: the path model can not be cyclical! ## ## - correlations ## A list of correlations between predictors ## e.g. cor1 <- B ~ D ## correlations <- c(cor1) ## If no correlation is to be estimated, enter an empty vector: correlations <- c() ## ## - indirect ## A list of vectors containing all the indirect paths of interest. Each vector sould be in orderd from the response to the ## predictor variable ## eg. ind1 <- c("A", "C", "E") ## ind2 <- c("A", "C", "D") ## ind3 <- c("A", "E", "B") ## indirect <- list(ind1, ind2, ind3) ## ## - band ## Bandwidth size as in the spgwr::gwr function. If data_spatial is a spatial object, the bandwidth should be informed in the same ## unit as the spatial object (e.g. km, decimal degrees). If data_spatial is a data.frame the user should inform if coordinates are in ## decimal degrees by setting longlat = T and informing the bandwidth in km. ## Bandwidth selection using AIC can be performed using spgwr::gwr.sel ## ## - weight ## Geographical weighting function, as in spgwr::gwr. It accepts gwr.Gauss default and gwr.bisquare ## ## - adapt ## NULL (default) or a value from 0 to 1 setting the proportion of observations to be included in the weighting scheme, as in spgwr::gwr ## ## - use_spatial ## TRUE if GWR should be performed using spatial object, in which case the bandwidth should be in the same unit as the spatial object. ## FALSE if GWR should be performed using data.frame with coordinates, in which case you need to set the argument longlat ## ## - longlat ## When performing GWR with a data.frame, use latlong=T to inform bandwidth in km or longlat=F to inform bandwidth in decimal degrees ############################################################################################################################################# GWpath <- function(data_spatial, regressions, correlations, indirect, band, weight, adapt = NULL, use_spatial, longlat){ library(raster) library(sp) library(rgdal) library(rgeos) library(maptools) library(spgwr) full_equations <- c(regressions, correlations) ### 1. Organize data # Object class if(class(data_spatial)[1] != "SpatialPolygonsDataFrame" & class(data_spatial)[1] != "SpatialPointsDataFrame"){ if(class(data_spatial)[1] == "RasterStack"){ data_spatial <- raster::rasterToPolygons(data_spatial, fun=NULL, n=4, na.rm=TRUE, digits=12, dissolve=FALSE) warning('Spatial data is a RasterStack. It was automatically converted to SpatialPolygonsDataFrame with 4 nodes for each polygon') } if(class(data_spatial)[1] == "data.frame" | class(data_spatial)[1] == "matrix"){ if(sum(colnames(data_spatial) %in% c("Long", "Lat", "long", "lat", "LONG", "LAT", "X", "Y", "1", "2", "x-Coordinate", "y-Coordinate")) != 2){ stop('Spatial data is a data.frame. Coordinates were not identifyied. Please, rename the coordinates columns using "Long" and "Lat"') } else{ longit <- c("Long", "long", "LONG", "X", "1", "x-Coordinate") latit <- c("Lat", "lat", "LAT", "Y", "2", "y-Coordinate") data_spatial <- apply(data_spatial, 2, as.numeric) coords <- data_spatial[,c(which(colnames(data_spatial) %in% longit), which(colnames(data_spatial) %in% latit))] data_spatial <- as.data.frame(data_spatial[,-c(which(colnames(data_spatial) %in% longit), which(colnames(data_spatial) %in% latit))]) coordinates(data_spatial) <- coords warning('Spatial data is a data.frame. It was automatically converted to SpatialPointsDataFrame') } } } # Order and subset variables. Remove NA data_spatial@data <- data_spatial@data[,order(names(data_spatial@data))] variables <- vector() for(i in 1: length(full_equations)){ variables <- c(variables, as.character(full_equations[[i]][[2]]), attr(terms(full_equations[[i]]), "term.labels")) variables <- sort(unique(variables)) } data_spatial <- data_spatial[,variables] remove <- which(rowSums(is.na(data_spatial@data)) > 0) if(length(remove) != 0){ data_spatial <- data_spatial[-remove,] } # Standardize variables data_spatial@data <- as.data.frame(scale(data_spatial@data)) coords <- sp::coordinates(data_spatial) ### 2. Create objects to save the results results_stationary <- matrix(NA, nrow = 1) results_stationary <- as.data.frame(results_stationary) results_GWPath <- as.data.frame(coords) ### 3. Correlations if(length(correlations) != 0){ for(i in 1:length(correlations)){ vars <- rownames(attr(terms(correlations[[i]]), "factors")) # Stationary results_stationary[,paste("correl", vars[1], vars[2], sep = "_")] <- cor(data_spatial@data[,vars[1]], data_spatial@data[,vars[2]]) # GWPath descriptive_stats <- gw.cov(x = data_spatial, vars = vars, adapt = adapt, bw = band, gweight = weight, cor = TRUE) results_GWPath[,paste("correl", vars[1], vars[2], sep = "_")] <- as.data.frame(descriptive_stats$SDF)[,paste("cor", ".", vars[1], ".", vars[2], ".", sep = "")] } } ### 4. Regressions if(use_spatial == T){ GWR_results <- lapply(regressions, gwr, data = data_spatial, bandwidth = band, gweight = weight, adapt = adapt) } if(use_spatial == F){ if(longlat == F){ GWR_results <- lapply(regressions, gwr, data = data_spatial@data, coords = coords, bandwidth = band, gweight = weight, adapt = adapt, longlat = F) } if(longlat == T){ GWR_results <- lapply(regressions, gwr, data = data_spatial@data, coords = coords, bandwidth = band, gweight = weight, adapt = adapt, longlat = T) } } for(i in 1:length(regressions)){ predictor <- attr(terms(regressions[[i]]), "term.labels") response <- as.character(regressions[[i]][[2]]) # LM (stationary) linear <- lm(regressions[[i]], data_spatial@data) # linear model results_GWPath[,paste("-", "residuals_stationary_path_", response, ".", sep = "")] <- linear$residuals # store Lm residuals with GWPath results_stationary[,paste("-", "R2_", response, ".", sep = "")] <- summary(linear)$r.squared # GWR (non-stationary) GWR_results_df <- as.data.frame(GWR_results[[i]]$SDF) results_GWPath[,paste("-", "R2_", response, ".", sep = "")] <- GWR_results_df$localR2 results_GWPath[,paste("-", "residuals_GWPath_", response, ".", sep = "")] <- data_spatial@data[,response] - GWR_results_df$pred # Direct paths for(j in 1:length(predictor)){ # LM results_stationary[,paste("-direct_", response, "~", predictor[j], ".", sep = "")] <- linear$coefficients[[predictor[j]]] # coefficients # GWR results_GWPath[,paste("-direct_", response, "~", predictor[j], ".", sep = "")] <- GWR_results_df[,predictor[j]] } if(i == length(regressions)){ results_stationary <- results_stationary[,-1] } } # Indirect and total paths # Indirect for(i in 1:length(indirect)){ path <- indirect[[i]] value_global <- unlist(lapply(1:(length(path)-1), function(x) results_stationary[grepl(paste("-direct_", path[x], "~", path[x+1], sep = ""), names(results_stationary), fixed = TRUE)])) results_stationary[1,paste("-indirect_", paste(path, collapse = "~"), ".", sep = "")] <- prod(value_global) value_local <- matrix(unlist(lapply(1:(length(path)-1), function(x) results_GWPath[grepl(paste("-direct_", path[x], "~", path[x+1], sep = ""), names(results_GWPath), fixed = TRUE)])), ncol = length(path)-1, byrow = F) results_GWPath[,paste("-indirect_", paste(path, collapse = "~"), ".", sep = "")] <- apply(value_local, 1, prod) } # Total for(i in 1:length(regressions)){ response <- as.character(regressions[[i]][[2]]) dir <- colnames(results_stationary)[which(grepl(paste("-direct_",response,"~",sep = ""), names(results_stationary), fixed = TRUE) == T)] # direct results ind <- colnames(results_stationary)[which(grepl(paste("-indirect_",response,"~",sep = ""), names(results_stationary), fixed = TRUE) == T)] # indirect results for(j in 1:length(variables)){ name <- paste("-total_path_", response, "~", variables[j], sep = "") dir_value <- results_stationary[,dir][grepl(paste(variables[j], ".", sep = ""), dir, fixed = TRUE)] indir_value <- results_stationary[,ind][grepl(paste(variables[j], ".", sep = ""), ind,fixed = TRUE)] if(length(dir_value) != 0 | length(indir_value) != 0){ results_stationary[1,name] <- sum(cbind(if(length(indir_value) == 0){0} else indir_value, if(length(dir_value) == 0){0} else dir_value)) dir_value <- results_GWPath[,dir][grepl(paste(variables[j], ".", sep = ""), dir, fixed = TRUE)] indir_value <- results_GWPath[,ind][grepl(paste(variables[j], ".", sep = ""), ind, fixed = TRUE)] results_GWPath[,name] <- rowSums(cbind(if(length(indir_value) == 0){matrix(0, nrow = nrow(data_spatial))} else indir_value, if(length(dir_value) == 0){matrix(0, nrow = nrow(data_spatial))} else dir_value)) } } } # Calculate the highest absolute coefficient in each grid via direct, indirect and total paths resp_var <- unlist(lapply(regressions, function(x) as.character(x[[2]]))) main_variable <- sec_main_variable <- as.data.frame(coords) for(i in 1:length(resp_var)){ name_dir <- paste("-main_variable_", resp_var[i], "_direct", sep = "") sec_name_dir <- paste("-sec_main_variable_", resp_var[i], "_direct", sep = "") dir <- results_GWPath[grepl(paste("-direct_",resp_var[i],"~",sep = ""), names(results_GWPath), fixed = TRUE)] # direct results if(ncol(dir)>1){ main_variable[,name_dir]<- as.matrix(apply(dir, 1, function(x) names(sort(abs(x), T)[1]))) sec_main_variable[,sec_name_dir]<- as.matrix(apply(dir, 1, function(x) names(sort(abs(x), T)[2]))) } name_indir <- paste("-main_variable_", resp_var[i], "_indirect", sep = "") sec_name_indir <- paste("-sec_main_variable_", resp_var[i], "_indirect", sep = "") ind <- results_GWPath[grepl(paste("-indirect_",resp_var[i],"~",sep = ""), names(results_GWPath), fixed = TRUE)] # indirect results if(ncol(ind)>1){ main_variable[,name_indir]<- as.matrix(apply(ind, 1, function(x) names(sort(abs(x), T)[1]))) sec_main_variable[,sec_name_indir]<- as.matrix(apply(ind, 1, function(x) names(sort(abs(x), T)[2]))) } name_total <- paste("-main_variable_", resp_var[i], "_total", sep = "") sec_name_total <- paste("-sec_main_variable_", resp_var[i], "_total", sep = "") total <- results_GWPath[grepl(paste("-total_path_",resp_var[i],"~",sep = ""), names(results_GWPath), fixed = TRUE)] # indirect results if(ncol(total)>1){ main_variable[,name_total]<- as.matrix(apply(total, 1, function(x) names(sort(abs(x), T)[1]))) sec_main_variable[,sec_name_total]<- as.matrix(apply(total, 1, function(x) names(sort(abs(x), T)[2]))) } name_total <- paste("-main_variable_", resp_var[i], "_all_paths", sep = "") sec_name_total <- paste("-sec_main_variable_", resp_var[i], "_all_paths", sep = "") all <- cbind(dir, ind, total) main_variable[,name_total] <- as.matrix(apply(all, 1, function(x) names(sort(abs(x), T)[1]))) sec_main_variable[,sec_name_total] <- as.matrix(apply(all, 1, function(x) names(sort(abs(x), T)[2]))) } # Remove columns with na or 0 results_GWPath <- results_GWPath[,!is.na(colSums(results_GWPath))] results_stationary <- results_stationary[,!is.na(colSums(results_stationary))] results_GWPath <- results_GWPath[,colSums(results_GWPath)!=0] results_stationary <- results_stationary[,colSums(results_stationary)!=0] # Edit column names colnames(results_GWPath) <- gsub("-", "", colnames(results_GWPath)) colnames(results_stationary) <- gsub("-", "", colnames(results_stationary)) colnames(main_variable) <- gsub("-", "", colnames(main_variable)) colnames(sec_main_variable) <- gsub("-", "", colnames(sec_main_variable)) # Calculate mean and standard deviation of GWPath coefficients variation_coefficients <- matrix(NA, nrow = 2, ncol = ncol(results_GWPath), dimnames = list(c("mean", "standard_deviation"), colnames(results_GWPath))) variation_coefficients[1,] <- apply(results_GWPath, 2, mean) variation_coefficients[2,] <- apply(results_GWPath, 2, sd) # Generate spatial object main_variable <- cbind.Spatial(data_spatial, main_variable[,-c(1:2)], sec_main_variable[,-c(1:2)]) data_spatial <- cbind.Spatial(data_spatial, results_GWPath[,-c(1:2)]) return(list(GWPath_coefficients = data_spatial, Stationary_coefficients = results_stationary, Summary_statistics_GWPath = variation_coefficients[,-c(1:2)], Main_variable_GWPath = main_variable)) } # 1. DATA AND MODEL SPECIFICATION # Load data library(readxl) data_spatial <- data.frame(read_excel("data.xls")) colnames(data_spatial) <- data_spatial[1,] data_spatial <- data_spatial[-1,] # Set regressions { # Describe regressions reg1 <- Population_Density ~ River_Density + Ecoregion_Richness + Topographic_Complexity + Climate_Change_Velocity + Preciptation_Constancy + Temperature_Constancy reg2 <- Language_Diversity ~ River_Density + Ecoregion_Richness + Topographic_Complexity + Climate_Change_Velocity + Preciptation_Constancy + Population_Density + Carrying_Capacity_with_Group_Size_Limits reg3 <- Carrying_Capacity_with_Group_Size_Limits ~ Population_Density #cor1 <- c() # e.g. cor1 <- B ~ D # Concatenate regressions regressions <- c(reg1, reg2, reg3) correlations <- c() # Specify indirect paths ind1 <- c("Language_Diversity", "Population_Density", "River_Density") ind2 <- c("Language_Diversity", "Population_Density", "Ecoregion_Richness") ind3 <- c("Language_Diversity", "Population_Density", "Topographic_Complexity") ind4 <- c("Language_Diversity", "Population_Density", "Climate_Change_Velocity") ind5 <- c("Language_Diversity", "Population_Density", "Preciptation_Constancy") ind6 <- c("Language_Diversity", "Population_Density", "Temperature_Constancy") ind7 <- c("Language_Diversity", "Carrying_Capacity_with_Group_Size_Limits", "Population_Density", "River_Density") ind8 <- c("Language_Diversity", "Carrying_Capacity_with_Group_Size_Limits", "Population_Density", "Ecoregion_Richness") ind9 <- c("Language_Diversity", "Carrying_Capacity_with_Group_Size_Limits", "Population_Density", "Topographic_Complexity") ind10 <- c("Language_Diversity", "Carrying_Capacity_with_Group_Size_Limits", "Population_Density", "Climate_Change_Velocity") ind11 <- c("Language_Diversity", "Carrying_Capacity_with_Group_Size_Limits", "Population_Density", "Preciptation_Constancy") ind12 <- c("Language_Diversity", "Carrying_Capacity_with_Group_Size_Limits", "Population_Density", "Temperature_Constancy") ind13 <- c("Carrying_Capacity_with_Group_Size_Limits", "Population_Density", "River_Density") ind14 <- c("Carrying_Capacity_with_Group_Size_Limits", "Population_Density", "Ecoregion_Richness") ind15 <- c("Carrying_Capacity_with_Group_Size_Limits", "Population_Density", "Topographic_Complexity") ind16 <- c("Carrying_Capacity_with_Group_Size_Limits", "Population_Density", "Climate_Change_Velocity") ind17 <- c("Carrying_Capacity_with_Group_Size_Limits", "Population_Density", "Preciptation_Constancy") ind18 <- c("Carrying_Capacity_with_Group_Size_Limits", "Population_Density", "Temperature_Constancy") indirect <- list(ind1, ind2, ind3, ind4, ind5, ind6, ind7, ind8, ind9, ind10, ind11, ind12, ind13, ind14, ind15, ind16, ind17, ind18) } div_linguas_band_8 <- GWpath(data_spatial, regressions, correlations, indirect, band = 8, weight = gwr.Gauss, adapt = NULL, use_spatial = F, longlat = F)