### ============================= ###
### Project: The Island Biogeography of Human Population Size ###
### Purpose: Preliminary, global & regional analyses ###
### Author: Dr. Fabio Mologni & Kevin C. Burns ###
### Date: 2022-12-06 ###

### ============================= ###
### Working directory
# NOTE(review): placeholder path. The relative CSV path used below is
# resolved against it, so point it at the folder that holds the data file.
setwd("C:/folder name")

### ============================= ###
### Print more records
options(max.print = 10000)

### ============================= ###
### Packages
library(ggplot2)
library(tidyverse)
library(GGally)
library(car)
library(lme4)
library(lmerTest)

### ============================= ###
### csv
GLOBAL <- read_csv("./file name.csv")

### ===============================
### =========== GLOBAL ============
### ===============================

# Raw distributions of the response and the three predictors.
hist(GLOBAL$population)
hist(GLOBAL$area)
hist(GLOBAL$elevation)
hist(GLOBAL$isolation)

# Distributions after the transformations used in the models below.
# Population is shown as log(1 + population), the response of the mixed
# model: a plain log() maps zero counts to -Inf, and hist() silently drops
# non-finite values, so those islands would vanish from the plot.
hist(log(1 + GLOBAL$population))
hist(log(GLOBAL$area))
hist(log(1 + GLOBAL$elevation))
hist(log(1 + GLOBAL$isolation))

# Pairwise Pearson correlations among the transformed predictors.
corr.matrix <- tibble(
  log(GLOBAL$area),
  log(1 + GLOBAL$elevation),
  log(1 + GLOBAL$isolation)
)
ggcorr(
  data = corr.matrix,
  method = c("pairwise.complete.obs", "pearson"),
  label = TRUE,
  label_size = 4,
  label_round = 3
)

# Variance inflation factors for the same predictor set.
vif(lm(
  log(1 + population) ~ log(area) + log(1 + elevation) + log(1 + isolation),
  data = GLOBAL
))

# Linear mixed model with a random intercept for archipelago.
global.lmer <- lmer(
  log(1 + population) ~ log(area) + log(1 + elevation) + log(1 + isolation) +
    (1 | archipelago),
  data = GLOBAL
)
summary(global.lmer)

# Quasi-Poisson GLM on the untransformed counts (no grouping term).
global.quasi <- glm(
  population ~ log(area) + log(1 + elevation) + log(1 + isolation),
  family = quasipoisson,
  data = GLOBAL
)
summary(global.quasi)
anova(global.quasi)

### =================================
### =========== REGIONAL ============
### =================================

##1.
## SEYCHELLES (SEY)
# Rows of GLOBAL whose archipelago code is "sey".
SEY <- subset(GLOBAL, archipelago == "sey")

# Raw distributions.
hist(SEY$population)
hist(SEY$area)
hist(SEY$elevation)

# Log-transformed predictors, as used in the models below.
hist(log(SEY$area))
hist(log(SEY$elevation))

# Pairwise Pearson correlation between the transformed predictors.
corr.sey <- tibble(log(SEY$area), log(SEY$elevation))
ggcorr(
  data = corr.sey,
  method = c("pairwise.complete.obs", "pearson"),
  label = TRUE, # spelled out: `T` is an ordinary variable and can be rebound
  label_size = 4,
  label_round = 3
)

# Variance inflation factors for the predictor set.
vif(lm(population ~ log(area) + log(elevation), data = SEY))

# Poisson GLM on the counts, then the same formula with a quasi-Poisson
# family.
SEY.poisson <- glm(
  population ~ log(area) + log(elevation),
  family = poisson,
  data = SEY
)
summary(SEY.poisson)

SEY.quasi <- glm(
  population ~ log(area) + log(elevation),
  family = quasipoisson,
  data = SEY
)
summary(SEY.quasi)
anova(SEY.quasi)

##_________________________________________________________________
##2. SAMOA (SAM)
# Rows of GLOBAL whose archipelago code is "sam".
SAM <- subset(GLOBAL, archipelago == "sam")

# Raw distributions.
hist(SAM$population)
hist(SAM$area)
hist(SAM$elevation)

# Transformed predictors; elevation is shifted by 1 before taking the log.
hist(log(SAM$area))
hist(log(1 + SAM$elevation))

# Pairwise Pearson correlation between the transformed predictors.
corr.sam <- tibble(log(SAM$area), log(1 + SAM$elevation))
ggcorr(
  data = corr.sam,
  method = c("pairwise.complete.obs", "pearson"),
  label = TRUE,
  label_size = 4,
  label_round = 3
)

# Variance inflation factors for the predictor set.
vif(lm(population ~ log(area) + log(1 + elevation), data = SAM))

# Poisson GLM on the counts, then the same formula with a quasi-Poisson
# family.
SAM.poisson <- glm(
  population ~ log(area) + log(1 + elevation),
  family = poisson,
  data = SAM
)
summary(SAM.poisson)

SAM.quasi <- glm(
  population ~ log(area) + log(1 + elevation),
  family = quasipoisson,
  data = SAM
)
summary(SAM.quasi)
anova(SAM.quasi)

##_________________________________________________________________
##3.
## HAWAII (HAW)
# Subset of GLOBAL with archipelago code "haw".
HAW <- subset(GLOBAL, archipelago == "haw")

# Untransformed variables.
hist(HAW$population)
hist(HAW$area)
hist(HAW$elevation)

# Transformed predictors.
hist(log(HAW$area))
hist(log(1 + HAW$elevation))

# Correlation between log area and log(1 + elevation).
corr.haw <- tibble(log(HAW$area), log(1 + HAW$elevation))
ggcorr(
  data = corr.haw,
  method = c("pairwise.complete.obs", "pearson"),
  label = TRUE,
  label_size = 4,
  label_round = 3
)

# VIFs are computed with elevation included; the count models below use
# area only.
vif(lm(population ~ log(area) + log(1 + elevation), data = HAW))

# Poisson fit, then a quasi-Poisson refit. update() re-evaluates the stored
# glm() call with only `family` swapped, so formula and data stay in sync.
HAW.poisson <- glm(population ~ log(area), family = poisson, data = HAW)
summary(HAW.poisson)

HAW.quasi <- update(HAW.poisson, family = quasipoisson)
summary(HAW.quasi)
anova(HAW.quasi)

##_________________________________________________________________
##4. VIRGIN ISLANDS (VIR)
# Subset of GLOBAL with archipelago code "vir".
VIR <- subset(GLOBAL, archipelago == "vir")

# Untransformed variables. The extra parentheses are kept on purpose:
# hist() builds its default title and x label from the argument expression.
hist((VIR$population))
hist((VIR$area))
hist((VIR$elevation))
hist((VIR$first_mainland))

# Transformed predictors (first_mainland is used untransformed).
hist(log(VIR$area))
hist(log(1 + VIR$elevation))

# Correlations among the three candidate predictors.
corr.vir <- tibble(
  log(VIR$area),
  log(1 + VIR$elevation),
  VIR$first_mainland
)
ggcorr(
  data = corr.vir,
  method = c("pairwise.complete.obs", "pearson"),
  label = TRUE,
  label_size = 4,
  label_round = 3
)

# VIFs are computed with elevation included; the count models below omit it.
vif(lm(
  population ~ log(area) + log(1 + elevation) + first_mainland,
  data = VIR
))

# Poisson fit, then a quasi-Poisson refit of the same call.
VIR.poisson <- glm(
  population ~ log(area) + first_mainland,
  family = poisson,
  data = VIR
)
summary(VIR.poisson)

VIR.quasi <- update(VIR.poisson, family = quasipoisson)
summary(VIR.quasi)
anova(VIR.quasi)

##_________________________________________________________________
##5.
## NORTHERN NEW ZEALAND (NNZ)
# Subset of GLOBAL with archipelago code "nz".
NNZ <- subset(GLOBAL, archipelago == "nz")

# Untransformed variables. The extra parentheses are kept on purpose:
# hist() builds its default title and x label from the argument expression.
hist((NNZ$population))
hist((NNZ$area))
hist((NNZ$elevation))
hist((NNZ$first_mainland))

# Transformed predictors: log for area and elevation, square root for
# first_mainland.
hist(log(NNZ$area))
hist(log(NNZ$elevation))
hist(sqrt(NNZ$first_mainland))

# Correlations among the three transformed predictors.
corr.nnz <- tibble(
  log(NNZ$area),
  log(NNZ$elevation),
  sqrt(NNZ$first_mainland)
)
ggcorr(
  data = corr.nnz,
  method = c("pairwise.complete.obs", "pearson"),
  label = TRUE,
  label_size = 4,
  label_round = 3
)

# VIFs are computed with elevation included; the count models below omit it.
vif(lm(
  population ~ log(area) + log(elevation) + sqrt(first_mainland),
  data = NNZ
))

# Poisson fit, then a quasi-Poisson refit. update() re-evaluates the stored
# glm() call with only `family` swapped, so formula and data stay in sync.
NNZ.poisson <- glm(
  population ~ log(area) + sqrt(first_mainland),
  family = poisson,
  data = NNZ
)
summary(NNZ.poisson)

NNZ.quasi <- update(NNZ.poisson, family = quasipoisson)
summary(NNZ.quasi)
anova(NNZ.quasi)

##_________________________________________________________________
##6. ZHOUSHAN (ZHO)
# Subset of GLOBAL with archipelago code "zho".
ZHO <- subset(GLOBAL, archipelago == "zho")

# Untransformed variables.
hist(ZHO$population)
hist(ZHO$area)
hist(ZHO$elevation)
hist(ZHO$first_mainland)

# Transformed predictors (first_mainland is used untransformed).
hist(log(ZHO$area))
hist(log(ZHO$elevation))

# Correlations among the three candidate predictors.
corr.zho <- tibble(
  log(ZHO$area),
  log(ZHO$elevation),
  ZHO$first_mainland
)
ggcorr(
  data = corr.zho,
  method = c("pairwise.complete.obs", "pearson"),
  label = TRUE,
  label_size = 4,
  label_round = 3
)

# VIFs are computed with elevation included; the count models below omit it.
vif(lm(
  population ~ log(area) + log(elevation) + first_mainland,
  data = ZHO
))

# Poisson fit, then a quasi-Poisson refit of the same call.
ZHO.poisson <- glm(
  population ~ log(area) + first_mainland,
  family = poisson,
  data = ZHO
)
summary(ZHO.poisson)

ZHO.quasi <- update(ZHO.poisson, family = quasipoisson)
summary(ZHO.quasi)
anova(ZHO.quasi)

##_________________________________________________________________
##7.
## KURIL ISLANDS (KUR)
# Subset of GLOBAL with archipelago code "kur".
KUR <- subset(GLOBAL, archipelago == "kur")

# Untransformed variables. The extra parentheses are kept on purpose:
# hist() builds its default title and x label from the argument expression.
hist((KUR$population))
hist((KUR$area))
hist((KUR$elevation))
hist((KUR$first_mainland))

# Transformed predictors (first_mainland is used untransformed).
hist(log(KUR$area))
hist(log(1 + KUR$elevation))

# Correlations among the three predictors. The generic object name
# corr.transf.matrix is assigned again in the Channel Islands section below.
corr.transf.matrix <- tibble(
  log(KUR$area),
  log(1 + KUR$elevation),
  KUR$first_mainland
)
ggcorr(
  data = corr.transf.matrix,
  method = c("pairwise.complete.obs", "pearson"),
  label = TRUE,
  label_size = 4,
  label_round = 3
)

# Variance inflation factors; here all three predictors also enter the
# count models.
vif(lm(
  population ~ log(area) + log(1 + elevation) + first_mainland,
  data = KUR
))

# Poisson fit, then a quasi-Poisson refit. update() re-evaluates the stored
# glm() call with only `family` swapped, so formula and data stay in sync.
KUR.poisson <- glm(
  population ~ log(area) + log(1 + elevation) + first_mainland,
  family = poisson,
  data = KUR
)
summary(KUR.poisson)

KUR.quasi <- update(KUR.poisson, family = quasipoisson)
summary(KUR.quasi)
anova(KUR.quasi)

##_________________________________________________________________
##8. CHANNEL ISLANDS (CHA)
# Subset of GLOBAL with archipelago code "cha".
CHA <- subset(GLOBAL, archipelago == "cha")

# Untransformed variables. `second_maninland` is the column name as it is
# spelled throughout this script.
hist(CHA$population)
hist(CHA$area)
hist(CHA$elevation)
hist(CHA$first_mainland)
hist(CHA$second_maninland)

# Transformed predictors (the two mainland columns are used untransformed).
hist(log(CHA$area))
hist(log(CHA$elevation))

# Correlations among the four candidate predictors; this replaces the
# Kuril matrix held in corr.transf.matrix.
corr.transf.matrix <- tibble(
  log(CHA$area),
  log(CHA$elevation),
  CHA$first_mainland,
  CHA$second_maninland
)
ggcorr(
  data = corr.transf.matrix,
  method = c("pairwise.complete.obs", "pearson"),
  label = TRUE,
  label_size = 4,
  label_round = 3
)

# VIFs are computed with elevation included; the count models below omit it.
vif(lm(
  population ~ log(area) + log(elevation) + first_mainland +
    second_maninland,
  data = CHA
))

# Poisson fit, then a quasi-Poisson refit of the same call.
CHA.poisson <- glm(
  population ~ log(area) + first_mainland + second_maninland,
  family = poisson,
  data = CHA
)
summary(CHA.poisson)

CHA.quasi <- update(CHA.poisson, family = quasipoisson)
summary(CHA.quasi)
anova(CHA.quasi)

##_________________________________________________________________
##9.
## IONIAN ISLANDS (ION)
# Subset of GLOBAL with archipelago code "ion".
ION <- subset(GLOBAL, archipelago == "ion")

# Untransformed variables. The extra parentheses are kept on purpose:
# hist() builds its default title and x label from the argument expression.
# `second_maninland` is the column name as it is spelled in this script.
hist((ION$population))
hist((ION$area))
hist((ION$elevation))
hist((ION$first_mainland))
hist((ION$second_maninland))

# All four predictors are log-transformed in this section.
hist(log(ION$area))
hist(log(ION$elevation))
hist(log(ION$first_mainland))
hist(log(ION$second_maninland))

# Correlations among the four transformed predictors, stored under the
# generic name corr.transf.matrix.
corr.transf.matrix <- tibble(
  log(ION$area),
  log(ION$elevation),
  log(ION$first_mainland),
  log(ION$second_maninland)
)
ggcorr(
  data = corr.transf.matrix,
  method = c("pairwise.complete.obs", "pearson"),
  label = TRUE,
  label_size = 4,
  label_round = 3
)

# VIFs are computed with elevation included; the count models below omit it.
vif(lm(
  population ~ log(area) + log(elevation) + log(first_mainland) +
    log(second_maninland),
  data = ION
))

# Poisson fit, then a quasi-Poisson refit. update() re-evaluates the stored
# glm() call with only `family` swapped, so formula and data stay in sync.
ION.poisson <- glm(
  population ~ log(area) + log(first_mainland) + log(second_maninland),
  family = poisson,
  data = ION
)
summary(ION.poisson)

ION.quasi <- update(ION.poisson, family = quasipoisson)
summary(ION.quasi)
anova(ION.quasi)

##_________________________________________________________________
##10. SHETLAND (SHE)
# Subset of GLOBAL with archipelago code "she".
SHE <- subset(GLOBAL, archipelago == "she")

# Untransformed variables.
hist(SHE$population)
hist(SHE$area)
hist(SHE$elevation)
hist(SHE$first_mainland)
hist(SHE$second_maninland)

# Transformed predictors (the two mainland columns are used untransformed).
hist(log(SHE$area))
hist(log(SHE$elevation))

# Correlations among the four candidate predictors.
corr.she <- tibble(
  log(SHE$area),
  log(SHE$elevation),
  SHE$first_mainland,
  SHE$second_maninland
)
ggcorr(
  data = corr.she,
  method = c("pairwise.complete.obs", "pearson"),
  label = TRUE,
  label_size = 4,
  label_round = 3
)

# VIFs are computed with elevation included; the count models below omit it.
vif(lm(
  population ~ log(area) + log(elevation) + first_mainland +
    second_maninland,
  data = SHE
))

# Poisson fit, then a quasi-Poisson refit of the same call.
SHE.poisson <- glm(
  population ~ log(area) + first_mainland + second_maninland,
  family = poisson,
  data = SHE
)
summary(SHE.poisson)

SHE.quasi <- update(SHE.poisson, family = quasipoisson)
summary(SHE.quasi)
anova(SHE.quasi)

# Five-number summary (plus mean) of the deviance residuals.
summary(resid(SHE.quasi, type = "deviance"))