Temporal analysis ZIM.R ## load libraries for zero-inflated model and likelihood ratio test library(pscl) library(lmtest) ### IMPORT DATA ### ## Set working directory setwd("C:/Users/Trudi/Documents/RIGHT WHALES/SRWs/PhD/DATA ANALYSIS/Ch3 Temporal analysis") ## Read in spreadsheet TempData <- read.table(file.choose("C:/Users/Trudi/Documents/RIGHT WHALES/SRWs/PhD/ DATA ANALYSIS/Ch3 Temporal analysis - DSG data/Analysis DSG-1 Laurie Aug11-May12/ DSGAnalysisSummary.txt"), header=T, sep="\t") attach(TempData) ## Look at data View(TempData) ## Look at structure of the dataframe str(TempData) ## Declare qualitative variables as factors DSGnumber = as.factor(DSGnumber) Folder = as.factor(Folder) RWcalls = as.factor(RWcalls) Season = as.factor(Season) Date = as.factor(Date) Month = as.factor(Month) Time= as.factor(Time) DielPeriod = as.factor(DielPeriod) ### EXAMINE DATA BASICS### ## Mean call rate per hour by Season tapply(TempData$CallsPerHr, TempData$Season, mean) ## Mean call rate by month tapply(TempData$CallsPerHr, TempData$Month, mean) ### FIT A MODEL TO THE DATA### ## We observed many zeros in the data, so use zero-inflated (ZI) model rather than GLM ## ## In ZI we can have many zeros from either structure zero (false zero) or ## true zero (count zero) ## So we can use either Zero-Inflated Poisson(ZIP) or Zero-Inflated Neg-Bin(ZINB)? ## For all calls ## ZI is a two component mixture model ("count part" | "inflation part") ## one looks at whether there are calls or not (binomial distribution) ## the other looks at rates of calls given that calls are present (poisson or neg-bin) ## Create formula for model f1 <- formula(CallsPerHr ~ Season * DielPeriod |Season + DielPeriod ) ## Compare ZI models with poisson and neg-binomial distributions Zip1 <- zeroinfl(f1, dist="poisson", link="logit",data=TempData) Nb1 <- zeroinfl(f1, dist="negbin", link="logit",data=TempData) ## See which of 2 models fit data best ## Likelihood ratio test H0: k = 0 lrtest(Zip1,Nb1) ## This test shows that Model 2, the Neg-Bin model is better ## Check out other possible models (using a Neg-Binomial distribution) ## No interaction term for count part f2 <- formula(CallsPerHr ~ Season + DielPeriod | Season + DielPeriod) Nb2 <- zeroinfl(f2, dist = "negbin", link = "logit", data = TempData) ## Observing true zero is constant the Season and diel period f3 <- formula(CallsPerHr ~ Season + DielPeriod |1) Nb3 <- zeroinfl(f3, dist = "negbin", link = "logit", data = TempData) ## Compare 3 models (with negative binomial distribution) using AIC ## Choose model with lowest AIC by at least 2 points rbind(AIC(Nb1), AIC(Nb2), AIC(Nb3)) ## Model Nb2 and Nb1 are the 2 best models but Nb1 is much more complex ## And only 2.6 points better, so use Nb2, which was ## CallsPerHr ~ Season + DielPeriod | Season + DielPeriod AIC(Nb2) - AIC(Nb1) ## Change reference levels for model to Winter and Dusk ## Run model Nb2 again with new reference levels ## WinterLevel <- relevel(TempData$Season, ref="Winter") ## DuskLevel <- relevel(TempData$DielPeriod, ref="Dusk") ffinal <- formula(CallsPerHr ~ Season + DielPeriod | Season + DielPeriod) ZIMfinal <- zeroinfl(ffinal, dist = "negbin", link = "logit", data = TempData) summary(ZIMfinal) str(ZIMfinal) ### ZI MODEL FOR UPCALLS ONLY ffinalUp <- formula(UpcallsPerHr ~ Season + DielPeriod | Season + DielPeriod) ZIMfinalUp <- zeroinfl(ffinalUp, dist = "negbin", link = "logit", data = TempData) summary(ZIMfinalUp) str(ZIMfinalUp)