Temporal analysis ZIM.R

## load libraries for zero-inflated model and likelihood ratio test
library(pscl)
library(lmtest)


### IMPORT DATA ###

## Set working directory
setwd("C:/Users/Trudi/Documents/RIGHT WHALES/SRWs/PhD/DATA ANALYSIS/Ch3 Temporal analysis")

## Read in spreadsheet
TempData <- read.table(file.choose("C:/Users/Trudi/Documents/RIGHT WHALES/SRWs/PhD/
DATA ANALYSIS/Ch3 Temporal analysis - DSG data/Analysis DSG-1 Laurie Aug11-May12/
DSGAnalysisSummary.txt"), header=T, sep="\t")

attach(TempData)

## Look at data
View(TempData)

## Look at structure of the dataframe
str(TempData)

## Declare qualitative variables as factors

DSGnumber = as.factor(DSGnumber)
Folder = as.factor(Folder)
RWcalls = as.factor(RWcalls)
Season = as.factor(Season)
Date = as.factor(Date)
Month = as.factor(Month)
Time= as.factor(Time)
DielPeriod = as.factor(DielPeriod)


### EXAMINE DATA BASICS###
## Mean call rate per hour by Season
tapply(TempData$CallsPerHr, TempData$Season, mean)

## Mean call rate by month
tapply(TempData$CallsPerHr, TempData$Month, mean)

 
### FIT A MODEL TO THE DATA###

## We observed many zeros in the data, so use zero-inflated (ZI) model rather than GLM
## 
## In ZI we can have many zeros from either structure zero (false zero) or 
## true zero (count zero)
## So we can use either Zero-Inflated Poisson(ZIP) or Zero-Inflated Neg-Bin(ZINB)?


## For all calls
## ZI is a two component mixture model ("count part" | "inflation part")
## one looks at whether there are calls or not (binomial distribution) 
## the other looks at rates of calls given that calls are present (poisson or neg-bin)


## Create formula for model 
f1 <- formula(CallsPerHr ~ Season * DielPeriod |Season + DielPeriod )

## Compare ZI models with poisson and neg-binomial distributions


Zip1 <- zeroinfl(f1, dist="poisson", link="logit",data=TempData)
Nb1 <- zeroinfl(f1, dist="negbin", link="logit",data=TempData)

## See which of 2 models fit data best
## Likelihood ratio test H0: k = 0

lrtest(Zip1,Nb1)    

## This test shows that Model 2, the Neg-Bin model is better

## Check out other possible models (using a Neg-Binomial distribution)
## No interaction term for count part
f2 <- formula(CallsPerHr ~ Season + DielPeriod | Season + DielPeriod)
Nb2 <- zeroinfl(f2, dist = "negbin", link = "logit", data = TempData)


## Observing true zero is constant the Season and diel period
f3 <- formula(CallsPerHr ~ Season + DielPeriod |1)
Nb3 <- zeroinfl(f3, dist = "negbin", link = "logit", data = TempData)


## Compare 3 models (with negative binomial distribution) using AIC
## Choose model with lowest AIC by at least 2 points
rbind(AIC(Nb1), AIC(Nb2), AIC(Nb3)) 

## Model Nb2 and Nb1 are the 2 best models but Nb1 is much more complex
## And only 2.6 points better, so use Nb2, which was
## CallsPerHr ~ Season + DielPeriod | Season + DielPeriod
AIC(Nb2) - AIC(Nb1)  

## Change reference levels for model to Winter and Dusk 
## Run model Nb2 again with new reference levels
## WinterLevel <- relevel(TempData$Season, ref="Winter")
## DuskLevel <- relevel(TempData$DielPeriod, ref="Dusk")

ffinal <- formula(CallsPerHr ~ Season + DielPeriod | Season + DielPeriod)
ZIMfinal <- zeroinfl(ffinal, dist = "negbin", link = "logit", data = TempData)

summary(ZIMfinal)
str(ZIMfinal)

### ZI MODEL FOR UPCALLS ONLY

ffinalUp <- formula(UpcallsPerHr ~ Season + DielPeriod | Season + DielPeriod)
ZIMfinalUp <- zeroinfl(ffinalUp, dist = "negbin", link = "logit", data = TempData)

summary(ZIMfinalUp)
str(ZIMfinalUp)