# load packages
library("caper")
library("car")
library("MuMIn")

# load data
data<-read.table("Powell_etal_2019_data_for_analysis.txt", header=T, sep="\t")

# examine data
str(data) # 48 species, 12 columns

# load tree
tree<-read.nexus("consensusTree_10kTrees_Primates_Version3.nex.txt")

# examine tree
str(tree)

# create comparative data
comp_data<-comparative.data(tree, data, names.col="Species", vcv=TRUE, na.omit=FALSE)

# check for dropped species
comp_data$dropped # none dropped from data

### check VIFs for rest-of-brain measures ###

# create rest of brain measures
data$RB_neocortex<-data$Brain-data$Neocortex # neocortex
data$RB_cerebellum<-data$Brain-data$Cerebellum # cerebellum
data$RB_NC_CB<-data$Brain-(data$Neocortex+data$Cerebellum) # neocortex + cerebellum

# calculate VIFs
vif(vifmodel<-glm(log10(Neocortex)~log10(RB_neocortex)+log10(Body), na.action=na.omit, data=data))
vif(vifmodel<-glm(log10(Cerebellum)~log10(RB_cerebellum)+log10(Body), na.action=na.omit, data=data))
vif(vifmodel<-glm(log10(Neocortex)~log10(RB_NC_CB)+log10(Body), na.action=na.omit, data=data))
vif(vifmodel<-glm(log10(Cerebellum)~log10(RB_NC_CB)+log10(Body), na.action=na.omit, data=data))

### run global models ###

## whole brain

# create global model
brain_global<-pgls(log10(Brain)~log10(Gestation)+log10(Weaning)+log10(Juvenile_period)+log10(Adult_lifespan)+log10(Body), comp_data, lambda="ML")

# summarise results and check diagnostics
summary(brain_global)
length(brain_global$y) # check sample size correct
par(mfrow=c(2,2))
plot(brain_global)
range(vif(vifmodel<-glm(brain_global$formula, na.action=na.omit, data=data))) # note - same for all models

# effect sizes
10^ 0.502289 # gestation
10^ 0.069354 # lactation

## neocortex

# create global model
NC_global<-pgls(log10(Neocortex)~log10(Gestation)+log10(Weaning)+log10(Juvenile_period)+log10(Adult_lifespan)+log10(Body), comp_data, lambda="ML")

# summarise results and check diagnostics
summary(NC_global)
length(NC_global$y) # check sample size correct
par(mfrow=c(2,2))
plot(NC_global)

# effect sizes
10^0.614240 # gestation
10^0.115164 # lactation

## cerebellum

# create global model
CB_global<-pgls(log10(Cerebellum)~log10(Gestation)+log10(Weaning)+log10(Juvenile_period)+log10(Adult_lifespan)+log10(Body), comp_data, lambda="ML")

# summarise results and check diagnostics
summary(CB_global)
length(CB_global$y) # check sample size correct
par(mfrow=c(2,2))
plot(CB_global)

# effect sizes
10^0.268073 # juvenile period

### model selection ###

# create function to extract R-squareds from each candidate model
R2<-function(x){
summary(x)$r.squared
}

## whole brain

# run global model with lambda fixed at the ML value
brain_global_fix<-pgls(log10(Brain)~log10(Gestation)+log10(Weaning)+log10(Juvenile_period)+log10(Adult_lifespan)+log10(Body), comp_data, lambda=1)

# use the function 'dredge' to run all combinations of terms in the global model and rank models by BIC
brain_models<-dredge(brain_global_fix, fixed="log10(Body)", rank=BIC, extra=c("R2")) # keeping body mass in all models

# view model selection table
brain_models

## neocortex

# run global model with lambda fixed at the ML value
NC_global_fix<-pgls(log10(Neocortex)~log10(Gestation)+log10(Weaning)+log10(Juvenile_period)+log10(Adult_lifespan)+log10(Body), comp_data, lambda=1)

# use the function 'dredge' to run all combinations of terms in the global model and rank models by BIC
NC_models<-dredge(NC_global_fix, fixed="log10(Body)", rank=BIC, extra=c("R2")) # keeping body mass in all models

# view model selection table
NC_models

## cerebellum

# run global model with lambda fixed at the ML value
CB_global_fix<-pgls(log10(Cerebellum)~log10(Gestation)+log10(Weaning)+log10(Juvenile_period)+log10(Adult_lifespan)+log10(Body), comp_data, lambda=0.000001) # lambda fixed at smallest possible value (zero is not permitted)

# use the function 'dredge' to run all combinations of terms in the global model and rank models by BIC
CB_models<-dredge(CB_global_fix, fixed="log10(Body)", rank=BIC, extra=c("R2")) # keeping body mass in all models

# view model selection table
CB_models


### taxonomic differences ###

## gestation

# run & compare models 
gest_intercepts_only<-pgls(log10(Gestation)~log10(Body)+Ape, comp_data, lambda="ML") # different intercepts
gest_slopes<-pgls(log10(Gestation)~log10(Body)*Ape, comp_data, lambda=1) # different slopes and intercepts (lambda fixed to same value as intercepts only model)
BIC(gest_intercepts_only, gest_slopes) 

# summarise results of supported model and check diagnostics
summary(gest_intercepts_only)
length(gest_intercepts_only$y) # check sample size correct
par(mfrow=c(2,2))
plot(gest_intercepts_only)

## lactation

# run & compare models 
lact_intercepts_only<-pgls(log10(Weaning)~log10(Body)+Ape, comp_data, lambda="ML") # different intercepts
lact_slopes<-pgls(log10(Weaning)~log10(Body)*Ape, comp_data, lambda=0.051) # different slopes and intercepts (lambda fixed to same value as intercepts only model)
BIC(lact_intercepts_only, lact_slopes) 

# summarise results of supported model and check diagnostics
summary(lact_intercepts_only)
length(lact_intercepts_only $y) # check sample size correct
par(mfrow=c(2,2))
plot(lact_intercepts_only)

## juvenile period

# run & compare models 
juv_intercepts_only<-pgls(log10(Juvenile_period)~log10(Body)+Ape, comp_data, lambda="ML") # different intercepts
juv_slopes<-pgls(log10(Juvenile_period)~log10(Body)*Ape, comp_data, lambda=0.257) # different slopes and intercepts (lambda fixed to same value as intercepts only model)
BIC(juv_intercepts_only, juv_slopes) 

# summarise results of supported model and check diagnostics
summary(juv_intercepts_only)
length(juv_intercepts_only$y) # check sample size correct
par(mfrow=c(2,2))
plot(juv_intercepts_only)

## adult lifespan 

# run & compare models 
AL_intercepts_only<-pgls(log10(Adult_lifespan)~log10(Body)+Ape, comp_data, lambda= 0.000001) # different intercepts (note: lambda fixed to same as the different slopes model due to optimisation errors)
AL_slopes<-pgls(log10(Adult_lifespan)~log10(Body)*Ape, comp_data, lambda="ML") # different slopes and intercepts
BIC(AL_intercepts_only, AL_slopes) 

# summarise results of supported model and check diagnostics
summary(AL_intercepts_only)
length(AL_intercepts_only$y) # check sample size correct
par(mfrow=c(2,2))
plot(AL_intercepts_only)

## re-run global cerebellum model without apes

data_no_apes<-subset(data, Ape=="No")
comp_data_no_apes<-comparative.data(tree, data_no_apes, names.col="Species", vcv=TRUE, na.omit=FALSE)
CB_global_no_apes<-pgls(log10(Cerebellum)~log10(Gestation)+log10(Weaning)+log10(Juvenile_period)+log10(Adult_lifespan)+log10(Body), comp_data_no_apes, lambda="ML")

# summarise results and check diagnostics
summary(CB_global_no_apes)
length(CB_global_no_apes$y) 
par(mfrow=c(2,2))
plot(CB_global_no_apes)

## re-run the global CB model 1000 times removing 5 random (non-ape) species each time

# create empty list for the results
loop_results<-list()

# run the loop
for (i in 1:1000){ 
	remove<-as.character(sample(subset(data, Ape=="No")$Species, 5)) # select 5 non-apes to remove
	subset<-droplevels(subset(data, !(Species %in% remove))) # subset the data excluding the 5 to remove
	compdata<-comparative.data(tree, subset, Species, vcv=TRUE, warn.dropped=FALSE, na.omit=FALSE) # make a new comparative object with the data subset
	loop_results[[i]]<-summary(model<-pgls(log10(Cerebellum)~log10(Gestation)+log10(Weaning)+log10(Juvenile_period)+log10(Adult_lifespan)+log10(Body), compdata, lambda="ML")) # run the model
	print(i) # print the iteration number
}

# run a second loop to extract p-values
pvals<-vector()

# function to extract p-values for juvenile period from the models
p_JP<-function(x){ 
	x$coefficients[("log10(Juvenile_period)"),("Pr(>|t|)")]
}

# run the second loop
for (i in 1:length(loop_results)){
	pvals[i]<-p_JP(loop_results[[i]]) 
} 

# summarise the p values 
hist(pvals, breaks=100, col="grey", las=1, main="", xlab="p-values")
abline(v=0.05, col="red", lty=2, lwd=2)
length(which(pvals>=0.05))/length(pvals)*100 # percentage of sig p-vals at alpha=0.05

### figure 1

# create a figure showing all estimates + 95% CI for life history traits on brain volume measures together 

predictors<-c("Gestation length", "Weaning age", "Juvenile period", "Adult lifespan")
brainvols<-c("Whole brain", "Neocortex", "Cerebellum")

par(mfrow=c(1,1))
par(mar=c(5,10,5,5))
plot(1, ylim=c(0.5,3.5), xlim=c(0.5, 7.5), type="n", xaxt="n", yaxt="n", ylab="", xlab="Life history traits")
title(ylab="Brain volumes", line=7)
axis(1, at=c(1,3,5,7), predictors)
axis(2, at=c(1,2,3), rev(brainvols), las=1)
abline(v=c(1,3,5,7), lty=2, col="grey") # mark 'zeroes'

# whole brain
brain_means<-as.numeric(c(summary(brain_global)$coefficients[2:5,1])) # estimates
brain_SEs<-as.numeric(c(summary(brain_global)$coefficients[2:5,2])) # SEs
brain_lowCI<-brain_means-(1.96*brain_SEs) # lower 95% CI
brain_upCI<-brain_means+(1.96*brain_SEs) # upper 95% CI
points(brain_means+c(1,3,5,7), c(3,3,3,3), pch=19, col="blue") # add the estimates as points
arrows(x0=brain_lowCI+c(1,3,5,7), x1=brain_upCI+c(1,3,5,7), y0=3, length=0.05, angle=90, code=3, col="blue") # add the CIs as arrows

# neocortex
NC_means<-as.numeric(c(summary(NC_global)$coefficients[2:5,1])) 
NC_SEs<-as.numeric(c(summary(NC_global)$coefficients[2:5,2])) 
NC_lowCI<-NC_means-(1.96*NC_SEs)
NC_upCI<-NC_means+(1.96*NC_SEs)
points(NC_means+c(1,3,5,7), c(2,2,2,2), pch=19, col="blue") 
arrows(x0=NC_lowCI+c(1,3,5,7), x1=NC_upCI+c(1,3,5,7), y0=2, length=0.05, angle=90, code=3, col="blue")

# cerebellum
CB_means<-as.numeric(c(summary(CB_global)$coefficients[2:5,1])) 
CB_SEs<-as.numeric(c(summary(CB_global)$coefficients[2:5,2])) 
CB_lowCI<-CB_means-(1.96*CB_SEs)
CB_upCI<-CB_means+(1.96*CB_SEs)
points(CB_means+c(1,3,5,7), c(1,1,1,1), pch=19, col="blue") 
arrows(x0=CB_lowCI+c(1,3,5,7), x1=CB_upCI+c(1,3,5,7), y0=1, length=0.05, angle=90, code=3, col="blue")


### figure 2

## lactation duration in apes versus non-apes

# plot results
palette(c("blue", "green"))
plot(log10(Weaning)~log10(Body), col=as.factor(Ape), comp_data$data, pch=19, las=1, ylab="Log10 weaning age", xlab="Log10 body mass")
abline(0.914861+0.274493, 0.396960, lwd=1, col="green") # apes
abline(0.914861, 0.396960, lwd=1, col="blue") # non-apes
legend("topleft", c("Apes", "Non-apes"), col=c("green", "blue"), pch=19)

# plot results
palette(c("blue", "green"))
plot(log10(Juvenile_period)~log10(Body), col=as.factor(Ape), comp_data$data, pch=19, las=1, ylab="Log10 juvenile period", xlab="Log10 body mass")
abline(2.324284 + 0.236202, 0.186023, lwd=1, col="green") # apes
abline(2.324284, 0.186023, lwd=1, col="blue") # non-apes
legend("topleft", c("Apes", "Non-apes"), col=c("green", "blue"), pch=19)

### exploring reasons for zero lambda estimates in cerebellum models

## likelihood profile for lambda in the global neocortex model
NC_profile<-pgls.profile(NC_global, which="lambda", N=100, param.CI=T)
plot(NC_profile$x, NC_profile$logLik, type="l", ylab="Log likelihood", xlab="Lambda", las=1)
abline(v= NC_profile$ci$opt, col="red", lwd=2) # ML value
abline(v= NC_profile$ci$ci.val[1], col="red", lty=2) # lower 95% CI
abline(v= NC_profile$ci$ci.val[2], col="red", lty=2) # upper 95% CI

CB_profile<-pgls.profile(CB_global, which="lambda", N=100, param.CI=T)
plot(CB_profile$x, CB_profile$logLik, type="l", ylab="Log likelihood", xlab="Lambda", las=1)
abline(v= CB_profile$ci$opt, col="red", lwd=2) # ML value
abline(v= CB_profile$ci$ci.val[1], col="red", lty=2) # lower 95% CI
abline(v= CB_profile$ci$ci.val[2], col="red", lty=2) # upper 95% CI

## run the candidate model set for cerebellum volume estimating lambda by maximum likelihood

# function to extract lambda from models
lambda<-function(x){
as.numeric(x$param.CI$lambda$opt)
}

# run the models
CB_models_ML<-dredge(CB_global, fixed="log10(Body)", rank=BIC, extra=c("lambda","R2")) # keeping body mass in all models

# view model selection table
CB_models_ML

## plot cerebellum volume against other structures

# plot CB against NC
plot(log10(Cerebellum)~log10(Neocortex), data, las=1, pch=19, col="grey", ylab="Log10 cerebellum", xlab="Log10 neocortex")
plot(log10(Cerebellum)~log10(Brain), data, las=1, pch=19, col="grey", ylab="Log10 cerebellum", xlab="Log10 brain")

## re-run the cerebellum global model removing one species at a time

# create empty object for the results
outlier_results<-list()

# run the loop
for (i in 1:length(rownames(comp_data$data))){
	sample<-data[-c(i),] # remove species i
	comp_sample<-comparative.data(tree, sample, names.col="Species", vcv=TRUE, na.omit=FALSE)
	outlier_results[[i]]<-summary(pgls(log10(Cerebellum)~log10(Gestation)+log10(Weaning)+log10(Juvenile_period)+log10(Adult_lifespan)+log10(Body), comp_sample, lambda="ML"))
}

# report the lambdas from each model
unlist(lapply(outlier_results, lambda))