##### V1 = RMR ; V2 = RPOA ### PGLS ### M. G. Faure-Brac's script ### PACKAGES install.packages('caper') library(caper) ### DATA # /!\ Taxa should respect the same appearance order in both tree and table /!\ # Tree - Use only NEXUS files (.nex) ; the used file is phylogeny_synapsids.nex tree <- read.tree(text=write.tree(read.nexus(file=file.choose()))) # Trait - Use only Text with tab as separator (.txt) files # Table should have the first column named "species" and the following one V1 to Vn, with V1 the response variable and all following the predictors # The used file is data.txt trait <- read.table(file=file.choose(),header=TRUE,stringsAsFactor=FALSE) ### DATASET # Association of both tree and numerical data in one set data <- comparative.data(phy=tree, data=trait, names.col=species, vcv=T, na.omit=F, warn.dropped=T) ### PGLS # We run a first PGLS with raw data reg <- pgls(V1~V2, data=data, lambda="ML") # We test the normality of the residuals # I prefer to run a Shapiro test because it is less sensitive to little sample than a Lilliefors test. shapiro.test(reg$residuals) # Results : W = 0.79857 ; p-value = 0.001939 # The distribution of the residuals of our analyses are not following a normal law. We proceed to a new PGLS with log transformed data. # RMR is now log transformed. We used a log1p (log +1) for RPOA because of the null values. reg <- pgls(log(V1)~log1p(V2), data=data, lambda="ML") shapiro.test(reg$residuals) # Results : W = 0.93855; p-value = 0.3014 # The distribution of the residuals now follow a normal law. summary(reg) # Results : R² = 0.7866; p-value = 2.092e-06 ### PEM SYNAPSIDA ### M. G. Faure-Brac's script, from L. Legendre and G. Guénard's work ### PACKAGES install.packages('MPSEM') library(MPSEM) ### DATASET # We use the same objects and data as in the PGLS part # Acquisition of location in influence matrix of NA taxa grloc <- getGraphLocations(tree,trait[is.na(trait[,"V1"]),"species"]) # Association of the number of the relative position in the table / tree of a taxon with its name sporder <- match(attr(grloc$x,"vlabel")[grloc$x$vertex$species],trait[,"species"]) ### PEM FIRST ACQUISITION # A first part of the analyse runs data without transformation to obtain model's residuals and test their normality # Creation of eigenvectors and calculation of all models PEMfs <- list() # eigenvectors and predictors PEMfs[["V2"]] <- PEM.fitSimple(y=(trait[sporder,"V1"]),x=(trait[sporder,"V2"]),w=grloc$x,d="distance",sp="species",lower=0,upper=1) # eigenvectors only PEMfs[["none"]] <- PEM.fitSimple(y=(trait[sporder,"V1"]),x=NULL,w=grloc$x,d="distance",sp="species",lower=0,upper=1) # AICc calculation : This test select the best subset of eigenvector to perform the inference of the predicted variable PEMAIC <- list() # AICc for models with predictors PEMAIC[["V2"]] <- lmforwardsequentialAICc(y=(trait[sporder,"V1"]),x=(trait[sporder,"V2",drop=FALSE]),object=PEMfs[["V2"]]) # AICc for models without predictors PEMAIC[["none"]] <- lmforwardsequentialAICc(y=(trait[sporder,"V1"]),object=PEMfs[["V2"]]) ### TRANSFORMATION TEST # The normality test is performed on models' residuals, obtained with the AICc calculation # I prefer to run a Shapiro test because it is less sensitive to little sample than a Lilliefors test. print(shapiro.test(PEMAIC$V2$residuals)) ### RESULTS OF NORMALITY TEST # RPOA + phylogeny : p-value = 0.0001791* # As our residuals do not follow a normal law, we proceed to a log1p transformation (log1p because of null values) # RMR will be log transformed until now ### PEM SECOND ACQUISITION # Creation of eigenvectors and calculation of all models with transformed data PEMfs <- list() # eigenvectors and predictors PEMfs[["V2"]] <- PEM.fitSimple(y=log(trait[sporder,"V1"]),x=log1p(trait[sporder,"V2"]),w=grloc$x,d="distance",sp="species",lower=0,upper=1) # eigenvectors only PEMfs[["none"]] <- PEM.fitSimple(y=log(trait[sporder,"V1"]),x=NULL,w=grloc$x,d="distance",sp="species",lower=0,upper=1) # AICc calculation : This test select the best subset of eigenvector to perform the inference of the predicted variable PEMAIC <- list() # AICc for models with predictors PEMAIC[["V2"]] <- lmforwardsequentialAICc(y=log(trait[sporder,"V1"]),x=log1p(trait[sporder,"V2",drop=FALSE]),object=PEMfs[["V2"]]) # AICc for models without predictors PEMAIC[["none"]] <- lmforwardsequentialAICc(y=log(trait[sporder,"V1"]),object=PEMfs[["V2"]]) # Verification of normality print(shapiro.test(PEMAIC$V2$residuals)) # RPOA + phylogeny : p-value = 0.5401 # Now the residuals follow a normal law, I pursuie the prediction # AICc result : first column, AICc score ; second column, R². Best model maximise the first column and minimise the second one for(m in c("V2","none")) cat(m,summary(PEMAIC[[m]])$adj,PEMAIC[[m]]$AICc,"\n") rm(m) # Model metric : finding values of "a". "psi" always equal 1 print(PEMfs$V2$a) # a = 0.4503192 # RPOA + phylogeny : R² = 0.9731499 ; AICc = 22.09361 # phylogeny : R² = 0.9731734 ; AICc = 43.4455 # Best model is RPOA + phylogeny (V2) ### RMR PREDICTION # Prediction with a 95% interval of confidence. /!\ results are log transformed atr <- log1p(trait[is.na(trait[,"V1"]),"V2",drop=FALSE]) ypred <- predict(object=PEMfs[["V2"]],targets=grloc,lmobject=PEMAIC[["V2"]],newdata=atr,interval="confidence") ### PLOT # Crossed validation yfit <- numeric(length(PEMAIC[["V2"]]$fitted.values)) yfit[sporder] <- PEMAIC[["V2"]]$fitted.values # Plot tools X11(width=7.25,height=4.5) layout(matrix(c(1,1,2),1L,3L)) par(mar=c(4,1,1,1)) plot(tree,cex=1.25) lab <- (range(trait[,"V1"],na.rm=TRUE)) lab <- c(floor(lab[1L]),ceiling(lab[2L])) plot(NA,ylim=c(1L,length(tree$tip.label)),xlim=lab,axes=FALSE,xlab="Metabolic rate (mLO2.h-1.g-0,67)") lab <- lab[1L]:lab[2L] axis(1,at=lab,label=lab) # Position of data points(y=match(trait[!is.na(trait[,"V1"]),"species"],tree$tip.label),x=(trait[!is.na(trait[,"V1"]),"V1"]),pch=22,bg="white",cex=2) points(y=match(trait[!is.na(trait[,"V1"]),"species"],tree$tip.label),x=(exp(yfit[sporder])),pch=4,cex=2) points(y=match(trait[is.na(trait[,"V1"]),"species"],tree$tip.label),x=(exp(ypred$values)),pch=22,bg="black",cex=2) # margin of errors arrows(y0=match(trait[is.na(trait[,"V1"]),"species"],tree$tip.label),y1=match(trait[is.na(trait[,"V1"]),"species"],tree$tip.label),x0=(exp(ypred$values)),x1=(exp(ypred$upper)),length=0.05,angle=90) arrows(y0=match(trait[is.na(trait[,"V1"]),"species"],tree$tip.label),y1=match(trait[is.na(trait[,"V1"]),"species"],tree$tip.label),x0=(exp(ypred$values)),x1=(exp(ypred$lower)),length=0.05,angle=90) ### ANCESTRAL STATES ### M. G. Faure-Brac's script ### PACKAGES install.packages('phytools') library(phytools) ### DATA # We keep the same tree but we have to use the results of the PEM (in the file ancData.txt) ancdata <- read.table(file=file.choose(),header=TRUE,stringsAsFactor=FALSE) ### DATASET # Creation of a vector, as recquired by the function 'fastAnc' ancvect <- c(ancdata$V1) names(ancvect) <- c(ancdata$species) ### ANCESTRAL STATES # Inference of ancestral states is performed in a ML simulation, estimating the 95-percent confidence intervals fastAnc(tree, ancvect, method="ML", CI=TRUE)