# Daniel Baller rm(list=ls()) # clearing library(nls2) library(ggpubr) library(rootSolve) # Reading in data ---- PIN=read.csv("C:/Users/daniel.baller/Documents/Research/data/NEW/For Dan PIN Data.csv",header=T) #Healthy and PRE Data UPenn1=read.csv("C:/Users/daniel.baller/Documents/Research/data/NEW/For Dan U Penn.csv",header=T) #reading in data UPenn2=read.csv("C:/Users/daniel.baller/Documents/Research/data/NEW/For Dan U Penn2.csv",header=T) #reading in data UPenn = merge(UPenn1, UPenn2, by = "Study.ID") ST.Barnabas=read.csv("C:/Users/daniel.baller/Documents/Research/data/NEW/For Dan Found all the data for the graph.csv",header=T) #Lots of data, only use ST. B Nadav=read.csv("C:/Users/daniel.baller/Documents/Research/data/NEW/For Dan Nadav R03 Dataset.csv",header=T) #healthy6 at term birth using values greater than 1 WUSTL = read.csv("C:/Users/daniel.baller/Documents/Research/data/NEW/For Dan WUSTL.csv",header=T) # Setting up the different data sets ---- # Starting with UPenn Data to get normal pregnancy fit UPenn.days = c(UPenn$GA.days.Visit.1, UPenn$GA.Days.Visit.2) # Days of pregnancy (Gestational Day) normal pregnancy UPenn.Vol = c(UPenn$Placental.volume.Visit.1, UPenn$Placental.Volume.Visit.2) #placenta volume normal pregnancy UPenn.Vol = UPenn.Vol[-which(UPenn.days == 0)] #cleaning for days =0 UPenn.days = UPenn.days[-which(UPenn.days == 0)] #cleaning for days =0 UPenn.Vol = UPenn.Vol[-which(UPenn.days > 365)] #cleaning for days > 365 UPenn.days = UPenn.days[-which(UPenn.days > 365)] #cleaning for days > 365 UPenn.days = UPenn.days[-which(is.na(UPenn.Vol) == T)] #cleaning for days = n/A UPenn.Vol = UPenn.Vol[-which(is.na(UPenn.Vol)==T)] #cleaning for volume = n/A #Nadav Data days.Nadav = Nadav$Gestatational.Age.in.Days.at.term[which(Nadav$Placental.Weight..at.term>1)] # Days of pregnancy (Gestational Day) normal pregnancy Vol.Nadav = Nadav$Placental.Weight..at.term[which(Nadav$Placental.Weight..at.term>1)] #placenta volume normal pregnancy UPenn.Vol = c(UPenn.Vol, Vol.Nadav) UPenn.days = c(UPenn.days, days.Nadav) # PIN Healthy Data days.PIN.normal = PIN$GestationalDays[which((PIN$GDM..1.Y.+PIN$PIH+PIN$Preeclampsia+PIN$C_HYPER+PIN$C_PRE_DM)==0)] # Days of pregnancy (Gestational Day) normal pregnancy PIN.volume = PIN$Weightaftercuttingcordmembrane[which((PIN$GDM..1.Y.+PIN$PIH+PIN$Preeclampsia+PIN$C_HYPER+PIN$C_PRE_DM)==0)] #placenta volume normal pregnancy PIN.volume = PIN.volume[-which(days.PIN.normal < 259)] # removing preterm births days.PIN.normal = days.PIN.normal[-which(days.PIN.normal < 259)] # removing preterm births days.PIN.normal = days.PIN.normal[-which(PIN.volume < 0)] # removing missing values (-99) PIN.volume = PIN.volume[-which(PIN.volume < 0)] # removing missing values (-99) # ST Barnabas Data days.ST.Barnabas = ST.Barnabas$GA.1 # Days of pregnancy (Gestational Day) normal pregnancy Vol.ST.Barnabas = ST.Barnabas$St..Barnabas #placenta volume normal pregnancy days.ST.Barnabas = days.ST.Barnabas[-which(is.na(Vol.ST.Barnabas) == T)] #cleaning for volume = n/A Vol.ST.Barnabas = Vol.ST.Barnabas[-which(is.na(Vol.ST.Barnabas)==T)] #cleaning for volume = n/A #WUSTL Data days.WUSTL = WUSTL$Gestational.Age..weeks..at.US*7 Vol.WUSTL = WUSTL$placentalvolcm3 ######not healthy data sets ---- # PIN Preeclampsia Data days.PIN.Pre = PIN$GestationalDays[which(PIN$Preeclampsia==1)] # Days of pregnancy (Gestational Day) preeclampsia pregnancy Vol.PIN.Pre = PIN$Weightaftercuttingcordmembrane[which(PIN$Preeclampsia==1)] #placenta volume preeclampsia pregnancy # PIN PIH Data days.PIN.PIH = PIN$GestationalDays[which(PIN$PIH==1)] # Days of pregnancy (Gestational Day) preeclampsia pregnancy Vol.PIN.PIH = PIN$Weightaftercuttingcordmembrane[which(PIN$PIH==1)] #placenta volume preeclampsia pregnancy days.PIN.PIH = days.PIN.PIH[-which(Vol.PIN.PIH < 0)] # removing missing values (-99) Vol.PIN.PIH = Vol.PIN.PIH[-which(Vol.PIN.PIH < 0)] # removing missing values (-99) # Creating X Values ---- x = seq(0,300,.01) #x values for placenta function int.day = seq(0,300,1) # x values using int for each day # Finding K and r for normal pregnancies---- #Combining healthy data Healthy.volume = c(UPenn.Vol, Vol.ST.Barnabas) Healthy.days = c(UPenn.days, days.ST.Barnabas) alldata = data.frame(Healthy.days, Healthy.volume) m<-nls(Healthy.volume~(7102*K*exp(-84*r))/((125*exp(-r*Healthy.days)*K)+(7102*exp(-84*r))-(7102*exp(-r*Healthy.days))),data = alldata, start = list(K=1700,r=.032)) summary(m) c.m = coef(m) # Normal Pregnancy Parameters K = c.m[1] #4.909e+02 # estimated K value from nonlinear least squares fit r = c.m[2] # estimated r value from nonlinear least squares fit # Finding K and r for normal pregnancies with Healthy PINS Data and WUSTL---- # Combining healthy data UPENN, ST B, PINS, WUSTL Healthy.volume2 = c(UPenn.Vol, Vol.ST.Barnabas, PIN.volume,Vol.WUSTL) Healthy.days2 = c(UPenn.days, days.ST.Barnabas, days.PIN.normal,days.WUSTL) alldata2 = data.frame(Healthy.days2, Healthy.volume2) m2<-nls(Healthy.volume2~(7102*K_a*exp(-84*r_a))/((125*exp(-r_a*Healthy.days2)*K_a)+(7102*exp(-84*r_a))-(7102*exp(-r_a*Healthy.days2))),data = alldata2, start = list(K_a=1700,r_a=.032)) summary(m2) c.m2 = coef(m2) K_a = c.m2[1] #4.909e+02 # estimated K value from nonlinear least squares fit r_a = c.m2[2] # estimated r value from nonlinear least squares fit PvT_a = function(x) {(7102*K_a*exp(-84*r_a))/((125*exp(-r_a*x)*K_a)+(7102*exp(-84*r_a))-(7102*exp(-r_a*x)))} # function for expected placenta volume based on fitted values # Finding K_2 and r_2 for preeclampsia pregnancies ---- not.healthy.day = c(days.PIN.Pre,days.PIN.PIH) not.healthy.vol = c(Vol.PIN.Pre,Vol.PIN.PIH) not.healthy = data.frame(not.healthy.day,not.healthy.vol) q<-nls2(not.healthy.vol~(7102*K_2*exp(-84*r_2))/((125*exp(-r_2*not.healthy.day)*K_2)+(7102*exp(-84*r_2))-(7102*exp(-r_2*not.healthy.day))),data = not.healthy,start = list(K_2=1700,r_2=.032)) summary(q) c.q = coef(q) # Preeclampsia Pregnancy Parameters K_2 = c.q[1] r_2 = c.q[2] PvT.Pre = function(x) {(7102*K_2*exp(-84*r_2))/((125*exp(-r_2*x)*K)+(7102*exp(-84*r_2))-(7102*exp(-r_2*x)))} # function for expected placenta volume based on fitted values PvT.Pre2 = function(x) {PvT.Pre(x)-K_2/2} IP2 = uniroot(PvT.Pre2,c(0,360)) inflection.point.pre = IP2$root#inflection point for preclamsia pregnancies # Finding K_2 and r_2 GAM ---- all.volume = c(Healthy.volume2,Vol.PIN.Pre,Vol.PIN.PIH) all.days = c(Healthy.days2, days.PIN.Pre,days.PIN.PIH) type = c(rep(0,length(Healthy.volume2)),rep(1,length(days.PIN.Pre)+length(days.PIN.PIH))) all.data.gam = data.frame(all.volume,all.days,type) q<-nls(all.volume~(7102*K_2*exp(-84*r_2))/((125*exp(-r_2*all.days)*K_2)+(7102*exp(-84*r_2))-(7102*exp(-r_2*all.days)))+type, data = all.data.gam, start = list(K_2=1700,r_2=.032, type = 0)) summary(q) c.q = coef(q) # Preeclampsia Pregnancy Parameters K_2 = c.q[1] r_2 = c.q[2] # Estimating the curve for normal pregnancies ---- PvT = function(x) {(7102*K*exp(-84*r))/((125*exp(-r*x)*K)+(7102*exp(-84*r))-(7102*exp(-r*x)))} # function for expected placenta volume based on fitted values # Calculating the inflection point PvT2 = function(x) {PvT(x)-K/2} IP = uniroot(PvT2,c(0,360)) inflection.point = IP$root # Plotting everything together ---- plot(UPenn.days, UPenn.Vol, main = "Placenta volume measurements over pregnancy",ylim=c(0, 1000), ylab = "Placent Volume(ml)", xlab = "Day of measurement", col = "blue", pch = 18, xlim = c(0,300)) points(days.ST.Barnabas, Vol.ST.Barnabas, col = "red", pch = 17) points(days.PIN.normal, PIN.volume, col = "Green", pch = 19) #Blob covers everything points(days.WUSTL, Vol.WUSTL,col = "light blue", pch = 15) lines(x,PvT(x))#plotting expected placenta volume based on normal values lines(x,PvT_a(x), col = "red") #plotting expected placenta volume based on normal values and PINS Healthy points(inflection.point,PvT(inflection.point), col = "yellow", pch = 16, cex = 2) #plotting inflection point points(days.PIN.Pre, Vol.PIN.Pre, col = "pink", pch = 17) #Plotting preeclampsia data points(days.PIN.PIH, Vol.PIN.PIH, col = "pink", pch = 19) #Plotting preeclampsia data lines(x,PvT.Pre(x), col = "orange") #plotting expected placenta volume based on preeclamsia values points(inflection.point.pre,PvT.Pre(inflection.point.pre), col = "brown", pch = 16, cex = 2) #plotting inflection point #GGPLOT ---- library("ggplot2") y.inflection.point = PvT(inflection.point) all.volume.ggplot = c(UPenn.Vol,Vol.ST.Barnabas,PIN.volume,Vol.WUSTL,Vol.PIN.Pre,Vol.PIN.PIH) all.days.ggplot = c(UPenn.days,days.ST.Barnabas, days.PIN.normal,days.WUSTL,days.PIN.Pre,days.PIN.PIH) all.data.set = c(rep("UPenn",length(UPenn.Vol)),rep("ST. Barnabas",length(Vol.ST.Barnabas)),rep("PIN Healthy",length(PIN.volume)),rep("WUSTL",length(Vol.WUSTL)),rep("PIN Pre",length(Vol.PIN.Pre)),rep("PIN PIH",length(Vol.PIN.PIH))) all.data.ggplot = data.frame(all.volume.ggplot,all.days.ggplot,all.data.set) ggplot(data = all.data.ggplot,aes(x = all.days.ggplot, y = all.volume.ggplot, color = all.data.set))+ geom_point(aes(shape = all.data.set))+ scale_shape_manual(values = c(18,17,19,15,17,19))+ # geom_jitter(height = 1, width = 1)+ xlab("Gestational Days")+ ylab("Placenta Volume")+ stat_function(fun = PvT, geom = "line", color = "black")+ geom_point(aes(x = inflection.point, y = y.inflection.point, colour = "yellow", size = 2)) ?geom_line #estimating the curve for preeclampsia pregnancies PvT.simp.Pre = function(x) {1127660/(1702.97+75669.5*exp(-r_2*x))} # simplified function for expected placenta volume based on fitted values lines(x,PvT.simp.Pre(x)) #plotting expected placenta volume based on preeclamsia values inflection.point.pre = 223.176 #inflection point for preclamsia pregnancies calculated using mathematica points(inflection.point.pre,PvT.simp.Pre(inflection.point.pre), col = "Orange", pch = 16, cex = 2) #plotting inflection point # calculating average distance from normal points to the projected line diference = (UPenn.Vol-PvT.simp(UPenn.days)) mean(abs(diference)) # calculating average distance from preeclampsia points to the projected line diference.pre = (UPenn.Vol.Pre-PvT.simp(days.UPenn.Pre)) mean(abs(diference.pre)) # Smooth scatter plot of normal and preeclaimsia data smoothScatter(c(UPenn.days, days.UPenn.Pre), c(UPenn.Vol, UPenn.Vol.Pre), colramp = colorRampPalette(c("white", blues9))) lines(x,PvT.simp(x)) #plotting expected placenta volume based on normal values points(inflection.point,PvT.simp(inflection.point), col = "yellow", pch = 16, cex = 2) #plotting inflection point lines(x,PvT.simp.Pre(x)) #plotting expected placenta volume based on preeclamsia values points(inflection.point.pre,PvT.simp.Pre(inflection.point.pre), col = "Orange", pch = 16, cex = 2) #plotting inflection point #plotting data using rounded day measurements rounded.days=round(days,0) # rounding day measurement to whole numbers plot(rounded.days,volume, main = "Placenta volume measurements over pregnancy", ylab = "Placent Volume(ml)", xlab = "Rounded Day of measurement") lines(int.day,eq2(int.day)) #plotting log transform of data for normal pregnancies plot(UPenn.days, log(UPenn.Vol), main = "Log Placenta volume measurements over pregnancy", ylab = "LogPlacent Volume (ml)", xlab = "Day of measurement", col = "blue", pch = 18) lines(x,log(PvT.simp(x))) points(days.UPenn.Pre, log(UPenn.Vol.Pre), col = "green", pch = 17) #Plotting preeclampsia data lines(x,log(PvT.simp.Pre(x))) points(inflection.point,log(PvT.simp(inflection.point)), col = "yellow", pch = 16, cex = 2) #plotting inflection point points(inflection.point.pre,log(PvT.simp.Pre(inflection.point.pre)), col = "Orange", pch = 16, cex = 2) #plotting inflection point # calculating average distance from log of normal points to the projected line log.diference = (log(UPenn.Vol)-log(PvT.simp(UPenn.days))) mean(abs(log.diference)) # calculating average distance from log of preeclampsia points to the projected line log.diference.pre = (log(UPenn.Vol.Pre)-log(PvT.simp(days.UPenn.Pre))) mean(abs(log.diference.pre)) #smooth scatter of log values smoothScatter(days, log(volume)) lines(x,log(PvT.simp(x))) lines(x,log(PvT.simp.Pre(x))) points(inflection.point,log(PvT.simp(inflection.point)), col = "yellow", pch = 16, cex = 2) #plotting inflection point points(inflection.point.pre,log(PvT.simp.Pre(inflection.point.pre)), col = "Orange", pch = 16, cex = 2) #plotting inflection point # Two sided t-test on residuals and log residuals t.test(diference, diference.pre, alternative = "two.sided", var.equal = FALSE) t.test(diference, diference.pre, alternative = "two.sided", var.equal = TRUE) t.test(log.diference, log.diference.pre, alternative = "two.sided", var.equal = FALSE) t.test(log.diference, log.diference.pre, alternative = "two.sided", var.equal = TRUE) # Testing for the same variance res.ftest <- var.test(diference,diference.pre) res.ftest res.ftest.log <- var.test(log.diference,log.diference.pre) res.ftest.log # Testing for normality shapiro.test(diference) shapiro.test(diference.pre) shapiro.test(log.diference) shapiro.test(log.diference.pre) ################################################################################################# # Two sided t-test on absolte value of residuals t.test(abs(diference), abs(diference.pre), alternative = "two.sided", var.equal = FALSE) t.test(abs(diference), abs(diference.pre), alternative = "two.sided", var.equal = TRUE) t.test(abs(log.diference), abs(log.diference.pre), alternative = "two.sided", var.equal = FALSE) t.test(abs(log.diference), abs(log.diference.pre), alternative = "two.sided", var.equal = TRUE) # Testing for the same variance res.ftest <- var.test(abs(diference),abs(diference.pre)) res.ftest res.ftest.log <- var.test(abs(log.diference),abs(log.diference.pre)) res.ftest.log # Testing for normality shapiro.test(abs(diference)) shapiro.test(abs(diference.pre)) shapiro.test(abs(log.diference)) shapiro.test(abs(log.diference.pre)) ################################################################################################## # one sided t test on residuals and log residuals t.test(diference, diference.pre, alternative = "less", var.equal = FALSE) t.test(diference, diference.pre, alternative = "less", var.equal = TRUE) t.test(log.diference, log.diference.pre, alternative = "less", var.equal = FALSE) t.test(log.diference, log.diference.pre, alternative = "less", var.equal = TRUE) # same with abs Value t.test(abs(diference), abs(diference.pre), alternative = "less", var.equal = FALSE) t.test(abs(diference), abs(diference.pre), alternative = "less", var.equal = TRUE) t.test(abs(log.diference), abs(log.diference.pre), alternative = "less", var.equal = FALSE) t.test(abs(log.diference), abs(log.diference.pre), alternative = "less", var.equal = TRUE) ?t.test ## #two-samples Wilcoxon test comparing the means of two independent samples (x & y) wilcox.test(diference, diference.pre) wilcox.test(log.diference, log.diference.pre) wilcox.test(abs(diference), abs(diference.pre)) wilcox.test(abs(log.diference), abs(log.diference.pre)) #one sided ?wilcox.test wilcox.test(diference, diference.pre, alternative = "less") wilcox.test(log.diference, log.diference.pre, alternative = "less") wilcox.test(abs(diference), abs(diference.pre), alternative = "less") wilcox.test(abs(log.diference), abs(log.diference.pre), alternative = "less") nsim = simulate(m, nsim = 100) ?simulate rm(K,r) p = nls2(UPenn$U.Penn~(7102*(K+(b*UPenn$PreeClampsia..IUGR)))*exp(-84*(r+(d*UPenn$PreeClampsia..IUGR)))/((125*exp((-r+(d*UPenn$PreeClampsia..IUGR))*UPenn$GA..days..Visit.1)*(K+b*UPenn$PreeClampsia..IUGR))+(7102*exp(-84*(r+d*UPenn$PreeClampsia..IUGR)))-(7102*exp((-r+d*UPenn$PreeClampsia..IUGR)*UPenn$GA..days..Visit.1))),data = UPenn,start = list(K=1700,r=.032, b= 0, d=0)) summary(p) confint(m) confint(q) library(MASS) ?mvrnorm() g = mvrnorm(n=10, c(K,r), vcov(m))-mvrnorm(n=10, c(K_2, r_2), vcov(q)) g combn ?gam library(gam) ?gam airquality ?ggplot2