####################################################################################################################################### #This is code to replicate the analyses reported in the manuscript: #"Heightened condition dependence of the sexual transcriptome as a function of genetic quality in Drosophila melanogaster head tissue" #Antonino Malacrinò, Christopher M. Kimber, Martin Brengdahl and Urban Friberg # #DOI: 10.1098/rspb.2019.0819 # #code developed by Antonino Malacrinò (antonino.malacrino@gmail.com) | 2018-2019 ####################################################################################################################################### ##Load required libraries---- library("DESeq2") library("data.table") library("Rmisc") library("dplyr") library("rstanarm") ##Load data and metadata---- #GEO accession GSE60571 data <- read.table(file.choose(), sep="\t", header = T, row.names = 1) metadata <- read.table(file.choose(), sep="\t", header = T) #select "Metadata.txt" genes.to.delete <- read.table(file.choose(), sep="\t", header = T) #select "Genes_to_delete.txt" metadata <- metadata[which(!metadata$Code=="ED4685"),] metadata <- metadata[which(!metadata$Code2=="Control_OreR"),] #use "Control_OreR" to have w1118 as control, or "Control_w1118" to have OregonR as control data <- data[ ,(colnames(data) %in% metadata$Sample_number)] list.del <-unique(c(as.character(metadata$Code))) #Create a list of deletion names list.del <-list.del[list.del != "Control"] ##DESeq model---- #"Code" indicates a specific line #"Type" indicates either "Deletion" or "Control" cds <- DESeqDataSetFromMatrix(data, metadata, ~ Sex + Type) cds$group <- factor(paste0(cds$Code, cds$Sex, cds$Type)) design(cds) <- ~ group dds <-DESeq(cds, betaPrior=FALSE) countdata <- estimateSizeFactors(cds) cdsBlind <- estimateDispersions(countdata) vstdata <- varianceStabilizingTransformation(cdsBlind) ##Classify sex-biased genes in Control-------- vstdata$combined = factor(paste0(vstdata$Sex, "_", vstdata$Code)) vstPerLvl_sex <- sapply(levels(vstdata$combined), function(lvl) rowMeans(assay(vstdata)[,vstdata$combined == lvl])) vstPerLvl_sex <- setDT(as.data.frame(vstPerLvl_sex), keep.rownames = TRUE)[] vstPerLvl_sex2 <- vstPerLvl_sex[, c("rn", "Female_Control", "Male_Control")] res.de <- results(dds, contrast=c("group", "ControlFemaleControl", "ControlMaleControl")) res.de <- as.data.frame(res.de) res.de <- setDT(res.de, keep.rownames = TRUE)[] res.de <- res.de[,c("rn", "log2FoldChange", "padj")] results.merged <- merge(res.de, vstPerLvl_sex2, by="rn", all.x=TRUE) results.merged$bias <- NA results.merged$bias <- ifelse(results.merged$"Female" > results.merged$"Male" & results.merged$padj < 0.01 & abs(results.merged$log2FoldChange) < 3.5, "Female_biased", ifelse(results.merged$"Male" > results.merged$"Female" & results.merged$padj < 0.01 & abs(results.merged$log2FoldChange) < 3.5, "Male_biased", "Unbiased")) colnames(results.merged)[1] <- "Gene" results.merged$bias[is.na(results.merged$bias)] <- "Unbiased" #This will classify genes with low power as "unbiased", but since they lack log2FC values, they will not be considered in the downstream analyses results.merged.summ <- melt(results.merged, id.vars = c("Gene", "bias"), measure.vars = c("Female_Control", "Male_Control")) results.merged.summ <- summarySE(results.merged.summ, measurevar="value", groupvars=c("bias","variable")) #Build a summary table sb.genes <- results.merged[,c("Gene", "bias")] #Dataframe with a list of Genes and their SB ##Calculate the number of sex bias for each deletion (Table 1)-------- sb_calculator <- function(x){ c1 <- results(dds, contrast=c("group", paste0(x, "Male","Deletion"), paste0(x, "Female","Deletion"))) c1 <- as.data.frame(c1) c1 <- setDT(c1, keep.rownames = TRUE)[] c1 <- c1[,c("rn", "log2FoldChange", "padj")] list <- c("rn", paste("Male_", x, sep=""), paste("Female_", x, sep="")) vstPerLvl_sex <- vstPerLvl_sex[, ..list] aaa <- as.data.frame(vstPerLvl_sex) colnames(aaa) <- c("Gene", "Male", "Female") c1$bias <- ifelse(c1$padj < 0.01 & aaa$Female > aaa$Male & abs(c1$log2FoldChange) < 3.5,"Female_biased", ifelse(c1$padj < 0.01 & aaa$Female < aaa$Male & abs(c1$log2FoldChange) < 3.5,"Male_biased", "Unbiased")) gen.del <- genes.to.delete[which(genes.to.delete$Deletion == x),] c1$bias[c1$rn %in% gen.del$Gene_deleted] <- NA c1 <- c1[,c("bias")] return(c1) } deletion.sbb <- sapply(list.del, function(x){ Deletion <- sb_calculator(x) return(Deletion)}, simplify = FALSE,USE.NAMES = TRUE) deletion.sb2 <- do.call(cbind, deletion.sbb) deletion.sb2 <- cbind(sb.genes, deletion.sb2) sum.sexbiasedg <- data.frame(deletion = character(11), #Build a summary table fb = numeric(11), mb = numeric(11)) sum.sexbiasedg$deletion <- colnames(deletion.sb2[,c(3:13)]) sum.sexbiasedg$deletion <- gsub("\\..*","",sum.sexbiasedg$deletion) sum.sexbiasedg$fb <- apply(deletion.sb2[, c(3:13)], 2, function(x) length(which(x == "Female_biased"))) sum.sexbiasedg$mb <- apply(deletion.sb2[, c(3:13)], 2, function(x) length(which(x == "Male_biased"))) sum.sexbiasedg$total <- apply(sum.sexbiasedg[, c(2,3)], 1, function(x) sum(x)) #Test for differences in number of SB genes between deletions and control #Make sure that the number of SB genes in the control are correct fb.test <- sum.sexbiasedg %>% rowwise() %>% mutate( test_stat = chisq.test(c(fb, 208))$statistic, p_val = chisq.test(c(fb, 208))$p.value) fb.test$p.adj <- p.adjust(fb.test$p_val, method = "fdr", n = 11) mb.test <- sum.sexbiasedg %>% rowwise() %>% mutate( test_stat = chisq.test(c(mb, 156))$statistic, p_val = chisq.test(c(mb, 156))$p.value) mb.test$p.adj <- p.adjust(mb.test$p_val, method = "fdr", n = 11) tot.test <- sum.sexbiasedg %>% rowwise() %>% mutate( test_stat = chisq.test(c(total, 364))$statistic, p_val = chisq.test(c(total, 364))$p.value) tot.test$p.adj <- p.adjust(tot.test$p_val, method = "fdr", n = 11) #Calculate the number of sex bias for each deletion in common with control sb_calculator <- function(x){ c1 <- results(dds, contrast=c("group", paste0(x, "Male","Deletion"), paste0(x, "Female","Deletion"))) c1 <- as.data.frame(c1) c1 <- setDT(c1, keep.rownames = TRUE)[] c1 <- c1[,c("rn", "log2FoldChange", "padj")] list <- c("rn", paste("Male_", x, sep=""), paste("Female_", x, sep="")) vstPerLvl_sex <- vstPerLvl_sex[, ..list] aaa <- as.data.frame(vstPerLvl_sex) colnames(aaa) <- c("Gene", "Male", "Female") c1$bias <- ifelse(c1$padj < 0.01 & aaa$Female > aaa$Male & abs(c1$log2FoldChange) < 3.5,"Female_biased", ifelse(c1$padj < 0.01 & aaa$Female < aaa$Male & abs(c1$log2FoldChange) < 3.5,"Male_biased", "Unbiased")) gen.del <- genes.to.delete[which(genes.to.delete$Deletion == x),] c1$bias[c1$rn %in% gen.del$Gene_deleted] <- NA c1 <- c1[,c("bias")] return(c1) } deletion.sbb <- sapply(list.del, function(x){ Deletion <- sb_calculator(x) return(Deletion)}, simplify = FALSE,USE.NAMES = TRUE) deletion.sb2 <- do.call(cbind, deletion.sbb) deletion.sb2 <- cbind(sb.genes, deletion.sb2) sb.genes <- results.merged[,c("Gene", "bias")] deletion.sb2 <- do.call(cbind, deletion.sbb) deletion.sb2 <- cbind(sb.genes, deletion.sb2) distrib.mb <- subset(deletion.sb2, bias == "Male_biased") distrib.fb <- subset(deletion.sb2, bias == "Female_biased") sum.losingSB <- data.frame(deletion = character(11), fb = numeric(11), mb = numeric(11)) sum.losingSB$deletion <- colnames(distrib.mb[,c(3:13)]) #Build a summary table sum.losingSB$fb <- apply(distrib.fb[, c(3:13)], 2, function(x) length(which(x == "Female_biased"))) sum.losingSB$mb <- apply(distrib.mb[, c(3:13)], 2, function(x) length(which(x == "Male_biased"))) sum.losingSB$total <- apply(sum.losingSB[, c(2,3)], 1, function(x) sum(x)) ##Contrast deletion lines towards control (Figure 1)-------- contrast_calculator <- function(x, y){ c1 <- results(dds, contrast=c("group", paste0(x, y,"Deletion"),paste0("Control",y,"Control"))) c1 <- as.data.frame(c1) c1 <- setDT(c1, keep.rownames = TRUE)[] c1 <- c1[,c("rn", "log2FoldChange")] gen.del <- genes.to.delete[which(genes.to.delete$Deletion == x),] c1$log2FoldChange[c1$rn %in% gen.del$Gene_deleted] <- NA c1 <- c1[,c("log2FoldChange")] return(c1)} contrast.ge.cd.m <- sapply(list.del, function(x){ Deletion <- contrast_calculator(x, "Male") return(Deletion)}, simplify = FALSE,USE.NAMES = TRUE) contrast.ge.cd.f <- sapply(list.del,function(x){ Deletion <- contrast_calculator(x, "Female") return(Deletion)}, simplify = FALSE,USE.NAMES = TRUE) female.ge.cd2 <- do.call(cbind, contrast.ge.cd.f) female.ge.cd2 <- cbind(sb.genes, female.ge.cd2) male.ge.cd2 <- do.call(cbind, contrast.ge.cd.m) male.ge.cd2 <- cbind(sb.genes, male.ge.cd2) female.ge.melted <- melt(female.ge.cd2, id.vars = c("Gene", "bias")) male.ge.melted <- melt(male.ge.cd2, id.vars = c("Gene", "bias")) final.ge.cd.melted <- cbind(female.ge.melted, male.ge.melted) final.ge.cd.melted <- final.ge.cd.melted[,c(1,2,3,4,8)] colnames(final.ge.cd.melted) <- c("Gene", "Bias", "Deletion", "Female", "Male") final.ge.cd.melted <- final.ge.cd.melted[complete.cases(final.ge.cd.melted),] final.ge.cd.melted <- melt(final.ge.cd.melted, id.vars = c("Gene", "Bias", "Deletion")) final.ge.cd.melted$value[abs(final.ge.cd.melted$value) > 3.5] <- NA #This will give data to plot Figure 1 ##Bayesian model (Fig. 1)---- m <- stan_glmer(value ~ variable * Bias + (1|Deletion), data = final.ge.cd.melted, cores = 16, seed = 100) ##Correlation between degree of condition dependence and degree of sex-bias (Figure 2)---- #Calculate SB in control line c1 <- results(dds, contrast=c("group", "ControlMaleControl", "ControlFemaleControl")) c1 <- as.data.frame(c1) c1 <- setDT(c1, keep.rownames = TRUE)[] c1 <- c1[,c("rn", "log2FoldChange")] c1[which(abs(c1$log2FoldChange) > 3.5),] <- NA c2 <- results(dds, contrast=c("group", "ControlFemaleControl", "ControlMaleControl")) c2 <- as.data.frame(c2) c2 <- setDT(c2, keep.rownames = TRUE)[] c2 <- c2[,c("rn", "log2FoldChange")] c2[which(abs(c2$log2FoldChange) > 3.5),] <- NA #Contrast deletion towards control to get CD contrast_calculator <- function(x, y){ c1 <- results(dds, contrast=c("group", paste0("Control",y,"Control"),paste0(x, y,"Deletion"))) c1 <- as.data.frame(c1) c1 <- setDT(c1, keep.rownames = TRUE)[] c1 <- c1[,c("rn", "log2FoldChange")] gen.del <- genes.to.delete[which(genes.to.delete$Deletion == x),] c1$log2FoldChange[c1$rn %in% gen.del$Gene_deleted] <- NA c1[which(abs(c1$log2FoldChange) > 3.5),] <- NA c1 <- c1[,c("log2FoldChange")] return(c1)} contrast.ge.cd.m <- sapply(list.del, function(x){ Deletion <- contrast_calculator(x, "Male") return(Deletion)}, simplify = FALSE,USE.NAMES = TRUE) contrast.ge.cd.f <- sapply(list.del,function(x){ Deletion <- contrast_calculator(x, "Female") return(Deletion)}, simplify = FALSE,USE.NAMES = TRUE) female.ge.cd2 <- do.call(cbind, contrast.ge.cd.f) female.ge.cd2 <- cbind(sb.genes, female.ge.cd2) male.ge.cd2 <- do.call(cbind, contrast.ge.cd.m) male.ge.cd2 <- cbind(sb.genes, male.ge.cd2) cod.depend.males = sb.genes cod.depend.males$CD <- rowMeans(male.ge.cd2[, 3:13]) cod.depend.males$SB <- c1$log2FoldChange cod.depend.males$SB2 <- c2$log2FoldChange #This will give data to plot Figure 2A and 2B cod.depend.females = sb.genes cod.depend.females$CD <- rowMeans(female.ge.cd2[, 3:13]) cod.depend.females$SB <- c1$log2FoldChange cod.depend.females$SB2 <- c2$log2FoldChange #This will give data to plot Figure 2C and 2D cor.test( ~ CD + SB, data = cod.depend.males[which(bias == "Male_biased")], method = "pearson", continuity = FALSE) cor.test( ~ CD + SB2, data = cod.depend.males[which(bias == "Female_biased")], method = "pearson", continuity = FALSE) cor.test( ~ CD + SB, data = cod.depend.females[which(bias == "Male_biased")], method = "pearson", continuity = FALSE) cor.test( ~ CD + SB2, data = cod.depend.females[which(bias == "Female_biased")], method = "pearson", continuity = FALSE) ##Tissue specificity analysis (ESM7) t.sp <- read.table(file.choose(), header = T, sep = "\t") #select "Tissue_specificity.txt" t.sp <- t.sp[which(t.sp$Gene!="---"),] row_sub = apply(t.sp[, 2:16], 1, function(row) all(row >1 )) t.sp <- t.sp[row_sub,] t.sp[, 2:16] <- log(t.sp[, 2:16]) tau<-function(x){ t<-sum(1-x/max(x))/(length(x)-1) return(t) } t.sp$TAU <- NA t.sp$TAU <- apply(t.sp[, 2:15], 1, tau) t.sp$Tissue <-colnames(t.sp[, 2:15])[apply(t.sp[, 2:15],1,which.max)] cod.depend.females2 <- cod.depend.females[,c("Gene", "CD")] cod.depend.males2 <- cod.depend.males[,c("Gene", "CD")] t.sp2 <- t.sp[,c("Gene", "TAU")] t.sp2 <- merge(t.sp2, sb.genes, by = "Gene") t.sp2 <- merge(t.sp2, cod.depend.females2, by = "Gene") t.sp2 <- merge(t.sp2, cod.depend.males2, by = "Gene") m.f <- lm(abs(CD.x) ~ TAU, t.sp2, na.action=na.exclude) rm.f <-as.data.frame(residuals(m.f)) m.m <- lm(abs(CD.y) ~ TAU, t.sp2, na.action=na.exclude) rm.m <-as.data.frame(residuals(m.m)) residuals.plot <- t.sp2[,c("Gene", "bias")] residuals.plot <- cbind(residuals.plot, rm.f) residuals.plot <- cbind(residuals.plot, rm.m) colnames(residuals.plot) <- c("Gene", "bias", "Res.f", "Res.m") # residuals.plot is the dataset used for ESM7 residuals.plot2 <- melt(residuals.plot, id.vars = c("Gene", "bias")) m <- stan_glm(value ~ variable * bias, data = residuals.plot2, family = gaussian, cores = 16, seed = 100)