--- title: "Pan-Genome R Notebook" output: html_notebook --- ```{r} library("ape") library("vegan") library("philentropy") library("ggplot2") setwd("~/Desktop/Intersection_OMCL/") pan_matrix <- read.table(file = "pangenome_matrix_t0.tab",stringsAsFactors = TRUE, header = TRUE, row.names = 1) sp <- specaccum(pan_matrix,method="random",permutations=100) plot(sp,ci.type="poly",col="blue",lwd=2,ci.lty=0,ci.col="lightblue",xlab="Genomes",ylab="Proteins",main="Gene accumulation plot") ``` ```{r} # Rarefaction curve with box and whiskers for the resamplings plot(sp,ci.type="poly",col="blue",lwd=2,ci.lty=0,ci.col="lightblue",xlab="Genomes",ylab="Proteins",main="Gene accumulation plot") boxplot(sp,col="yellow",add=TRUE) ``` ```{r} #to plot to file jpeg('rarefaction_plot.jpg') plot(sp,ci.type="poly",col="blue",lwd=2,ci.lty=0,ci.col="lightblue",xlab="Genomes",ylab="Proteins",main="Gene accumulation plot") boxplot(sp,col="yellow",add=TRUE) #dev.off() ``` ```{r} # Make a histogram based on gene presence absence # Version one very awkward n_row <- nrow(pan_matrix) #count number of rows n_col <- ncol(pan_matrix) #Ditto columns HG <- vector(mode="numeric", length = 0) for(col in 1:n_col){ col_count = 0 for(row in 1:n_row){ if(pan_matrix[row,col] > 0){ col_count = col_count + 1 } } HG[col] <- col_count } t_HG <- t(HG) hist(HG,col="red",xlab="Present in # Genomes",ylab="# HGs",main="Histogram of HG Frequenies") ``` ```{r} # Make a histogram based on gene presence absence # Version two (after googling R and histogram) qplot(HG, geom="histogram", binwidth = 0.5, main = "Histogram of HG Frequenies", xlab="Present in # Genomes", ylab="# HGs", col=I("blue"), fill=I("red"), alpha=I(.6) ) ``` To plot this to file ```{r} #to plot to file jpeg('histogram_plot2.jpg') qplot(HG, geom="histogram", binwidth = 0.5, main = "Histogram of HG Frequenies", xlab="Present in # Genomes", ylab="# HGs", col=I("blue"), fill=I("red"), alpha=I(.6) ) #dev.off() ``` ```{r} #make a distance matrix tree based on gene presence absence r_names <- row.names(pan_matrix) matrix_dist <- distance(pan_matrix, method = "jaccard") row.names(matrix_dist) <- r_names tree_bionj <- bionj(matrix_dist) plot(tree_bionj) ``` Using gene presence absence create a PCoA plot ```{r} pcoa <- wcmdscale(matrix_dist) pcoa_names <- row.names(pcoa) plot(pcoa,pch=1,cex=2, col=c("red")) text(pcoa, pcoa_names,cex=0.9) ``` When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Cmd+Shift+K* to preview the HTML file). The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.