---
title: "Pan-Genome R Notebook"
output: html_notebook
---
```{r}
library("ape")
library("vegan")
library("philentropy")
library("ggplot2")
setwd("~/Desktop/Intersection_OMCL/")
pan_matrix <- read.table(file = "pangenome_matrix_t0.tab",stringsAsFactors = TRUE, header = TRUE, row.names = 1)
sp <- specaccum(pan_matrix,method="random",permutations=100)
plot(sp,ci.type="poly",col="blue",lwd=2,ci.lty=0,ci.col="lightblue",xlab="Genomes",ylab="Proteins",main="Gene accumulation plot")
```
```{r}
# Rarefaction curve with box and whiskers for the resamplings
plot(sp,ci.type="poly",col="blue",lwd=2,ci.lty=0,ci.col="lightblue",xlab="Genomes",ylab="Proteins",main="Gene accumulation plot")
boxplot(sp,col="yellow",add=TRUE)
```
```{r}
#to plot to file
jpeg('rarefaction_plot.jpg')
plot(sp,ci.type="poly",col="blue",lwd=2,ci.lty=0,ci.col="lightblue",xlab="Genomes",ylab="Proteins",main="Gene accumulation plot")
boxplot(sp,col="yellow",add=TRUE)
#dev.off()
```
```{r}
# Make a histogram based on gene presence absence
# Version one very awkward
n_row <- nrow(pan_matrix) #count number of rows
n_col <- ncol(pan_matrix) #Ditto columns
HG <- vector(mode="numeric", length = 0)
for(col in 1:n_col){
col_count = 0
for(row in 1:n_row){
if(pan_matrix[row,col] > 0){
col_count = col_count + 1
}
}
HG[col] <- col_count
}
t_HG <- t(HG)
hist(HG,col="red",xlab="Present in # Genomes",ylab="# HGs",main="Histogram of HG Frequenies")
```
```{r}
# Make a histogram based on gene presence absence
# Version two (after googling R and histogram)
qplot(HG,
geom="histogram",
binwidth = 0.5,
main = "Histogram of HG Frequenies",
xlab="Present in # Genomes",
ylab="# HGs",
col=I("blue"),
fill=I("red"),
alpha=I(.6)
)
```
To plot this to file
```{r}
#to plot to file
jpeg('histogram_plot2.jpg')
qplot(HG,
geom="histogram",
binwidth = 0.5,
main = "Histogram of HG Frequenies",
xlab="Present in # Genomes",
ylab="# HGs",
col=I("blue"),
fill=I("red"),
alpha=I(.6)
)
#dev.off()
```
```{r}
#make a distance matrix tree based on gene presence absence
r_names <- row.names(pan_matrix)
matrix_dist <- distance(pan_matrix, method = "jaccard")
row.names(matrix_dist) <- r_names
tree_bionj <- bionj(matrix_dist)
plot(tree_bionj)
```
Using gene presence absence create a PCoA plot
```{r}
pcoa <- wcmdscale(matrix_dist)
pcoa_names <- row.names(pcoa)
plot(pcoa,pch=1,cex=2, col=c("red"))
text(pcoa, pcoa_names,cex=0.9)
```
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Cmd+Shift+K* to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.