First we will make up, errrr, simulate data

set.seed(13342143)
sessionInfo()
## R version 3.2.2 (2015-08-14)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.10.5 (Yosemite)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] magrittr_1.5    formatR_1.2     tools_3.2.2     htmltools_0.2.6
##  [5] yaml_2.1.13     stringi_0.5-5   rmarkdown_0.7   knitr_1.11     
##  [9] stringr_1.0.0   digest_0.6.8    evaluate_0.7.2
setwd("~/Desktop/Projects/NGS-course/SLDC")

# simulate read counts under a log normal distribution
go.1 <- rlnorm(n = 1e4, meanlog = 3, sdlog = 1)
head(go.1)
## [1] 104.032753  21.115618  13.296879   7.716048  28.856369  33.456710
hist(go.1, col = "grey", main = "", las = 1, breaks = 50)

Now we will simulate data for males and females under equal expression

#Male data
M.exp <- go.1 + rnorm(1e4, mean = 0, sd = 10)
M.exp[M.exp < 0.1] <- 0.1 # we will be using log2, and don't want the world to implode.

#Female data
F.exp <- go.1 + rnorm(1e4, mean = 0, sd = 10)
F.exp[F.exp < 0.1] <- 0.1

# Combine into data.frame
df.2 <- data.frame(exp = go.1, male = M.exp, female = F.exp, chr = as.factor(rep(1:10)))
head(df.2)
##          exp       male    female chr
## 1 104.032753 108.777455 104.35075   1
## 2  21.115618  26.920815  35.79136   2
## 3  13.296879   8.237346  11.17147   3
## 4   7.716048   0.100000  25.14156   4
## 5  28.856369  38.871033  26.13981   5
## 6  33.456710  39.967444  27.40130   6
tail(df.2)
##             exp      male    female chr
## 9995  22.001704 25.884432 35.098484   5
## 9996  12.251902 21.353553  7.301167   6
## 9997   3.623728  0.100000  0.100000   7
## 9998   1.507560  1.495755 23.806271   8
## 9999  14.583080 15.142078 10.117831   9
## 10000 51.379336 46.607079 45.080487  10
str(df.2)
## 'data.frame':    10000 obs. of  4 variables:
##  $ exp   : num  104.03 21.12 13.3 7.72 28.86 ...
##  $ male  : num  108.78 26.92 8.24 0.1 38.87 ...
##  $ female: num  104.4 35.8 11.2 25.1 26.1 ...
##  $ chr   : Factor w/ 10 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...

We can explore the data a bit:

plot(M.exp, F.exp, pch = 19, las = 1, col = rgb(0, 0, 0, 0.2))

#### Make your own M:A plot
plot(log2(go.1), log2(M.exp) - log2(F.exp), pch = 19, col = rgb(0, 0, 0, 0.2), ylim = c(-10, 10), xlim = c(0, 10), las = 1, xlab = expression(paste("log Expression")), ylab = expression(paste("log"[2], " Male - log"[2], " Female")))