# par is used to specify global graphics parameters
# adn affects of subsequent graph functions

par(pch=1)       # defines the plotting symbol
par(lty=2)       # defines the line type
par(lwd=1)       # line's width
par(col="black") # plotting color
par(bg="white")  # background color

par(mfrow=c(2,2))  # number of plots/(row,column) (plots filled row-wise)
par(mfcol=c(2,2))  # number of plots/(row,column) (plots filled col-wise)
par(mfcol=c(1,1))  # default

# ?par check more options

par("mar")  # check the current value of a given attribute, in this case the margins' size

Scatter plots

xs <- seq(-2,2,.05)
plot(xs,dnorm(xs),xlab="",ylab="", pch=20)

lines(c(-2,2),c(.05,.4), col="green", lwd=2) #  add a line to the current plot
points(seq(-2,2,length.out=10),rep(.2,10),pch=20, col="blue")     # add points to the plot
text(-1,.1,"extra text")       # add labels to the plot
title(main="title",sub="subtitle",xlab="x-axis",ylab="y-axis") # add annotations to plot
legend("topleft",c("a label","other label"), col=1:2, pch=20)  # add a legend
axis(4)               # add axis to the other edges
mtext("more text",3)  # (1=bottom, 2=left, 3=top, 4=right).

x <- seq(0,1,.025)
plot(x,dbeta(x,4,2), type="l")  # unite points using a line
curve(dbeta(x,2,4),from=0,to=1,type="l",col="red",add=TRUE) # add a new curve to the plot
abline(h=seq(0,2,.25),v=seq(0,1,.2),col="grey") # add a grid

# making several plots at once
xs <- seq(0,1,.05)
parameters <- c(.5,1,2,5)
par(mfrow=c(length(parameters),length(parameters)))
for(a in parameters) {
  for(b in parameters) {
    plot(xs,dbeta(xs,a,b), type="l", col="red",
         xlab=paste("beta(",a,",",b,")"), ylab="density")
  }
}

x <- rnorm(100)     
y <- x + rnorm(100) 
plot(x,y,type="n")  # plots everything except the data

#let's say that the first half of (x,y) are males, and the last half are females

#define levels for that
gender = gl(2,50,labels=c("male","female"))
#plot male points in blue
points(x[gender=="male"], y[gender=="male"], col="blue")
#plot male points in red
points(x[gender=="female"], y[gender=="female"], col="red")
abline(lm(x~y),col="purple")

# points can have size too:
set.seed(4321)
n.trees <- 100
tree.data <- data.frame(size   = floor(rexp(n.trees, 1/50)),
                        value  = rnorm(n.trees,1000,55),
                        age    = floor(rexp(n.trees, 1/50)))
tree.data$weight <- tree.data$age/50 * floor(rexp(n.trees, 1/150))

head(tree.data)
##   size     value age weight
## 1   51 1002.3564  40 149.60
## 2    1  967.6719   4   2.96
## 3   26  942.8321  47 109.98
## 4   25 1008.5532  16  14.08
## 5   29  991.2168  62 219.48
## 6   15 1102.1071  36  18.72
plot(tree.data$value, tree.data$weight, cex=log(tree.data$size+1),
     xlab="dollars", ylab="tons",main="Size of trees defined by point area")

# we can also divide trees by age assigning different colors
f <- function(age) { 
  if (age<50)
    return ("YOUNG")
  else if (age<90)
    return ("ADULT")
  else 
    return ("OLD")
}
tree.data$age.cat <- Map(f,tree.data$age)
head(tree.data)
##   size     value age weight age.cat
## 1   51 1002.3564  40 149.60   YOUNG
## 2    1  967.6719   4   2.96   YOUNG
## 3   26  942.8321  47 109.98   YOUNG
## 4   25 1008.5532  16  14.08   YOUNG
## 5   29  991.2168  62 219.48   ADULT
## 6   15 1102.1071  36  18.72   YOUNG
plot(tree.data$value, tree.data$weight, 
     xlab="dollars", ylab="tons", type="n",main="Size of trees defined by point area")
colors <- c("blue","green","red")
labels <- c("YOUNG","ADULT","OLD")
for(i in 1:3 ) {
  points(tree.data$value[tree.data$age.cat==labels[i]], 
         tree.data$weight[tree.data$age.cat==labels[i]], 
         cex=log(tree.data$size[tree.data$age.cat==labels[i]]+1), col=colors[i])
}
legend("topleft", tolower(labels), col = colors, pch = 20)  # add a legend

How to present and compute areas for a range of, say, a normal distribution

mean <- 0    # mean parameter 
sd   <- 1    # sd parameter
lb   <- -1.5 # lower bound
ub   <- 1.25 # upper bound

x  <- seq(-4,4,length=100)*sd + mean
hx <- dnorm(x,mean,sd)

plot(x, hx, type="n", xlab="y", ylab="Density",
     main="Normal Distribution")

i <- x >= lb & x <= ub
lines(x, hx)
# polygon draws the polygons whose vertices are given in x and y
polygon(c(lb,x[i],ub), c(0,hx[i],0), col="red")

area <- pnorm(ub, mean, sd) - pnorm(lb, mean, sd)
result <- paste("P(",lb,"< Y <",ub,") =", signif(area, digits=3))
# place label on top of graph
mtext(result,3)

# Another eg:
lambda <- 1/20 # sd parameter
lb   <- 0      # lower bound
ub   <- 60     # upper bound

x  <- seq(lb,ub*1.5,length=100)
hx <- dexp(x,lambda)
plot(x, hx, type="n", xlab="y", ylab="Density")
i <- x >= lb & x <= ub
lines(x, hx)
# polygon draws the polygons whose vertices are given in x and y
polygon(c(lb,x[i],ub), c(0,hx[i],0), col="red")

area <- pexp(ub, 1/20) - pexp(lb, 1/20)
result <- paste("P(",lb,"< Y <",ub,") =", signif(area, digits=6))
# place label on top of graph
mtext(result,3)

title(bquote(paste("Exponential Distribution with ", lambda, "= 1/20")))

Histogram, Boxplot and Barplot

Histogram shows the distribution of observations.

normal.data <- rnorm(200,mean=2,sd=1)

hist(normal.data, breaks=25, freq=FALSE) # freq decides between frequency vs. density
rug(normal.data)                         # shows actual data points
# A density plot captures the distribution shape by smoothing the histogram. You can specify the amount/type of smoothing.
lines(density(normal.data,bw=.1),col="black",lwd=2)  # bw defines the density smoothness
lines(density(normal.data),col="red",lwd=2)
lines(density(normal.data,bw=.5),col="blue",lwd=2)

boxplot(normal.data)

# Let's make some fake data:
some.data <- data.frame(name=sample(letters,26), 
                        age=signif(rnorm(26,45,10),2))
some.data$salary=(some.data$age/45)*signif(runif(26,1000,2000),4)
some.data$profession=sample(gl(2, 13, labels = c("Guard", "Driver")),26)
head(some.data)
##   name age   salary profession
## 1    a  47 1285.711      Guard
## 2    i  44 1327.822     Driver
## 3    k  52 1725.244      Guard
## 4    t  45 1842.000      Guard
## 5    r  33  756.800     Driver
## 6    s  54 1274.400     Driver
# we can boxplot this stuff divided by a category, say profession:
boxplot(some.data$salary ~ some.data$profession)

# we can try do the same thing with age, but we need to cut some intervals first
c1 <- cut(some.data$age, breaks = seq(25,65,by=10))
table(c1)
## c1
## (25,35] (35,45] (45,55] (55,65] 
##       6       8       8       3
some.data$age.interval <- c1
boxplot(some.data$salary ~ some.data$age.interval, xlab="ages", ylab="wages")

size <- 200
another.data <- data.frame(id=sample(1:size), 
                           value=sample(seq(100,900,by=100),size,rep=TRUE))
head(another.data)
##    id value
## 1  65   900
## 2  43   100
## 3 127   700
## 4 199   300
## 5  34   400
## 6  13   100
# to show a histogram of the frequency of values, we use barplot
barplot(table(another.data$value),xlab="frequency", ylab="value",
        horiz=TRUE,xlim=c(0,5+max(table(another.data$value))))