ggplot2
library(ggplot2)
library(reshape2)
library(lattice)
# Preview the first six rows of the built-in mtcars data set.
head(mtcars, n = 6L)
mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
---|---|---|---|---|---|---|---|---|---|---|---|
Mazda RX4 | 21 | 6 | 160 | 110 | 3.9 | 2.62 | 16.46 | 0 | 1 | 4 | 4 |
Mazda RX4 Wag | 21 | 6 | 160 | 110 | 3.9 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
Datsun 710 | 22.8 | 4 | 108 | 93 | 3.85 | 2.32 | 18.61 | 1 | 1 | 4 | 1 |
Hornet 4 Drive | 21.4 | 6 | 258 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
Hornet Sportabout | 18.7 | 8 | 360 | 175 | 3.15 | 3.44 | 17.02 | 0 | 0 | 3 | 2 |
Valiant | 18.1 | 6 | 225 | 105 | 2.76 | 3.46 | 20.22 | 1 | 0 | 3 | 1 |
# Each base-graphics plot below is followed by its ggplot2 counterpart.

# Scatter plot of fuel economy (mpg) against weight (wt).
with(mtcars, plot(wt, mpg))
ggplot(mtcars, aes(wt, mpg)) +
  geom_point()

# Scatter plot with a loess smoother.
scatter.smooth(mtcars$wt, mtcars$mpg)
ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  geom_smooth(method = loess)

# Points joined by lines. The base version plots a copy of the data sorted
# by weight; the ggplot2 version is given the unsorted data frame.
df <- mtcars[order(mtcars$wt), ]
with(df, plot(wt, mpg, type = "b"))
ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  geom_line()

# Histogram of mpg.
with(mtcars, hist(mpg, breaks = 10))
ggplot(mtcars, aes(mpg)) +
  geom_histogram(binwidth = 2)

# Kernel density estimate of mpg: draw it, then print its summary.
plot(density(mtcars$mpg), main = "Density plot")
density(mtcars$mpg)
Call:
density.default(x = mtcars$mpg)
Data: mtcars$mpg (32 obs.); Bandwidth 'bw' = 2.477
x y
Min. : 2.97 Min. :6.481e-05
1st Qu.:12.56 1st Qu.:5.461e-03
Median :22.15 Median :1.926e-02
Mean :22.15 Mean :2.604e-02
3rd Qu.:31.74 3rd Qu.:4.530e-02
Max. :41.33 Max. :6.795e-02
# ggplot2 version of the density plot. The x limits are the minimum and
# maximum x values reported in the printed density() summary.
ggplot(mtcars, aes(mpg)) +
  geom_line(stat = "density") +
  xlim(2.97, 41.33) +
  labs(title = "Density plot")
# Histogram of mpg on the density scale, with a rug of the raw values and a
# normal curve overlaid in red.
#
# `x` is the grid of 50 points spanning the observed mpg range on which the
# curve is evaluated. The curve's mean and standard deviation come from the
# observed mpg values: taking them from the grid itself would describe the
# evenly spaced grid, not the data.
#
# with() is used instead of attach()/detach() so the search path is never
# modified, even if one of the plotting calls fails.
x <- seq(min(mtcars$mpg), max(mtcars$mpg), length.out = 50)
normal_density <- dnorm(x, mean = mean(mtcars$mpg), sd = sd(mtcars$mpg))
with(mtcars, {
  hist(mpg, breaks = 10, probability = TRUE, main = "")
  rug(mpg)
})
lines(x, normal_density, col = "red", lwd = 2)
# ggplot2 version: density-scaled histogram of mpg, a normal curve with the
# sample mean and standard deviation of mpg, and a rug of the raw values.
#
# The parameters for dnorm are passed through `args`, the name
# stat_function() documents. The abbreviation `arg` only worked through
# partial argument matching; where it is not matched the mean and sd are
# dropped and a standard normal curve is drawn instead.
# NOTE(review): `..density..` is kept for compatibility with older ggplot2;
# newer releases prefer after_stat(density) -- confirm the target version.
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 2,
    color = "black",
    alpha = 0
  ) +
  stat_function(
    fun = dnorm,
    args = list(mean = mean(mtcars$mpg), sd = sd(mtcars$mpg)),
    color = "red"
  ) +
  geom_rug()
# Base-graphics pie chart of the number of cars per carburetor count.
with(mtcars, pie(table(carb)))

# The same counts as a data frame with columns Carb (factor) and Freq,
# printed for inspection.
df <- setNames(data.frame(table(mtcars$carb)), c("Carb", "Freq"))
df
Carb | Freq | |
---|---|---|
1 | 1 | 7 |
2 | 2 | 10 |
3 | 3 | 3 |
4 | 4 | 10 |
5 | 6 | 1 |
6 | 8 | 1 |
# ggplot2 pie chart: a single stacked bar (x fixed at 1) of the Freq counts,
# drawn in polar coordinates with the angle mapped to y.
ggplot(df, aes(x = 1, y = Freq, fill = Carb)) +
  geom_bar(stat = "identity", color = "black") +
  coord_polar(theta = "y") +
  # Remove the ticks, the y-axis text and both axis titles; keep the
  # x-axis text in black.
  theme(
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_blank()
  )
# Vertical bar plot of the carburetor counts: base graphics, then ggplot2.
with(mtcars, barplot(table(carb)))
ggplot(mtcars, aes(factor(carb))) +
  geom_bar()

# Horizontal bar plot of the same counts.
with(mtcars, barplot(table(carb), horiz = TRUE))
ggplot(mtcars, aes(factor(carb))) +
  geom_bar() +
  coord_flip()
# Cross-tabulate carburetor count (rows) by transmission type `am` (columns),
# print the table, then draw it as a grouped bar plot.
#
# with() replaces attach()/detach() so the search path is never modified;
# the table keeps its "carb"/"am" dimension names.
(tbl <- with(mtcars, table(carb, am)))

# One colour per table row (carburetor count). heat.colors() expects the
# number of colours wanted, not the data vector itself.
bar_cols <- heat.colors(nrow(tbl))

# `legend.text` is spelled out rather than relying on partial matching.
barplot(tbl, beside = TRUE, legend.text = rownames(tbl), col = bar_cols)
am
carb 0 1
1 3 4
2 6 4
3 3 0
4 7 3
6 0 1
8 0 1
# Commented-out faceted example left as-is; simData is not defined in the
# visible part of this script (NOTE(review): confirm whether it is needed).
# Threebartable = as.data.frame(table(simData$FacVar1, simData$FacVar2, simData$FacVar3)) ## CrossTab
# ggplot(Threebartable,aes(x=Var3,y=Freq,fill=Var2))+geom_bar(position="dodge")+facet_wrap(~Var1) ## Bar plot with facetting

# ggplot2 grouped bar plot: one group per transmission type, one dodged bar
# per carburetor count, filled from the "Oranges" Brewer palette.
ggplot(mtcars, aes(factor(am), fill = factor(carb))) +
  geom_bar(position = "dodge") +
  scale_fill_brewer(palette = "Oranges")
# Box plot of every mtcars column on the log(1 + x) scale.
boxplot(log1p(mtcars))

# Show the first six rows of the (wide) data again for reference.
head(mtcars, n = 6L)
mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
---|---|---|---|---|---|---|---|---|---|---|---|
Mazda RX4 | 21 | 6 | 160 | 110 | 3.9 | 2.62 | 16.46 | 0 | 1 | 4 | 4 |
Mazda RX4 Wag | 21 | 6 | 160 | 110 | 3.9 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
Datsun 710 | 22.8 | 4 | 108 | 93 | 3.85 | 2.32 | 18.61 | 1 | 1 | 4 | 1 |
Hornet 4 Drive | 21.4 | 6 | 258 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
Hornet Sportabout | 18.7 | 8 | 360 | 175 | 3.15 | 3.44 | 17.02 | 0 | 0 | 3 | 2 |
Valiant | 18.1 | 6 | 225 | 105 | 2.76 | 3.46 | 20.22 | 1 | 0 | 3 | 1 |
# Reshape mtcars to long form (columns `variable` and `value`) so that each
# original column becomes one box. No id variables are given, so melt()
# treats every column as a measure variable and says so in a message.
df <- melt(mtcars)

# One box per variable, with the y axis on the log(1 + x) scale.
ggplot(df, aes(variable, value)) +
  geom_boxplot() +
  scale_y_continuous(trans = "log1p")
No id variables; using all as measure variables
# Cars sorted by decreasing mpg. The sort is done once and reused by every
# dot chart below.
df <- mtcars[order(-mtcars$mpg), ]

# Base-graphics dot chart of mpg, labelled with the car names.
dotchart(df$mpg, labels = row.names(df))

# ggplot2 needs the car names as a column; reorder() sorts them by mpg.
df$names <- as.factor(rownames(df))
ggplot(df, aes(x = reorder(names, -mpg), y = mpg)) +
  geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.5) +
  coord_flip()

# Dot chart grouped and coloured by number of cylinders.
dotchart(
  df$mpg,
  labels = row.names(df),
  groups = df$cyl,
  color = df$cyl,
  pch = 19
)

# ggplot2 version: one facet per cylinder count. The colour legend is
# suppressed with "none"; passing FALSE to guides() is deprecated in
# current ggplot2.
ggplot(df, aes(x = reorder(names, mpg), y = mpg, col = factor(cyl))) +
  geom_point() +
  facet_grid(. ~ cyl) +
  guides(colour = "none") +
  coord_flip()

# Base-graphics scatterplot matrix of mpg, drat and wt.
pairs(~ mpg + drat + wt, data = mtcars)
Note: splom is from the lattice package - ggplot2 does not do
scatterplot matrices.
# lattice scatterplot matrix of the mpg, drat and wt columns.
splom(mtcars[c("mpg", "drat", "wt")])
ggplot2 (Grammar of Graphics)
In addition to the base plotting facilities we have been using, R
also has the ggplot2 package that can be used to generate beautiful
graphs. We will only touch on a small subset of ggplot2 capabilities
here.
References
library(ggplot2)
library(grid)
library(gridExtra)
# Preview the first six rows of mtcars before plotting.
head(mtcars, n = 6L)
mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
---|---|---|---|---|---|---|---|---|---|---|---|
Mazda RX4 | 21 | 6 | 160 | 110 | 3.9 | 2.62 | 16.46 | 0 | 1 | 4 | 4 |
Mazda RX4 Wag | 21 | 6 | 160 | 110 | 3.9 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
Datsun 710 | 22.8 | 4 | 108 | 93 | 3.85 | 2.32 | 18.61 | 1 | 1 | 4 | 1 |
Hornet 4 Drive | 21.4 | 6 | 258 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
Hornet Sportabout | 18.7 | 8 | 360 | 175 | 3.15 | 3.44 | 17.02 | 0 | 0 | 3 | 2 |
Valiant | 18.1 | 6 | 225 | 105 | 2.76 | 3.46 | 20.22 | 1 | 0 | 3 | 1 |
# Plain scatter plot of mpg against weight with a title and axis labels.
ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  labs(
    title = "Simple scatter plot",
    x = "Weight",
    y = "Miles per gallon"
  )

# Same data with large blue points and an orange loess smoother.
ggplot(mtcars, aes(wt, mpg)) +
  geom_point(color = "blue", size = 5) +
  geom_smooth(method = "loess", color = "orange") +
  labs(
    title = "Fitting a loess",
    x = "Weight",
    y = "Miles per gallon"
  )
# Scatter plot of mpg against weight with colour mapped to the number of
# cylinders and point shape mapped to the transmission type.
# The stray second comma that left an empty argument inside aes() is removed.
ggplot(
  data = mtcars,
  aes(x = wt, y = mpg, color = factor(cyl), shape = factor(am))
) +
  geom_point(size = 5) +
  labs(title = "Use shape and color", x = "Weight", y = "Miles per gallon")
# Base plot object (mpg against weight).
p <- ggplot(mtcars, aes(wt, mpg))

# Point size follows horsepower and colour follows displacement, using a
# ten-step heat palette and point sizes between 2 and 10.
p +
  geom_point(aes(size = hp, color = disp)) +
  ggtitle("Use color and size") +
  scale_colour_gradientn(colours = heat.colors(10)) +
  scale_size(range = c(2, 10))

# mpg against horsepower, coloured by cylinder count, split into a grid of
# panels: rows by `am`, columns by `vs`, each strip labelled with both the
# variable name and its value.
ggplot(mtcars, aes(hp, mpg, color = factor(cyl))) +
  geom_point(size = 5) +
  facet_grid(am ~ vs, labeller = label_both) +
  labs(
    title = "Split plots with conditioning",
    x = "Horsepower",
    y = "Miles per gallon"
  )
# Three unrelated plots stacked in a single column with grid.arrange().

# Box plot of weight by number of gears.
p4 <- ggplot(mtcars, aes(x = factor(gear), y = wt)) +
  geom_boxplot()

# Sine curve drawn from the function itself over [0, 2*pi].
p5 <- ggplot(data.frame(x = seq(0, 2 * pi, length.out = 50)), aes(x = x)) +
  stat_function(fun = sin, geom = "line")

# Overlapping mpg densities, one per gear count, without a legend.
# The transparency is a fixed setting, so it is given to the layer rather
# than mapped inside aes(); a mapped constant goes through the alpha scale
# (and would need its own legend suppressed). Legends are hidden with
# "none" because passing FALSE to guides() is deprecated in current ggplot2.
p6 <- ggplot(mtcars, aes(x = mpg, fill = factor(gear))) +
  geom_density(alpha = 0.5) +
  guides(fill = "none")

grid.arrange(p4, p5, p6, ncol = 1)
# Layered points: a black disc with a slightly smaller pink disc on top
# gives an outlined marker, and a third layer draws a shape per cylinder
# count. Plotted with a black-and-white theme and a square panel.
#
# `show.legend` is the current name of the layer parameter; `show_guide`
# is its deprecated predecessor.
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(colour = "black", size = 4.5, show.legend = TRUE) +
  geom_point(colour = "pink", size = 4, show.legend = TRUE) +
  geom_point(aes(shape = factor(cyl))) +
  theme_bw(base_size = 18) +
  theme(aspect.ratio = 1)
# Four ways of adding a fitted curve to the mpg-against-weight scatter plot,
# arranged in a 2 x 2 grid.
p <- ggplot(mtcars, aes(x = wt, y = mpg))

# Linear fit without a confidence band.
p1 <- p +
  geom_point() +
  stat_smooth(method = lm, se = FALSE)

# Linear fit with a 95% confidence band.
p2 <- p +
  geom_point() +
  stat_smooth(method = lm, level = 0.95)

# Loess fit drawn in red.
p3 <- p +
  geom_point() +
  stat_smooth(method = loess, color = "red")

# Separate loess fits per transmission type, with the colour legend hidden.
# "none" is used because passing FALSE to guides() is deprecated in current
# ggplot2.
p4 <- ggplot(mtcars, aes(x = wt, y = mpg, color = factor(am))) +
  geom_point() +
  geom_smooth(method = "loess") +
  guides(color = "none")

grid.arrange(p1, p2, p3, p4, ncol = 2)
# Fit the models by hand and compute predictions on a regular grid of 100
# weights spanning the observed range, for plotting without stat_smooth().

# Linear model: fitted values plus a pointwise 95% confidence band.
m1 <- lm(mpg ~ wt, data = mtcars)
pred1 <- data.frame(wt = seq(min(mtcars$wt), max(mtcars$wt), length.out = 100))
pred <- predict(m1, pred1, se.fit = TRUE)
pred1$mpg <- pred$fit

# The band uses the t quantile on the model's residual degrees of freedom
# (pred$df). The normal value 1.96 is only the large-sample limit and gives
# a band narrower than predict(..., interval = "confidence") for 32 cars.
ci_half_width <- qt(0.975, df = pred$df) * pred$se.fit
pred1$low <- pred1$mpg - ci_half_width
pred1$high <- pred1$mpg + ci_half_width

# Loess model: fitted values only, on the same grid of weights.
m2 <- loess(mpg ~ wt, data = mtcars)
pred2 <- data.frame(wt = seq(min(mtcars$wt), max(mtcars$wt), length.out = 100))
pred2$mpg <- predict(m2, pred2)
# Plot the hand-computed predictions (pred1: linear model, pred2: loess) on
# top of the raw data, in a 2 x 2 grid.
p <- ggplot(mtcars, aes(wt, mpg))

# Grey points with the fitted regression line.
p1 <- p +
  geom_point(size = 4, color = "gray40") +
  geom_line(data = pred1)

# Same plot with the r-squared label added as a parsed expression.
p2 <- p1 +
  annotate("text", label = "r^2 == 0.75", parse = TRUE, x = 4.8, y = 32)

# Same again, with the confidence band drawn between the line and the label.
p3 <- p1 +
  geom_ribbon(data = pred1, aes(ymin = low, ymax = high), alpha = 0.3) +
  annotate("text", label = "r^2 == 0.75", parse = TRUE, x = 4.8, y = 32)

# Semi-transparent blue points with the loess curve in red.
p4 <- p +
  geom_point(size = 4, color = "blue", alpha = 0.5) +
  geom_line(data = pred2, color = "red", size = 1)

grid.arrange(p1, p2, p3, p4, ncol = 2)
# Logistic regression of the 0/1 transmission indicator `am` on mpg.
# Points are jittered so that overlapping observations stay visible.
#
# The binomial family is handed to glm() through `method.args`. Passing
# `family` directly to stat_smooth() is not forwarded to the fitting
# function in ggplot2 >= 2.0, which leaves glm() on its default gaussian
# family and draws a straight line instead of the logistic curve.
ggplot(mtcars, aes(x = mpg, y = am)) +
  geom_point(
    position = position_jitter(width = .3, height = .08),
    shape = 21,
    alpha = 0.6,
    size = 3
  ) +
  stat_smooth(
    method = glm,
    method.args = list(family = binomial),
    color = "red"
  )