- What is a (data) plot?
- What are the three most important data plots?
2016-02-24
How would you describe this plot?
Elements of a plot
Additional components
Exchange rates were extracted from http://openexchangerates.org through its JSON API, using the R package jsonlite.
# Read the exchange-rate table from the course site and preview its
# top-left corner (first five rows, first eight columns).
library(readr)

rates <- read_csv(file = "http://dicook.github.io/Monash-R/data/rates.csv")
rates[seq_len(5), seq_len(8)]
#> Source: local data frame [5 x 8]
#>
#>         date      AED      AFN      ALL     AMD     ANG      AOA      ARS
#>       (date)    (dbl)    (dbl)    (dbl)   (dbl)   (dbl)    (dbl)    (dbl)
#> 1 2015-02-23 3.672900 57.33792 123.8969 478.692 1.78968 105.9075 8.702166
#> 2 2015-02-24 3.672069 57.35200 123.7132 478.608 1.78958 106.1014 8.696728
#> 3 2015-02-25 3.673324 57.32655 123.5259 478.616 1.78954 106.1913 8.715239
#> 4 2015-02-26 3.673028 57.52745 124.5801 480.294 1.78956 106.3042 8.720107
#> 5 2015-02-27 3.672648 57.33172 124.8491 478.812 1.78958 106.3389 8.721236
If you'd like to collect exchange rates yourself, see here.
# AUD against date three ways: qplot's default geom, an explicit line,
# and a line with the points overlaid.
qplot(x = date, y = AUD, data = rates)
qplot(x = date, y = AUD, data = rates, geom = "line")
qplot(x = date, y = AUD, data = rates, geom = c("line", "point"))
# The same AUD series assembled layer by layer with ggplot().
ggplot(rates, aes(date, AUD)) +
  geom_point() +
  geom_line()

# Overlay NZD and GBP by overriding the y aesthetic in extra line layers.
ggplot(rates, aes(date, AUD)) +
  geom_line() +
  geom_line(aes(y = NZD), colour = "blue") +
  geom_line(aes(y = GBP), colour = "red")
# Keep the date plus three currencies, stack the currency columns into
# long form (one row per date/currency pair), and draw one line per currency.
rates.sub <- select(rates, date, AUD, NZD, GBP)
rates.sub.m <- gather(rates.sub, key = currency, value = rate, -date)
qplot(
  x = date, y = rate, data = rates.sub.m,
  geom = "line", colour = currency
)
## Emphasis on magnitude of cross rate
# Standardise each currency (centre and scale) so the three series can be
# compared on a common axis, then rebuild the long-form table and replot.
#
# scale() returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes. as.numeric() strips those so the columns stay
# plain numeric vectors; otherwise the data frame holds matrix columns and
# gather() warns that attributes differ across measure variables.
rates.sub <- mutate(
  rates.sub,
  AUD = as.numeric(scale(AUD)),
  NZD = as.numeric(scale(NZD)),
  GBP = as.numeric(scale(GBP))
)
rates.sub$date <- as.Date(rates.sub$date)
rates.sub.m <- gather(rates.sub, currency, rate, -date)
qplot(date, rate, data = rates.sub.m, geom = "line", colour = currency)
Set the linetype/shape to be different for the different currencies.
Set the linetype/shape to be different for the different currencies.
# Distinguish currencies by line type as well as colour, with thicker lines.
qplot(
  x = date, y = rate, data = rates.sub.m, geom = "line",
  colour = currency, linetype = currency, lwd = I(2)
)

# Add points so that each currency also gets its own plotting symbol.
qplot(
  x = date, y = rate, data = rates.sub.m, geom = c("line", "point"),
  colour = currency, linetype = currency, shape = currency
)
# AUD against NZD on a square panel: first as points, then joined by a line.
qplot(x = AUD, y = NZD, data = rates.sub) +
  theme(aspect.ratio = 1)

qplot(x = AUD, y = NZD, data = rates.sub, geom = "line") +
  theme(aspect.ratio = 1)
Problem: geom="line" only connects points from left to right along the x axis, not in time order.
# geom "path" joins observations in row order; colour follows order(date).
qplot(
  x = AUD, y = NZD, data = rates.sub,
  geom = "path", colour = order(date)
) +
  theme(aspect.ratio = 1)

# 2-d density contours with the raw points drawn on top.
qplot(
  x = AUD, y = NZD, data = rates.sub,
  geom = c("density2d", "point")
) +
  theme(aspect.ratio = 1)
# Lag plots: AUD against itself 1, 2 and 7 observations earlier.
#
# dplyr::lag() is called explicitly because an unqualified lag() resolves to
# stats::lag() when dplyr is not masking it, and stats::lag() does not shift
# the values of a plain vector.
AUD <- rates[, c("date", "AUD")]
AUD.1 <- dplyr::lag(AUD$AUD, 1)
AUD.2 <- dplyr::lag(AUD$AUD, 2)
AUD.7 <- dplyr::lag(AUD$AUD, 7)

# Put the series and its lags in one data frame so both axes come from the
# same (raw) rates. The original plotted the AUD column of rates.sub against
# lag vectors picked up from the global environment.
AUD.lags <- data.frame(
  AUD = AUD$AUD,
  AUD.1 = AUD.1,
  AUD.2 = AUD.2,
  AUD.7 = AUD.7
)

# The leading rows have no lagged value (NA), so ggplot2 drops them.
qplot(AUD, AUD.1, data = AUD.lags) + theme(aspect.ratio = 1)
qplot(AUD, AUD.2, data = AUD.lags) + theme(aspect.ratio = 1)
qplot(AUD, AUD.7, data = AUD.lags) + theme(aspect.ratio = 1)
Look up geom_rug in the ggplot2 cheat sheet and add the marginal distributions to a scatterplot of AUD and NZD.
# Histogram of the AUD column; only x is supplied.
qplot(x = AUD, data = rates.sub, geom = "histogram")
Only one variable was passed to the plot command, but two axes are shown in the histogram. What happened? What is plotted on the vertical axis?
# Density estimate of the AUD column, filled solid black.
qplot(
  x = AUD, data = rates.sub,
  geom = "density", fill = I("black")
)
# Plain-text axis labels and a title.
qplot(
  x = date, y = rate, data = rates.sub.m,
  geom = "line", colour = currency
) +
  xlab("Date") +
  ylab("Standardized rates") +
  ggtitle("Cross rates 23/2/2015-11/11/2015")

# The same plot with a plotmath expression as the x-axis label.
qplot(
  x = date, y = rate, data = rates.sub.m,
  geom = "line", colour = currency
) +
  xlab(expression(Date[i]^2~ mu ~ pi * sigma)) +
  ylab("Standardized rates") +
  ggtitle("Cross rates 23/2/2015-11/11/2015")
# Convert the dates to POSIXct so a datetime scale can be used, then build
# the base plot `p` with monthly breaks labelled by abbreviated month name.
rates.sub.m$date <- as.POSIXct(rates.sub.m$date)
p <- qplot(
  x = date, y = rate, data = rates.sub.m,
  geom = "line", colour = currency
) +
  scale_x_datetime(
    breaks = date_breaks("1 month"),
    labels = date_format("%b")
  ) +
  scale_y_continuous(name = "Standardized rates")
p

# Same plot with the legend moved underneath the panel.
p + theme(legend.position = "bottom")
# Restyle `p` with two ggthemes themes.
library(ggthemes)
p + theme_tufte()
p + theme_economist()

# Swap the colour scale for the ColorBrewer "Dark2" palette with an empty
# legend title.
p + scale_color_brewer("", palette = "Dark2")
# Default hue palette: as is, then passed through dichromat().
library(dichromat)
clrs <- hue_pal()(3)
p +
  scale_color_manual("", values = clrs) +
  theme(legend.position = "none")
clrs <- dichromat(clrs)
p +
  scale_color_manual("", values = clrs) +
  theme(legend.position = "none")

# ColorBrewer "Dark2" palette: as is, then passed through dichromat().
library(RColorBrewer)
clrs <- brewer.pal(3, "Dark2")
p +
  scale_color_manual("", values = clrs) +
  theme(legend.position = "none")
clrs <- dichromat(clrs)
p +
  scale_color_manual("", values = clrs) +
  theme(legend.position = "none")
# Rebuild `p` with dashed lines (linetype 2), then overlay a smoother per
# currency and colour with the "Dark2" palette.
p <- qplot(
  x = date, y = rate, data = rates.sub.m,
  geom = "line", colour = currency, linetype = I(2)
) +
  scale_x_datetime(
    breaks = date_breaks("1 month"),
    labels = date_format("%b")
  ) +
  scale_y_continuous(name = "Standardized rates")

p +
  geom_smooth() +
  scale_color_brewer(palette = "Dark2")
How has the EUR changed relative to the USD over the course of this year? What about the JPY?
# One semi-transparent line per currency, one panel per value of `cl`.
# NOTE(review): rates.sub2.m and its `cl` column are built outside this
# view - presumably a cluster label; confirm.
qplot(
  x = date, y = rate, data = rates.sub2.m,
  group = currency, geom = "line", alpha = I(0.5)
) +
  facet_wrap(facets = ~cl, ncol = 3)
# World outline from the maps package, drawn first as paths and then as
# filled polygons.
library(maps)
world <- map_data("world")

# The `order` aesthetic was deprecated in ggplot2 2.0.0 and is ignored (with
# a warning). Paths and polygons are drawn in row order, so make that order
# explicit here instead of mapping it.
world <- world[order(world$order), ]

ggplot(data = world) +
  geom_path(aes(x = long, y = lat, group = group)) +
  theme_solid()

ggplot(data = world) +
  geom_polygon(aes(x = long, y = lat, group = group), fill = "grey70") +
  theme_solid()
# Join the per-country table to the map polygons and fill each country by
# its `cl` value.
# NOTE(review): rates.countries is built outside this view; it is assumed to
# carry `name` and `cl` columns - confirm.
rates.map <- merge(rates.countries, world, by.x = "name", by.y = "region")

# merge() does not keep the polygon vertex order, so restore it. Polygons
# are drawn in row order; the deprecated `order` aesthetic is not needed.
rates.map <- rates.map[order(rates.map$order), ]
rates.map$cl <- factor(rates.map$cl)

ggplot(data = rates.map) +
  geom_polygon(aes(x = long, y = lat, group = group, fill = cl)) +
  scale_fill_brewer(palette = "Dark2") +
  theme_solid() +
  # "none" must be lower case; "None" is not a valid legend position.
  theme(legend.position = "none")
Map and line plots
# Side-by-side map and line plot for two values of `cl`: each row shows where
# the group's countries are and how their rates moved.
#
# Two fixes relative to the earlier version:
#   * the deprecated `order` aesthetic is dropped (vertices are drawn in row
#     order, and rates.map / world are already sorted by `order`);
#   * legend.position uses the valid lower-case value "none".

# cl == 3: map, with the world outline in light grey on top.
p1 <- ggplot(data = subset(rates.map, cl == 3)) +
  geom_polygon(aes(x = long, y = lat, group = group), fill = "#1B9E77") +
  geom_path(
    aes(x = long, y = lat, group = group),
    data = world, colour = "grey90"
  ) +
  theme_solid() +
  theme(legend.position = "none")

# cl == 3: one faint line per currency.
p2 <- ggplot(data = subset(rates.sub2.m, cl == 3)) +
  geom_line(
    aes(x = date, y = rate, group = currency),
    alpha = 0.2, colour = "#1B9E77"
  ) +
  theme(legend.position = "none")

# cl == 2: map; aesthetics are set once in ggplot() and inherited by both
# layers.
p3 <- ggplot(
  aes(x = long, y = lat, group = group),
  data = subset(rates.map, cl == 2)
) +
  geom_polygon(fill = "#D95F02") +
  geom_path(data = world, colour = "grey90") +
  theme_solid() +
  theme(legend.position = "none")

# cl == 2: one faint line per currency.
p4 <- ggplot(data = subset(rates.sub2.m, cl == 2)) +
  geom_line(
    aes(x = date, y = rate, group = currency),
    alpha = 0.2, colour = "#D95F02"
  ) +
  theme(legend.position = "none")

# Arrange the four panels in a 2 x 2 grid.
library(gridExtra)
grid.arrange(p1, p2, p3, p4, ncol = 2)
!! Requires internet connection
# Download a base map for the given longitude/latitude and draw it.
library(ggmap)
melb <- get_map(location = c(144.9631, -37.8136))
ggmap(melb) +
  theme_solid()

# Read the polling-place table and overlay its Long/Lat coordinates as
# points on the base map.
poll_loc <- read_csv(
  file = "http://dicook.github.io/Monash-R/data/polling-places.csv"
)
ggmap(melb) +
  geom_point(data = poll_loc, mapping = aes(x = Long, y = Lat)) +
  theme_solid()
Work out how to get a watercolor map as the background.
# Same location, requesting the "watercolor" map type as the background.
melb <- get_map(
  location = c(144.9631, -37.8136),
  maptype = "watercolor"
)
ggmap(melb) +
  theme_solid()
Plots
Data
# Read the internet-usage survey table and draw a bar chart of the
# `Social networks` responses, one panel per Gender (rows) and name (columns).
internet <- read_csv("http://dicook.github.io/Monash-R/data/internet.csv")

# `binwidth` is not a geom_bar parameter (deprecated in ggplot2 2.0.0 and
# later removed); a bar chart counts rows at each distinct x value, so it is
# simply dropped.
qplot(`Social networks`, data = internet, geom = "bar") +
  facet_grid(Gender ~ name)
# Stacked bars: Gender as fill, one panel per `name`, five panels per row.
ggplot(internet) +
  geom_bar(mapping = aes(x = `Social networks`, fill = Gender)) +
  facet_wrap(facets = ~name, ncol = 5) +
  theme(legend.position = "bottom")

# Same chart with the Gender bars placed side by side instead of stacked.
ggplot(internet) +
  geom_bar(
    mapping = aes(x = `Social networks`, fill = Gender),
    position = "dodge"
  ) +
  facet_wrap(facets = ~name, ncol = 5) +
  theme(legend.position = "bottom")
# Read the graduate-programs table and compare AvGREs across subjects with
# side-by-side boxplots.
grad <- read_csv(
  file = "http://dicook.github.io/Monash-R/data/graduate-programs.csv"
)
qplot(x = subject, y = AvGREs, data = grad, geom = "boxplot")
How do the four programs compare in terms of average number of publications? Number of students?
Can you find the odd one out?
Is it easier now?
What's wrong with this plot? Can you answer: Is the proportion of girls who use social networks every day (4) higher than boys, in Australia? And is this different in Germany?
Is this easier? But what about answering this: Are German girls more likely to report using social networks once or twice per month (1) than Japanese girls?
Are German girls more likely to report using social networks once or twice per month (1) than Japanese girls? Easier?
Using rmarkdown, generate a document with several plots to answer these questions:
This work is licensed under the Creative Commons Attribution-Noncommercial 3.0 United States License. To view a copy of this license, visit http://creativecommons.org/licenses/by-nc/3.0/us/ or send a letter to Creative Commons, 171 Second Street, Suite 300, San Francisco, California, 94105, USA.