Mosaic plots with ggplot2

Haley Jeppson and Heike Hofmann

2017-02-08

Introduction

Basic Explanation of ggmosaic

Creation of ggmosaic

ggmosaic was created primarily using ggproto and the productplots package

ggproto allows you to extend ggplot2 from within your own packages

ggplot2 limitations

ggplot2 is not capable of handling a variable number of variables

These limitations also lead to issues with the labeling, but those can be manually fixed.

geom_mosaic: setting the aesthetics

Aesthetics that can be set:

These values are then sent through productplots functions to create the formula for the desired distribution

Formula: weight ~ fill + x | conds

From the aesthetics to the formula

Example of how the formula is built

  • weight = 1
  • x = product(Y, X)
  • fill = W
  • conds = Z

These aesthetics set up the formula for the distribution:

Formula: 1 ~ W + Y + X | Z

Because a mosaic plot is constructed hierarchically through alternating spines, the ordering of the variables is very important.

Weight ~ X

 # One-variable mosaic of SleepHrsNight; the weight aesthetic is mapped to the
 # Weight column and rows with missing values are dropped (na.rm = TRUE).
 ggplot(data = NHANES) +
   geom_mosaic(
     aes(
       weight = Weight,
       x = product(SleepHrsNight),
       fill = factor(SleepHrsNight)
     ),
     na.rm = TRUE
   ) +
   labs(x = "Hours of sleep a night ", title = 'f(SleepHrsNight)') +
   guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

Weight ~ Y + X

 # Two-variable mosaic: x is product(SleepHrsNight, AgeDecade), weighted by the
 # Weight column. The x-axis labels are rotated so the age groups stay legible.
 ggplot(data = NHANES) +
   geom_mosaic(
     aes(
       weight = Weight,
       x = product(SleepHrsNight, AgeDecade),
       fill = factor(SleepHrsNight)
     ),
     na.rm = TRUE
   ) +
   theme(axis.text.x = element_text(angle = -25, hjust = 0.1)) +
   labs(
     x = "Age in Decades ",
     title = 'f(SleepHrsNight | AgeDecade) f(AgeDecade)'
   ) +
   guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

Weight ~ X + Y | Z

 # Mosaic of product(SleepHrsNight, AgeDecade) with Gender supplied through the
 # conds aesthetic; the divider is mosaic("v"). No weight aesthetic is set here.
 ggplot(data = NHANES) +
   geom_mosaic(
     aes(
       x = product(SleepHrsNight, AgeDecade),
       fill = factor(SleepHrsNight),
       conds = product(Gender)
     ),
     na.rm = TRUE,
     divider = mosaic("v")
   ) +
   theme(axis.text.x = element_text(angle = -25, hjust = 0.1)) +
   labs(
     x = "Age in Decades ",
     title = 'f(SleepHrsNight, AgeDecade | Gender)'
   ) +
   guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

Alternative to conditioning: facetting

# Same x and fill mapping as a conditional plot, but Gender is handled by
# facet_grid(Gender ~ .) instead of the conds aesthetic.
ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE
  ) +
  theme(axis.text.x = element_text(angle = -25, hjust = 0.1)) +
  labs(
    x = "Age in Decades ",
    title = 'f(SleepHrsNight, AgeDecade | Gender)'
  ) +
  facet_grid(Gender ~ .) +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

Importance of ordering

# The same two variables in both orders. order1 uses
# product(SleepHrsNight, Gender); order2 uses product(Gender, SleepHrsNight)
# and flips the coordinates.
order1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      weight = Weight,
      x = product(SleepHrsNight, Gender),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    offset = 0.015
  ) +
  labs(x = "Gender ", title = 'f(SleepHrsNight | Gender)  f(Gender)') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE)) +
  theme(plot.title = element_text(size = rel(1)))

order2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      weight = Weight,
      x = product(Gender, SleepHrsNight),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    offset = 0.015
  ) +
  labs(
    x = "",
    y = "Gender",
    title = 'f(Gender | SleepHrsNight)  f(SleepHrsNight)'
  ) +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE)) +
  theme(plot.title = element_text(size = rel(1))) +
  coord_flip()

# Draw both orderings side by side with a single legend on the right.
grid_arrange_shared_legend(
  order1, order2,
  ncol = 2, nrow = 1, position = "right"
)

Other features of geom_mosaic

Arguments unique to geom_mosaic:

Divider function: Types of partitioning

Four options are available for each partition:

# One plot per divider type for a single variable: "hspine", "hbar",
# "vspine" and "vbar". The legends are hidden on each plot because the grid
# call below requests one shared legend.
a1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(x = product(SleepHrsNight), fill = factor(SleepHrsNight)),
    divider = "hspine",
    na.rm = TRUE
  ) +
  theme(
    # axis.text.x = element_text(angle = 35, hjust = 1),  (left disabled)
    legend.position = "none"
  ) +
  labs(x = " ", title = 'divider= "hspine"') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

a2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(x = product(SleepHrsNight), fill = factor(SleepHrsNight)),
    divider = "hbar",
    na.rm = TRUE
  ) +
  theme(
    # axis.text.x = element_text(angle = 35, hjust = 1),  (left disabled)
    legend.position = "none"
  ) +
  labs(x = " ", title = 'divider= "hbar"') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

b1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(x = product(SleepHrsNight), fill = factor(SleepHrsNight)),
    divider = "vspine",
    na.rm = TRUE
  ) +
  theme(legend.position = "none") +
  labs(y = " ", x = "", title = 'divider= "vspine"') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

b2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(x = product(SleepHrsNight), fill = factor(SleepHrsNight)),
    divider = "vbar",
    na.rm = TRUE
  ) +
  theme(legend.position = "none") +
  labs(y = " ", x = "", title = 'divider= "vbar"') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

grid_arrange_shared_legend(
  a1, a2, b1, b2,
  ncol = 2, nrow = 2, position = "right"
)

Partitioning with one or more variables

# set.separators(c(":", ";", "|"))  # left disabled, as in the original

# Three-variable mosaics that differ only in the divider: mosaic("h"),
# mosaic("v") and ddecker(). na.rm is spelled TRUE rather than T because T is
# an ordinary variable that can be reassigned.
m1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = mosaic("h")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= mosaic()') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

m2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = mosaic("v")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= mosaic("v")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

m3 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = ddecker()
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= ddecker()') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

grid_arrange_shared_legend(
  m1, m2, m3,
  ncol = 3, nrow = 1, position = "right"
)

# Three-variable mosaics where the divider is given explicitly as a character
# vector with one entry per variable. na.rm is spelled TRUE rather than T
# because T is an ordinary variable that can be reassigned.
m4 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = c("vspine", "vspine", "hbar")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= c("vspine", "vspine", "hbar")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

m5 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = c("hbar", "vspine", "hbar")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= c("hbar", "vspine", "hbar")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

m6 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = c("hspine", "hspine", "hspine")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= c("hspine", "hspine", "hspine")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

m7 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = c("vspine", "vspine", "vspine")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= c("vspine", "vspine", "vspine")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

grid_arrange_shared_legend(
  m4, m5, m6, m7,
  ncol = 2, nrow = 2, position = "right"
)

geom_mosaic: offset

offset: sets the amount of space between the partitions of the first spine

Adjusting the offset

# Offset comparison. oo1/oo2 use NHANES (Age by SleepHrsNight); o1/o2 use the
# happy data (age by marital). Within each pair the first plot leaves offset
# unset and the second passes offset = 0.
oo1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(weight = Weight, x = product(Age), fill = factor(SleepHrsNight)),
    na.rm = TRUE
  ) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  labs(x = "Age", y = " ", title = " offset = 0.01") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# The legend title names the variable actually mapped to fill (marital); it
# previously read "SleepHrsNight", copied from the NHANES plots.
o1 <- ggplot(data = happy) +
  geom_mosaic(aes(weight = wtssall, x = product(age), fill = marital)) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  labs(x = "Age", y = " ", title = " offset = 0.01") +
  guides(fill = guide_legend(title = "marital", reverse = TRUE))

oo2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(weight = Weight, x = product(Age), fill = factor(SleepHrsNight)),
    offset = 0,
    na.rm = TRUE
  ) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  labs(x = "Age", y = " ", title = " offset = 0") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

o2 <- ggplot(data = happy) +
  geom_mosaic(
    aes(weight = wtssall, x = product(age), fill = marital),
    offset = 0
  ) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  labs(x = "Age", y = " ", title = " offset = 0") +
  guides(fill = guide_legend(title = "marital", reverse = TRUE))

# Only the NHANES pair is drawn; o1 and o2 are built but not passed here.
grid_arrange_shared_legend(
  oo1, oo2,
  nrow = 1, ncol = 2, position = "right"
)

Plotly

# Double-decker mosaic kept in gg for the (currently disabled) plotly call.
# na.rm is spelled TRUE rather than T because T is an ordinary variable that
# can be reassigned.
gg <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = ddecker(),
    offset = 0.025
  )
# just for now commented out ggplotly(gg)