# Introduction

## Basic Explanation of ggmosaic

• designed to create visualizations of categorical data
• can produce bar charts, stacked bar charts, mosaic plots, and double decker plots
• plots are constructed hierarchically, so the ordering of the variables is very important.
• integrated in ggplot2 as a geom
• allows for ggplot2 tools, such as faceting and layering

## Creation of ggmosaic

ggmosaic was created primarily using ggproto and the productplots package

ggproto allows you to extend ggplot2 from within your own packages

• ggmosaic began as a geom extension of the rect geom
• used the data handling provided in the productplots package
• calculates xmin, xmax, ymin, and ymax for the rect geom to plot

## ggplot2 limitations

ggplot2 is not capable of handling a variable number of variables

• current solution: read in the variables x1 and x2 as x = product(x1, x2)

• product function:
• a wrapper function for a list
• allows the variable list to pass check_aesthetics

These limitations also lead to issues with the labeling, but those can be manually fixed.

## geom_mosaic: setting the aesthetics

Aesthetics that can be set:

• weight : select a weighting variable
• x : select variables to add to formula
• declared as x = product(x1, x2, …)
• fill : select a variable to be filled
• if the variable is not also called in x, it will be added to the formula in the first position
• conds : select a variable to condition on

These values are then sent through productplots functions to create the formula for the desired distribution

Formula: weight ~ fill + x | conds

### From the aesthetics to the formula

Example of how the formula is built

• weight = 1
• x = product(Y, X)
• fill = W
• conds = Z

These aesthetics set up the formula for the distribution:

Formula: 1 ~ W + Y + X | Z

Because a mosaic plot is constructed hierarchically through alternating spines, the ordering of the variables is very important.

## Weight ~ X

 # One-variable mosaic: distribution of SleepHrsNight, weighted by Weight.
 ggplot(data = NHANES) +
   geom_mosaic(
     aes(
       weight = Weight,
       x = product(SleepHrsNight),
       fill = factor(SleepHrsNight)
     ),
     na.rm = TRUE
   ) +
   labs(x = "Hours of sleep a night ", title = "f(SleepHrsNight)") +
   # Legend entries are listed in reverse order.
   guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

## Weight ~ Y + X

 # Two-variable mosaic: SleepHrsNight and AgeDecade, weighted by Weight.
 ggplot(data = NHANES) +
   geom_mosaic(
     aes(
       weight = Weight,
       x = product(SleepHrsNight, AgeDecade),
       fill = factor(SleepHrsNight)
     ),
     na.rm = TRUE
   ) +
   # Tilt the x-axis labels.
   theme(axis.text.x = element_text(angle = -25, hjust = 0.1)) +
   labs(
     x = "Age in Decades ",
     title = "f(SleepHrsNight | AgeDecade) f(AgeDecade)"
   ) +
   guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

## Weight ~ X + Y | Z

 # Mosaic of SleepHrsNight and AgeDecade, conditioned on Gender via `conds`.
 # The divider is mosaic("v") rather than the default.
 ggplot(data = NHANES) +
   geom_mosaic(
     aes(
       x = product(SleepHrsNight, AgeDecade),
       fill = factor(SleepHrsNight),
       conds = product(Gender)
     ),
     na.rm = TRUE,
     divider = mosaic("v")
   ) +
   theme(axis.text.x = element_text(angle = -25, hjust = 0.1)) +
   labs(
     x = "Age in Decades ",
     title = "f(SleepHrsNight, AgeDecade | Gender)"
   ) +
   guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

## Alternative to conditioning: faceting

# Same two-variable mosaic, but split by Gender with facet_grid() instead of
# passing Gender through the `conds` aesthetic.
ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE
  ) +
  theme(axis.text.x = element_text(angle = -25, hjust = 0.1)) +
  labs(
    x = "Age in Decades ",
    title = "f(SleepHrsNight, AgeDecade | Gender)"
  ) +
  # One facet row per Gender level.
  facet_grid(Gender ~ .) +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

## Importance of ordering

# Two plots of the same variables that differ only in the order given to
# product(), shown side by side with a single shared legend.

# SleepHrsNight first, Gender second.
order1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      weight = Weight,
      x = product(SleepHrsNight, Gender),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    offset = 0.015
  ) +
  labs(x = "Gender ", title = "f(SleepHrsNight | Gender)  f(Gender)") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE)) +
  theme(plot.title = element_text(size = rel(1)))

# Gender first, SleepHrsNight second; the coordinates are flipped afterwards.
order2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      weight = Weight,
      x = product(Gender, SleepHrsNight),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    offset = 0.015
  ) +
  labs(
    x = "",
    y = "Gender",
    title = "f(Gender | SleepHrsNight)  f(SleepHrsNight)"
  ) +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE)) +
  theme(plot.title = element_text(size = rel(1))) +
  coord_flip()

grid_arrange_shared_legend(
  order1, order2,
  ncol = 2, nrow = 1, position = "right"
)

## Other features of geom_mosaic

Arguments unique to geom_mosaic:

• divider: used to declare the type of partitions to be used
• offset: sets the size of the space between the partitions of the first spine

## Divider function: Types of partitioning

Four options are available for each partition:

• vspine: width constant, height varies.
• hspine: height constant, width varies.
• vbar: height constant, width varies.
• hbar: width constant, height varies.
# One-variable plots of SleepHrsNight, one per divider type, arranged in a
# 2 x 2 grid with a shared legend. Each plot hides its own legend.

# divider = "hbar"
a2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(x = product(SleepHrsNight), fill = factor(SleepHrsNight)),
    divider = "hbar",
    na.rm = TRUE
  ) +
  theme(
    # axis.text.x = element_text(angle = 35, hjust = 1),
    legend.position = "none"
  ) +
  labs(x = " ", title = 'divider= "hbar"') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# divider = "hspine"
a1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(x = product(SleepHrsNight), fill = factor(SleepHrsNight)),
    divider = "hspine",
    na.rm = TRUE
  ) +
  theme(
    # axis.text.x = element_text(angle = 35, hjust = 1),
    legend.position = "none"
  ) +
  labs(x = " ", title = 'divider= "hspine"') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# divider = "vbar"
b2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(x = product(SleepHrsNight), fill = factor(SleepHrsNight)),
    divider = "vbar",
    na.rm = TRUE
  ) +
  theme(legend.position = "none") +
  labs(y = " ", x = "", title = 'divider= "vbar"') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# divider = "vspine"
b1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(x = product(SleepHrsNight), fill = factor(SleepHrsNight)),
    divider = "vspine",
    na.rm = TRUE
  ) +
  theme(legend.position = "none") +
  labs(y = " ", x = "", title = 'divider= "vspine"') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

grid_arrange_shared_legend(
  a1, a2, b1, b2,
  ncol = 2, nrow = 2, position = "right"
)

## Partitioning with one or more variables

• mosaic()
• default
• will use spines in alternating directions
• begins with a horizontal spine
• mosaic("v")
• begins with a vertical spine and then alternates
• ddecker()
• selects n-1 horizontal spines and ends with a vertical spine
• Define each type of partition
• c("hspine", "vspine", "hbar")
#set.separators(c(":", ";","|"))

# Three-variable mosaics (SleepHrsNight, Gender, AgeDecade) comparing the
# divider helpers mosaic("h"), mosaic("v") and ddecker(). Each plot hides its
# x-axis text and its own legend; one shared legend is drawn by
# grid_arrange_shared_legend().
# `na.rm` is spelled TRUE rather than T: T is an ordinary variable that can be
# reassigned, TRUE is a reserved constant.

# Titled "mosaic()" although the call passes "h" explicitly.
# NOTE(review): presumably "h" is the default direction of mosaic() -- confirm.
m1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = mosaic("h")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = "divider= mosaic()") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

m2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = mosaic("v")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= mosaic("v")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

m3 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = ddecker()
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = "divider= ddecker()") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

grid_arrange_shared_legend(
  m1, m2, m3,
  ncol = 3, nrow = 1, position = "right"
)

# Three-variable mosaics (SleepHrsNight, Gender, AgeDecade) where the divider
# is given explicitly as a character vector with one partition type per
# variable. Each plot hides its x-axis text and its own legend; one shared
# legend is drawn by grid_arrange_shared_legend().
# `na.rm` is spelled TRUE rather than T: T is an ordinary variable that can be
# reassigned, TRUE is a reserved constant.

m4 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = c("vspine", "vspine", "hbar")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= c("vspine", "vspine", "hbar")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

m5 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = c("hbar", "vspine", "hbar")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= c("hbar", "vspine", "hbar")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

m6 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = c("hspine", "hspine", "hspine")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= c("hspine", "hspine", "hspine")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

m7 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = c("vspine", "vspine", "vspine")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= c("vspine", "vspine", "vspine")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

grid_arrange_shared_legend(
  m4, m5, m6, m7,
  ncol = 2, nrow = 2, position = "right"
)

## geom_mosaic: offset

offset: sets the size of the space between the partitions of the first spine

• default = 0.01
• space between partitions decreases as layers build

# Offset comparison: the same plot drawn with and without an explicit
# offset = 0. oo1/oo2 use the NHANES data; o1/o2 build the same pair from the
# `happy` data. Only oo1 and oo2 are arranged at the end of this block.

# NHANES, no offset argument (titled " offset = 0.01").
oo1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(weight = Weight, x = product(Age), fill = factor(SleepHrsNight)),
    na.rm = TRUE
  ) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  labs(x = "Age", y = " ", title = " offset = 0.01") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# happy, no offset argument. The fill variable is `marital`, so the legend is
# titled accordingly (it was previously mislabelled "SleepHrsNight").
o1 <- ggplot(data = happy) +
  geom_mosaic(aes(weight = wtssall, x = product(age), fill = marital)) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  labs(x = "Age", y = " ", title = " offset = 0.01") +
  guides(fill = guide_legend(title = "marital", reverse = TRUE))

# NHANES, offset = 0.
oo2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(weight = Weight, x = product(Age), fill = factor(SleepHrsNight)),
    offset = 0,
    na.rm = TRUE
  ) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  labs(x = "Age", y = " ", title = " offset = 0") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# happy, offset = 0; legend titled after the `marital` fill variable.
o2 <- ggplot(data = happy) +
  geom_mosaic(
    aes(weight = wtssall, x = product(age), fill = marital),
    offset = 0
  ) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  labs(x = "Age", y = " ", title = " offset = 0") +
  guides(fill = guide_legend(title = "marital", reverse = TRUE))

grid_arrange_shared_legend(
  oo1, oo2,
  nrow = 1, ncol = 2, position = "right"
)

## Plotly

# Double-decker mosaic of SleepHrsNight, Gender and AgeDecade, stored in `gg`
# for the (currently disabled) plotly conversion below.
# `na.rm` is spelled TRUE rather than T: T is an ordinary variable that can be
# reassigned, TRUE is a reserved constant.
gg <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = ddecker(),
    offset = 0.025
  )
# Interactive conversion is commented out for now:
# ggplotly(gg)