Data Visualization - the RainCloud Plot in R


The raincloud plot is emerging as a very informative way to present raw data. Essentially, it combines a box plot, a violin plot, and a scatter plot in a visually pleasing way.

The raincloud plot requires some basic packages such as ggplot2, so we need to load them first.

1
2
3
4
## load packages
library(ggplot2)
library(plyr)
library(scales)

Then we set the working directory and load the test data.

1
2
3
4
5
## set work directory and load the data
# NOTE: the path below is a placeholder; point it at the folder holding example.csv
setwd('/to/whenever/the/data/is')
## read data
# here I read a csv file
# TRUE is spelled out because the shorthand T is an ordinary variable that can be reassigned
test <- read.csv('example.csv', header = TRUE, sep = ',')

A key function for the raincloud plot is geom_flat_violin. It is not a standard ggplot2 function, so we need to define it beforehand.

The following function definition is taken from this web page: https://gist.github.com/dgrtwo/eb7750e74997891d7c20. Copy and paste it into your code.

Alternatively, if you would rather not copy the lengthy code, you can source the function directly with the following line. Either way is fine:

1
## load the geom_flat_violin definition directly from the gist (needs internet access)
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")

geom_flat_violin function definition:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# =========================== function definition ===========================

## define the geom_flat_violin function and its helpers
# devtools::install_github(repo = "IndrajeetPatil/ggstatsplot")

# Null-coalescing operator: evaluates to `a` unless `a` is NULL, in which
# case it evaluates to `b`. `b` is only evaluated when it is needed.
"%||%" <- function(a, b) {
  if (is.null(a)) {
    b
  } else {
    a
  }
}

# Layer constructor for the flat (half) violin geom.
#
# mapping, data, stat, position, show.legend and inherit.aes are handed to
# ggplot2::layer() unchanged. `trim`, `scale` and any extra arguments given
# through `...` are bundled into the layer's `params` list.
# Returns the object created by ggplot2::layer(), to be added to a plot with `+`.
geom_flat_violin <- function(mapping = NULL,
                             data = NULL,
                             stat = "ydensity",
                             position = "dodge",
                             trim = TRUE,
                             scale = "area",
                             show.legend = NA,
                             inherit.aes = TRUE,
                             ...) {
  # Collect the stat/geom settings together with everything forwarded via `...`
  layer_params <- list(trim = trim, scale = scale, ...)

  ggplot2::layer(
    geom = GeomFlatViolin,
    stat = stat,
    data = data,
    mapping = mapping,
    position = position,
    params = layer_params,
    inherit.aes = inherit.aes,
    show.legend = show.legend
  )
}

# ggproto geom used by geom_flat_violin(). It draws each group as a polygon
# whose outline runs up the group's x position and back down along
# x + violinwidth * (xmax - x), i.e. a violin with only one side.
GeomFlatViolin <-
  ggplot2::ggproto(
    "GeomFlatViolin",
    ggplot2::Geom,

    # Adds the bounding rectangle (xmin, xmax, ymin, ymax) of every group to
    # the layer data and returns the augmented data.
    setup_data = function(data, params) {
      # Width: the `width` column if present, else the `width` parameter,
      # else 90% of the resolution of x.
      data$width <- data$width %||%
        params$width %||% (ggplot2::resolution(data$x, FALSE) * 0.9)

      # Plain function calls are used instead of `%>%`: the pipe is only
      # available when magrittr/dplyr is attached, which this script does not
      # do, so the piped version fails with "could not find function %>%".
      grouped <- dplyr::group_by(data, group)

      # ymin, ymax, xmin, and xmax define the bounding rectangle for each group
      bounded <- dplyr::mutate(
        grouped,
        ymin = min(y),
        ymax = max(y),
        xmin = x,
        xmax = x + width / 2
      )

      # Drop the grouping so later steps receive ordinary, ungrouped data
      dplyr::ungroup(bounded)
    },

    # Builds the closed outline for one group and draws it as a polygon.
    # NOTE(review): `violinwidth` is expected to be supplied by the stat
    # (the constructor defaults to "ydensity") - confirm for other stats.
    draw_group = function(data, panel_scales, coord) {
      # Find the points for the line to go all the way around
      data <- base::transform(
        data,
        xminv = x,
        xmaxv = x + violinwidth * (xmax - x)
      )

      # Make sure it's sorted properly to draw the outline:
      # flat side in increasing y, curved side in decreasing y
      newdata <-
        base::rbind(
          dplyr::arrange(.data = base::transform(data, x = xminv), y),
          dplyr::arrange(.data = base::transform(data, x = xmaxv), -y)
        )

      # Close the polygon: set first and last point the same
      # Needed for coord_polar and such
      newdata <- rbind(newdata, newdata[1, ])

      # ggname() is an unexported ggplot2 helper, hence the `:::`
      ggplot2:::ggname(
        "geom_flat_violin",
        ggplot2::GeomPolygon$draw_panel(newdata, panel_scales, coord)
      )
    },

    # Legend keys are drawn as filled polygons
    draw_key = ggplot2::draw_key_polygon,

    # Aesthetic defaults applied when the user does not map or set them
    default_aes = ggplot2::aes(
      weight = 1,
      colour = "grey20",
      fill = "white",
      size = 0.5,
      alpha = NA,
      linetype = "solid"
    ),

    required_aes = c("x", "y")
  )
# =========================== function definition ===========================

Everything we need is ready, so let’s do the plot!

First, we build a data frame in the form that ggplot2 usually expects: one column with the values and one column with the condition labels.

1
2
3
4
## make data frame
# one row per observation: `rt` stacks the two columns of `test`,
# `condition` labels each row with the column it came from
df <- data.frame(
  rt = c(test$subi_att, test$subi_unatt),
  condition = rep(
    c('Attended', 'Unattended'),
    times = c(length(test$subi_att), length(test$subi_unatt))
  )
)

Then we draw the plot.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
## raincloud plot: half violin, box plot and jittered raw points per condition
ggplot(data = df, mapping = aes(x = condition, y = rt, fill = condition)) +
  # half violin, nudged to the right of the box plot
  geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .5) +
  # narrow box plot; outliers are hidden because the raw points are drawn anyway
  geom_boxplot(width = .1, outlier.shape = NA, alpha = 0.5) +
  # raw observations, jittered
  geom_point(
    aes(y = rt, color = condition),
    position = position_jitter(width = .15),
    size = .8,
    alpha = 0.8
  ) +
  labs(x = "Condition", y = "Response Time (s)") +
  # all theme settings collected in a single call
  theme(
    legend.position = "none",
    legend.title = element_text(family = "Times New Roman"),
    axis.title = element_text(family = "Times New Roman"),
    axis.text = element_text(size = 10, family = "Times New Roman"),
    axis.line = element_line(colour = 'black', size = 0.5),
    axis.ticks = element_line(colour = "black", size = 0.4),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
    # panel.border = element_rect(color = 'black', fill = NA, size = 1)
  ) +
  ## y axis starts from non-zero point
  scale_y_continuous(limits = c(.2, .8), expand = c(0, 0), oob = rescale_none) +
  scale_x_discrete(expand = c(0.2, 0.2))

# no plot argument is given, so ggsave() falls back to its default plot
ggsave('plot.tiff')

We will then see the figure, which is also saved as plot.tiff in the working directory.