The ggpubr package provides easy-to-use functions for creating and customizing ggplot2-based plots.
In particular, it contains:
already designed plots that you can explore @ https://rpkgs.datanovia.com/ggpubr/, including the table plot
several statistical methods for adding correlation coefficients, p-values evaluating the difference between distributions, and so on…
additional methods for ggplot2 objects, like arranging more than one plot in the same panel
If we have two continuous variables and we want to
see the relation between them, we can make a scatter
plot. Let’s say that by plotting we notice a possible correlation
between the two. We want to check, directly on the plot, whether the
correlation we see is statistically significant. In this case we
can use stat_cor().
We will use, once again, the mpg dataset,
and we want to see the relation between the miles per
gallon on the highway and in the city:
library(ggpubr)
library(ggplot2)
# Scatter plot of highway vs. city mileage; stat_cor() adds the correlation
# coefficient and its p-value to the panel.
ggplot(data = mpg, mapping = aes(x = hwy, y = cty)) +
  geom_point() +
  stat_cor() +
  theme_bw()
As we can see, the correlation coefficient R and its statistical significance (the p-value p) are shown at the top of the graph.
If we have a continuous variable and we want to
compare the values it assumes in different
groups, we can make a boxplot. Then we may want to evaluate whether
there is a statistically significant difference between the values
assumed in each group. For example, we may want to know if there is a
significant difference in the miles per gallon in the city between
cars produced in 1999 and in 2008. We will add
stat_compare_means()
to evaluate the difference. Notice: as
the year is a continuous variable in the data frame, we will first
convert it to a discrete one by changing its type to character. We
will use notches in the boxplot to better see
differences.
# Notched boxplots of city mileage per model year. `year` is converted to
# character so that it is treated as a discrete grouping variable.
ggplot(data = mpg, mapping = aes(x = as.character(year), y = cty)) +
  geom_boxplot(width = 0.6, notch = TRUE) +
  stat_compare_means(label.x = 1.5) +
  theme_bw()
This means that there is no statistically significant difference (p-value=0.92).
If we compare the city miles per gallon for the suv
and
compact
classes, we obtain a low p-value, indicating that
the two groups are statistically different.
# Same comparison, restricted to the "suv" and "compact" classes.
ggplot(
  data = mpg[mpg$class %in% c("suv", "compact"), ],
  mapping = aes(x = class, y = cty)
) +
  geom_boxplot(width = 0.6, notch = TRUE) +
  stat_compare_means(label.x = 1.5) +
  theme_bw()
We can also change the type of test performed (default: Wilcoxon test) or display the p-value as a symbol instead of a number.
# "suv" vs. "compact" again, but compared with a t-test instead of the
# default test.
ggplot(
  data = mpg[mpg$class %in% c("suv", "compact"), ],
  mapping = aes(x = class, y = cty)
) +
  geom_boxplot(width = 0.6, notch = TRUE) +
  stat_compare_means(method = "t.test", label.x = 1.5) +
  theme_bw()
# t-test between the "suv" and "compact" classes, displayed as a significance
# symbol instead of a numeric p-value.
# `label = "p.signif"` replaces `aes(label = ..p.signif..)`: the `..var..`
# notation is deprecated since ggplot2 3.4.0 and raises a warning there.
ggplot(
  subset(mpg, class %in% c("suv", "compact")),
  aes(x = class, y = cty)
) +
  geom_boxplot(notch = TRUE, width = 0.6) +
  theme_bw() +
  stat_compare_means(label.x = 1.5, method = "t.test", label = "p.signif")
If we do not have only two groups but more, we have to decide which comparisons we want to see. For example:
# Pairs of `drv` levels to be tested against each other. Defined once and
# reused by both plots below.
my_comparisons <- list(c("f", "4"), c("4", "r"), c("r", "f"))

# Pairwise comparisons annotated with numeric p-values.
ggplot(mpg, aes(x = drv, y = cty)) +
  geom_boxplot() +
  theme_bw() +
  stat_compare_means(comparisons = my_comparisons)

# Same comparisons annotated with significance symbols.
# `label = "p.signif"` replaces `aes(label = ..p.signif..)`: the `..var..`
# notation is deprecated since ggplot2 3.4.0 and raises a warning there.
ggplot(mpg, aes(x = drv, y = cty)) +
  geom_boxplot() +
  theme_bw() +
  stat_compare_means(comparisons = my_comparisons, label = "p.signif")
This means that there is a statistically significant difference between “4” and “f” and between “r” and “f”, but not between “4” and “r”.
Sometimes you have to show a data table as an image.
ggpubr allows you to do that through the ggtexttable()
function.
You can simply give your dataframe to the function, but you can also customize your plot by defining theme, styles, colors.
Visit https://rpkgs.datanovia.com/ggpubr/reference/ggtexttable.html for all options. You will find lots of examples.
Let’s create a dataframe and make a figure with it.
# Small example table: flower names and the number remaining of each.
df <- data.frame(
  flower = c("rose", "daffodil", "tulip", "orchid", "lily"),
  N_remaining = c(5, 20, 14, 2, 0)
)
df
## flower N_remaining
## 1 rose 5
## 2 daffodil 20
## 3 tulip 14
## 4 orchid 2
## 5 lily 0
# Table drawn without row names, using the default theme.
ggtexttable(df, rows = NULL)

# Same table with the "classic" theme.
ggtexttable(df, theme = ttheme("classic"), rows = NULL)

# Same table with a custom theme: white text on a green header, and two
# light-green fill colours for the table body.
ggtexttable(
  df,
  rows = NULL,
  theme = ttheme(
    tbody.style = tbody_style(fill = c("#e8f3de", "#d3e8bb"), color = "black"),
    colnames.style = colnames_style(fill = "#8cc257", color = "white")
  )
)
The ggarrange()
function can arrange multiple ggplot objects together
and, if you want, create a
single shared legend. You can also specify how many
rows or columns you prefer.
# Scatter plot of highway vs. city mileage, coloured by vehicle class.
a <- ggplot(mpg, aes(x = hwy, y = cty, color = class)) +
  geom_point() +
  theme_bw()

# City mileage per model year, with the individual cars jittered on top and
# coloured by vehicle class.
b <- ggplot(mpg, aes(x = as.character(year), y = cty)) +
  geom_boxplot() +
  geom_jitter(aes(color = class)) +
  xlab("Years") +
  theme_bw()

# Both plots on a single row, sharing one legend.
ggarrange(a, b, common.legend = TRUE, nrow = 1)
Exploring the ggpubr
package in more detail, we can create a
ready-to-publish figure that combines multiple ggplots
on the same page or over multiple pages.
# Panel A: highway vs. city mileage, coloured by vehicle class.
a <- ggplot(mpg, aes(x = hwy, y = cty, color = class)) +
  geom_point() +
  theme_bw()

# Panel B: city mileage per model year with jittered points coloured by class.
b <- ggplot(mpg, aes(x = as.character(year), y = cty)) +
  geom_boxplot() +
  geom_jitter(aes(color = class)) +
  xlab("Years") +
  theme_bw()

# Panel C: table of the number of cars per year and class.
df2 <- as.data.frame(
  table(year = mpg$year, class = mpg$class),
  responseName = "N"
)
c <- ggtexttable(df2, theme = ttheme("classic"), rows = NULL)

# Panel D: filled 2D density of highway vs. city mileage.
d <- ggplot(mpg, aes(x = hwy, y = cty)) +
  geom_density2d_filled() +
  ggtitle("Density 2D") +
  theme_bw()

# All four panels in one figure, labelled A to D.
ggarrange(a, b, c, d, labels = c("A", "B", "C", "D"))
You can customize the appearance of the labels with font.label,
or control their position by using parameters like label.x,
label.y,
hjust,
and vjust.
See https://rpkgs.datanovia.com/ggpubr/reference/ggarrange.html
for all options.
# Four-panel figure with restyled panel labels. The call now starts on its own
# line; it was previously fused onto the end of the sentence above.
ggarrange(
  a, b, c, d,
  labels = c("A", "B", "C", "D"),
  font.label = list(size = 20, color = "grey50", face = "bold"),
  # hjust adjusts the horizontal position of each label: more negative values
  # move the label further to the right on the plot canvas.
  hjust = -1,
  # vjust adjusts the vertical position of each label: more positive values
  # move the label further down on the plot canvas.
  vjust = 1
)
# Note: with common.legend = TRUE the legend of the density plot disappears.
ggarrange(
  a, b, c, d,
  labels = c("A", "B", "C", "D"),
  heights = c(1, 1.5), # the second row is 1.5 times as tall as the first
  widths = c(1, 2) # the right column is twice as wide as the left one
)
annotate_figure()
supports any ggplots.
# Arrange the four panels; this call was previously fused onto the sentence
# above.
figure <- ggarrange(
  a, b, c, d,
  labels = c("A", "B", "C", "D"),
  widths = c(1, 2), # the right column is twice as wide as the left one
  heights = c(1, 1.5) # the second row is 1.5 times as tall as the first
)

# Add a title, a caption, side texts and a figure label around the arranged
# panels.
figure <- annotate_figure(
  figure,
  top = text_grob(
    "Visualizing mpg",
    color = "magenta", face = "bold", size = 14
  ),
  # The caption names the mpg data set, which is what every panel plots
  # (it previously said "mtcars").
  bottom = text_grob(
    "Data source: \n mpg data set",
    color = "navyblue", hjust = 1, x = 1, face = "italic", size = 10
  ),
  left = text_grob(
    "Figure arranged using ggarrange() function",
    color = "green",
    rot = 90 # rotation angle of the text
  ),
  right = "Figure completed",
  fig.lab = "Figure 1", fig.lab.face = "bold"
)
figure
The align argument specifies whether the graphs in the grid should be horizontally (h)
or vertically (v) aligned;
options are none (default),
hv (align in both directions),
h, and
v
.
# Vertically aligned panels. This call was previously fused onto the end of
# the sentence above.
# NOTE(review): ggarrange() documents `legend` as a single position string;
# confirm that a per-plot vector behaves as intended here.
ggarrange(
  a, b, c, d,
  labels = c("A", "B", "C", "D"),
  align = "v",
  legend = c("none", "right", "none", "right")
)

# Put "a", "b" and "c" on a single row. They would all repeat the same legend,
# so common.legend merges it into one.
plot1 <- ggarrange(
  a, b, c,
  labels = c("A", "B", "C"),
  nrow = 1,
  common.legend = TRUE
)

# Stack that row on top of "d"; the first label is left empty because the top
# row already carries its own labels.
ggarrange(plot1, d, labels = c("", "D"), nrow = 2)