基于ggplot的两分组变量条形图

更新时间：2025-01-19 工作计划版权反馈

【摘要】：我们先以一组随机生成的数据（记录三个年龄段的人购买四种商品的数量）为例，展示堆积条形图和并列条形图的画法；随后介绍如何为两分组变量条形图添加标签：条形图图层和文字图层需使用相同的分组方式（p=ggplot(...)），堆积条形图中文字的position需指向由position_stack设置的位置。

一、堆积条形图和并列条形图

本书第二章介绍了只呈现一个数值向量的简单条形图的绘制方法，本节将对更为复杂的两分组变量条形图进行讲解。

我们先以一组随机生成的数据（记录三个年龄段的人购买四种商品的数量）为例，来展示堆积条形图和并列条形图的画法。

library(ggplot2)

# Load the purchase records from the course materials; the first CSV column
# is used as the row names.
dat <- read.csv("buy.csv", row.names = 1)

# The raw records have to be reshaped before plotting. Start with an
# Item x Age contingency table (note: sometimes the data you are handed is
# already such a table).
tab <- table(dat$Item, dat$Age)

# Convert the table to the long data frame layout that ggplot accepts,
# then give the three columns meaningful names.
dat <- as.data.frame(tab)
names(dat) <- c("Item", "Age", "Number")

## Stacked bar chart: the default value of position is "stack"

# Items are laid out along the x axis; each bar is then split by age group.
ggplot(dat) +
  geom_bar(
    aes(x = Item, y = Number, fill = Age),
    stat = "identity",
    position = "stack"
  )

# Age groups are laid out along the x axis; each bar is then split by item.
ggplot(dat) +
  geom_bar(
    aes(x = Age, y = Number, fill = Item),
    stat = "identity"
  )

## Side-by-side (dodged) bar chart: this requires position = "dodge"
ggplot(dat) +
  geom_bar(
    aes(x = Item, y = Number, fill = Age),
    stat = "identity",
    position = "dodge"
  )

## Ordering
# In a stacked chart the groups inside one bar follow the factor levels from
# top to bottom; in a dodged chart they follow the factor levels from left
# to right. To change the order, change the factor levels.
dat$Age2 <- factor(dat$Age, levels = c("Young", "Middle", "Old"))

# Stacked version using the re-levelled factor
ggplot(dat) +
  geom_bar(
    aes(x = Item, y = Number, fill = Age2),
    stat = "identity"
  )

# Dodged version using the re-levelled factor
ggplot(dat) +
  geom_bar(
    aes(x = Item, y = Number, fill = Age2),
    stat = "identity",
    position = "dodge"
  )

## Width

# In a dodged chart, width is the total width of the whole cluster of bars
# that share one x position.
ggplot(dat) +
  geom_bar(
    aes(x = Item, y = Number, fill = Age),
    stat = "identity",
    position = "dodge",
    width = 0.5
  )

# position_dodge() gives finer control: the result is then determined jointly
# by geom_bar(width = ...) and position_dodge(width = ...). Keep the former
# smaller than the latter so the bars do not overlap. To widen the gaps
# between bars sharing an x position, either decrease the former or increase
# the latter.
ggplot(dat) +
  geom_bar(
    aes(x = Item, y = Number, fill = Age),
    stat = "identity",
    width = 0.6,
    position = position_dodge(width = 0.7)
  )

## Stacked bars of equal height

# Method 1: set position = "fill"
ggplot(dat) +
  geom_bar(
    aes(x = Item, y = Number, fill = Age),
    stat = "identity",
    position = "fill"
  )

# Method 2: plot proportions, so that every bar has a total height of 1.
# The proportions are computed per row (per item) of the contingency table
# `tab` built at the start; apply() returns them with age groups as rows and
# items as columns.
PCT <- apply(tab, 1, function(counts) counts / sum(counts))

# Reshape into the long layout ggplot accepts and name the columns.
PCT <- as.data.frame(as.table(PCT))
names(PCT) <- c("Age", "Item", "Percent")

ggplot(PCT) +
  geom_bar(
    aes(x = Item, y = Percent, fill = Age),
    stat = "identity"
  )

二、为两分组变量条形图添加标签

为两分组变量条形图添加标签时，需用到geom_text/label中的position参数。

# Base plot shared by the bar layer and the text layer, so that both use the
# same grouping.
p <- ggplot(data = dat, aes(x = Item, y = Number, fill = Age))

## Stacked bar chart

# The label position must come from position_stack(). With the default
# (vjust = 1) each label sits at the top edge of its segment.
p +
  geom_bar(stat = "identity") +
  geom_label(
    show.legend = FALSE,
    aes(label = Number),
    position = position_stack()
  )

# In most cases, however, the label should sit in the middle of its segment.
p +
  geom_bar(stat = "identity") +
  geom_label(
    show.legend = FALSE,
    aes(label = Number),
    position = position_stack(vjust = 0.5)
  )

## Dodged bar chart

# Note: position_dodge() must be given the same width in geom_bar and in
# geom_text/label. vjust is changed on the text layer to lift the labels a
# little so they do not sit on the top edge of the bars.
p +
  geom_bar(
    stat = "identity",
    position = position_dodge(width = 0.7),
    width = 0.6
  ) +
  geom_text(
    aes(label = Number),
    position = position_dodge(width = 0.7),
    vjust = -0.3
  )

## When position = "fill" is used for the bars, geom_text/label must use
## position_fill(vjust = ...) as well.
# The bar layer has to be a statement of its own: if it is left on the
# comment line it is commented out and the text layer is evaluated alone.
p +
  geom_bar(stat = "identity", position = "fill") +
  geom_text(aes(label = Number), position = position_fill(vjust = 0.5))

## After flipping the axes, position_stack and position_fill are used exactly
## as before; with position_dodge, set hjust instead of vjust.

# Flipped stacked bars with labels centred in each segment
p +
  coord_flip() +
  geom_bar(stat = "identity") +
  geom_label(
    show.legend = FALSE,
    aes(label = Number),
    position = position_stack(vjust = 0.5)
  )

# Flipped dodged bars; hjust pushes the labels just past the bar ends
p +
  coord_flip() +
  geom_bar(
    stat = "identity",
    width = 0.6,
    position = position_dodge(width = 0.7)
  ) +
  geom_text(
    aes(label = Number),
    position = position_dodge(width = 0.7),
    hjust = -0.3
  )

## For dodged bars in polar coordinates, hjust and vjust can both be set to
## "outward".
p +
  coord_polar() +
  geom_bar(
    stat = "identity",
    position = position_dodge(width = 0.7),
    width = 0.6
  ) +
  geom_text(
    aes(label = Number),
    position = position_dodge(width = 0.7),
    hjust = "outward",
    vjust = "outward"
  ) +
  # Stretch the y axis slightly so the labels do not collide with the
  # x-axis labels.
  scale_y_continuous(limits = c(0, max(dat$Number) * 1.2))

#==========
# Exercise: drawing a pyramid chart
#==========

# A pyramid chart is simply a bar chart with bars on both sides of an axis.
# The values meant for the left (or lower) side are turned into negatives.
v <- c(-3, -2, 4, 5)
lab <- c("a", "b", "a", "b")

ggplot() +
  # Fill is keyed on the sign of the value, giving one colour per side.
  geom_bar(
    aes(x = lab, y = v, fill = factor(sign(v))),
    stat = "identity"
  ) +
  scale_fill_manual(values = c("blue", "red")) +
  # Centre each value inside its own bar.
  geom_text(
    aes(x = lab, y = v, label = v),
    position = position_stack(vjust = 0.5)
  )

# 下面我们用2018年日本各年龄段的人口数据，来绘制带有渐变色的金字塔图（图6-5-1）

图6-5-1 用geom_shading_bar绘制渐变金字塔图

library(plothelper) # 使用round_text

library(tibble) # 使用tibble

# Load the population table from the course materials; the first CSV column
# is used as the row names.
dat <- read.csv("japan age.csv", row.names = 1)

# Split by sex: rows 1-10 are taken as the male records and rows 11-20 as
# the female records.
male <- dat[1:10, ]
female <- dat[11:20, ]

# Build the gradient colours. Bars drawn to the right of the y axis fade
# from blue into another colour; bars drawn to the left do the opposite.

# Interpolate six anchor colours into a ramp of 10 colours (one per age row).
# The function name is colorRampPalette (grDevices); writing it with spaces
# inside the name is a syntax error.
mycolor <- colorRampPalette(
  c("steelblue1", "springgreen", "indianred1", "red", "orange", "gold1"),
  space = "Lab"
)(10)

# One two-colour gradient per bar, stored as a list so it can later become a
# list column: male bars run royalblue4 -> ramp colour, female bars run
# ramp colour -> royalblue4.
color_male <- lapply(mycolor, function(x) c("royalblue4", x))
color_female <- lapply(mycolor, function(x) c(x, "royalblue4"))

# Attach the gradients to the data as a list column of a tibble. The female
# values are negated so that their bars are drawn on the other side of the
# axis.
male <- tibble(
  Value = male$Value,
  Age = male$Age,
  raster = color_male
)
female <- tibble(
  Value = -female$Value,
  Age = female$Age,
  raster = color_female
)

# Base pyramid: one horizontal gradient-bar layer per sex.
p <- ggplot() +
  geom_shading_bar(
    data = male,
    aes(x = Value, y = Age, raster = raster),
    orientation = "y"
  ) +
  geom_shading_bar(
    data = female,
    aes(x = Value, y = Age, raster = raster),
    orientation = "y"
  )

# Format a value for display on the chart: the absolute value in millions,
# shown with three digits after the decimal point (via round_text), padded
# with a space on each side so the label keeps some distance from its bar.
# The padding must be real spaces: pasting empty strings adds no gap.
# NOTE(review): one space per side is assumed - adjust if more gap is wanted.
f <- function(x) {
  paste0(" ", round_text(abs(x) / 1000000, 3), " ")
}

p +
  # Dotted centre line separating the two sexes.
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
  # lines - confirm against the installed version before changing.
  geom_vline(aes(xintercept = 0), linetype = 3, size = 1) +
  # Value labels, pushed outward from the centre at the end of each bar.
  geom_text(
    data = male,
    aes(x = Value, y = Age, label = f(Value)),
    hjust = "outward",
    size = 5,
    family = "Hershey Sans",
    fontface = 3
  ) +
  geom_text(
    data = female,
    aes(x = Value, y = Age, label = f(Value)),
    hjust = "outward",
    size = 5,
    family = "Hershey Sans",
    fontface = 3
  ) +
  # Extra horizontal room so the outward labels fit.
  scale_x_continuous(expand = expansion(0.25)) +
  labs(
    x = NULL,
    y = NULL,
    title = "Japan's Population by Age, Sex",
    subtitle = "(unit: million)\n"
  ) +
  # Corner captions naming each side of the pyramid.
  geom_text(
    aes(
      x = c(Inf, -Inf),
      y = c(Inf, Inf),
      label = c("Male", "Female")
    ),
    size = 8,
    hjust = "inward",
    vjust = "inward",
    family = "serif"
  ) +
  theme_minimal(base_size = 15, base_family = "serif") +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 15),
    axis.title = element_text(size = 20),
    plot.title = element_text(size = 22, face = 2, hjust = 1),
    plot.subtitle = element_text(size = 20, face = 3, hjust = 1)
  )

免责声明：以上内容源自网络，版权归原作者所有，如有侵犯您的原创版权请告知，我们将尽快删除相关内容。

我要反馈

工作思路

策划书

留守儿童

关工委

内部审计

爱国卫生

业务员

信息技术

教研员

心理教育

居委会

社区学校

公司安全

部门月度

区安全

公司工会

乡镇基层

劳动保障

经理年度

内科年度

教学个人

前台个人

个人月度

配班个人

基于ggplot的两分组变量条形图

相关推荐

基于ggplot的两分组变量条形图

有关基于ggplot的政经数据可视化的文章

相关推荐