29. 聚类树图绘制
清除当前环境中的变量
rm(list=ls())
设置工作目录
setwd("C:/Users/Dell/Desktop/R_Plots/29dendrogram/")
使用dendrogram函数绘制聚类树图
# 查看内置示例数据
head(USArrests)
## Murder Assault UrbanPop Rape
## Alabama 13.2 236 58 21.2
## Alaska 10.0 263 48 44.5
## Arizona 8.1 294 80 31.0
## Arkansas 8.8 190 50 19.5
## California 9.0 276 91 40.6
## Colorado 7.9 204 78 38.7
# 计算距离矩阵,默认method = "euclidean"计算欧氏距离
dists <- dist(USArrests,method = "euclidean")
head(dists)
## [1] 37.17701 63.00833 46.92814 55.52477 41.93256 128.20694
# 进行层次聚类,method = "average"选择UPGMA聚类算法
hc <- hclust(dists, method = "ave")
hc
##
## Call:
## hclust(d = dists, method = "ave")
##
## Cluster method : average
## Distance : euclidean
## Number of objects: 50
# 将hclust对象转换为dendrogram对象
dend1 <- as.dendrogram(hc)
dend1
## 'dendrogram' with 2 branches and 50 members total, at height 152.314
# 绘制聚类树图,默认type = "rectangle"
plot(dend1, type = "rectangle",
ylab="Height",
main="Cluster Dendrogram")
## "triangle" type and show inner nodes:
plot(dend1,
nodePar = list(pch = c(1,NA), cex = 1.2, lab.cex = 0.9),#设置节点的形状,大小和标签字体大小
type = "triangle", center = TRUE)
plot(dend1,
edgePar = list(col = c("red","blue"), lty = 1:2),#设置节点边的颜色和线型
dLeaf = 2, edge.root = TRUE)
plot(dend1,
nodePar = list(pch = 17:16, cex = 1.2:0.8, col = 2:3),
horiz = TRUE)#水平放置聚类树
nP <- list(col = 3:2, cex = c(2.0, 0.8), pch = 21:22,
bg = c("light blue", "pink"),
lab.cex = 0.8, lab.col = "tomato")
plot(dend1,
nodePar= nP,
edgePar = list(col = "gray", lwd = 2),
horiz = TRUE)
# plot dendrogram with some cuts
dend2 <- cut(dend1, h = 70)
dend2
## $upper
## 'dendrogram' with 2 branches and 4 members total, at height 152.314
##
## $lower
## $lower[[1]]
## 'dendrogram' with 2 branches and 2 members total, at height 38.52791
##
## $lower[[2]]
## 'dendrogram' with 2 branches and 14 members total, at height 44.28392
##
## $lower[[3]]
## 'dendrogram' with 2 branches and 14 members total, at height 44.83793
##
## $lower[[4]]
## 'dendrogram' with 2 branches and 20 members total, at height 54.74683
plot(dend2$upper, main = "Upper tree of cut at h=70")
## dend2$lower is *NOT* a dendrogram, but a list of .. :
plot(dend2$lower[[1]], main = "First branch of lower tree with cut at h=70")
## "inner" and "leaf" edges in different type & color :
plot(dend2$lower[[2]],
nodePar = list(col = 1), # non empty list
edgePar = list(lty = 1:2, col = 2:1),
edge.root = TRUE)
plot(dend2$lower[[3]],
nodePar = list(col = 4),
horiz = TRUE, type = "tr")
使用ggdendro包绘制聚类树图
# 安装并加载所需的R包
#install.packages('ggdendro')
library(ggdendro)
library(ggplot2)
# 层次聚类
hc <- hclust(dist(USArrests), "ave")
hc
##
## Call:
## hclust(d = dist(USArrests), method = "ave")
##
## Cluster method : average
## Distance : euclidean
## Number of objects: 50
# Demonstrate plotting directly from object class hclust
# 使用ggdendrogram函数绘制聚类树
ggdendrogram(hc)
# 旋转90度
ggdendrogram(hc, rotate = TRUE)
# demonstrate converting hclust to dendro using dendro_data first
hcdata <- dendro_data(hc, type = "triangle")
hcdata
## $segments
## x y xend yend
## 1 17.994141 152.313999 4.765625 77.605024
## 2 4.765625 77.605024 1.500000 38.527912
## 3 1.500000 38.527912 1.000000 0.000000
## 4 1.500000 38.527912 2.000000 0.000000
## 5 4.765625 77.605024 8.031250 44.283922
## 6 8.031250 44.283922 3.875000 28.012211
## 7 3.875000 28.012211 3.000000 0.000000
## 8 3.875000 28.012211 4.750000 15.453120
## 9 4.750000 15.453120 4.000000 0.000000
## 10 4.750000 15.453120 5.500000 13.896043
## 11 5.500000 13.896043 5.000000 0.000000
## 12 5.500000 13.896043 6.000000 0.000000
## 13 8.031250 44.283922 12.187500 39.394633
## 14 12.187500 39.394633 9.625000 26.363428
## 15 9.625000 26.363428 7.750000 16.891499
## 16 7.750000 16.891499 7.000000 0.000000
## 17 7.750000 16.891499 8.500000 15.454449
## 18 8.500000 15.454449 8.000000 0.000000
## 19 8.500000 15.454449 9.000000 0.000000
## 20 9.625000 26.363428 11.500000 18.417331
## 21 11.500000 18.417331 10.500000 6.236986
## 22 10.500000 6.236986 10.000000 0.000000
## 23 10.500000 6.236986 11.000000 0.000000
## 24 11.500000 18.417331 12.500000 13.297368
## 25 12.500000 13.297368 12.000000 0.000000
## 26 12.500000 13.297368 13.000000 0.000000
## 27 12.187500 39.394633 14.750000 28.095803
## 28 14.750000 28.095803 14.000000 0.000000
## 29 14.750000 28.095803 15.500000 21.167192
## 30 15.500000 21.167192 15.000000 0.000000
## 31 15.500000 21.167192 16.000000 0.000000
## 32 17.994141 152.313999 31.222656 89.232093
## 33 31.222656 89.232093 23.796875 44.837933
## 34 23.796875 44.837933 20.343750 26.713777
## 35 20.343750 26.713777 17.937500 16.425489
## 36 17.937500 16.425489 17.000000 0.000000
## 37 17.937500 16.425489 18.875000 12.878100
## 38 18.875000 12.878100 18.000000 0.000000
## 39 18.875000 12.878100 19.750000 10.736739
## 40 19.750000 10.736739 19.000000 0.000000
## 41 19.750000 10.736739 20.500000 7.355270
## 42 20.500000 7.355270 20.000000 0.000000
## 43 20.500000 7.355270 21.000000 0.000000
## 44 20.343750 26.713777 22.750000 22.595978
## 45 22.750000 22.595978 22.000000 0.000000
## 46 22.750000 22.595978 23.500000 11.456439
## 47 23.500000 11.456439 23.000000 0.000000
## 48 23.500000 11.456439 24.000000 0.000000
## 49 23.796875 44.837933 27.250000 29.054195
## 50 27.250000 29.054195 25.750000 20.198479
## 51 25.750000 20.198479 25.000000 0.000000
## 52 25.750000 20.198479 26.500000 12.614278
## 53 26.500000 12.614278 26.000000 0.000000
## 54 26.500000 12.614278 27.000000 0.000000
## 55 27.250000 29.054195 28.750000 23.972143
## 56 28.750000 23.972143 28.000000 0.000000
## 57 28.750000 23.972143 29.500000 14.501034
## 58 29.500000 14.501034 29.000000 0.000000
## 59 29.500000 14.501034 30.000000 0.000000
## 60 31.222656 89.232093 38.648438 54.746831
## 61 38.648438 54.746831 34.437500 20.598507
## 62 34.437500 20.598507 31.875000 15.026107
## 63 31.875000 15.026107 31.000000 0.000000
## 64 31.875000 15.026107 32.750000 12.438692
## 65 32.750000 12.438692 32.000000 0.000000
## 66 32.750000 12.438692 33.500000 3.834058
## 67 33.500000 3.834058 33.000000 0.000000
## 68 33.500000 3.834058 34.000000 0.000000
## 69 34.437500 20.598507 37.000000 15.122897
## 70 37.000000 15.122897 35.500000 6.637771
## 71 35.500000 6.637771 35.000000 0.000000
## 72 35.500000 6.637771 36.000000 0.000000
## 73 37.000000 15.122897 38.500000 13.352260
## 74 38.500000 13.352260 37.500000 3.929377
## 75 37.500000 3.929377 37.000000 0.000000
## 76 37.500000 3.929377 38.000000 0.000000
## 77 38.500000 13.352260 39.500000 8.027453
## 78 39.500000 8.027453 39.000000 0.000000
## 79 39.500000 8.027453 40.000000 0.000000
## 80 38.648438 54.746831 42.859375 41.094765
## 81 42.859375 41.094765 41.000000 0.000000
## 82 42.859375 41.094765 44.718750 33.117815
## 83 44.718750 33.117815 42.750000 10.771175
## 84 42.750000 10.771175 42.000000 0.000000
## 85 42.750000 10.771175 43.500000 8.537564
## 86 43.500000 8.537564 43.000000 0.000000
## 87 43.500000 8.537564 44.000000 0.000000
## 88 44.718750 33.117815 46.687500 27.779904
## 89 46.687500 27.779904 45.500000 13.044922
## 90 45.500000 13.044922 45.000000 0.000000
## 91 45.500000 13.044922 46.000000 0.000000
## 92 46.687500 27.779904 47.875000 18.993398
## 93 47.875000 18.993398 47.000000 0.000000
## 94 47.875000 18.993398 48.750000 10.184218
## 95 48.750000 10.184218 48.000000 0.000000
## 96 48.750000 10.184218 49.500000 2.291288
## 97 49.500000 2.291288 49.000000 0.000000
## 98 49.500000 2.291288 50.000000 0.000000
##
## $labels
## x y label
## 1 1 0 Florida
## 2 2 0 North Carolina
## 3 3 0 California
## 4 4 0 Maryland
## 5 5 0 Arizona
## 6 6 0 New Mexico
## 7 7 0 Delaware
## 8 8 0 Alabama
## 9 9 0 Louisiana
## 10 10 0 Illinois
## 11 11 0 New York
## 12 12 0 Michigan
## 13 13 0 Nevada
## 14 14 0 Alaska
## 15 15 0 Mississippi
## 16 16 0 South Carolina
## 17 17 0 Washington
## 18 18 0 Oregon
## 19 19 0 Wyoming
## 20 20 0 Oklahoma
## 21 21 0 Virginia
## 22 22 0 Rhode Island
## 23 23 0 Massachusetts
## 24 24 0 New Jersey
## 25 25 0 Missouri
## 26 26 0 Arkansas
## 27 27 0 Tennessee
## 28 28 0 Georgia
## 29 29 0 Colorado
## 30 30 0 Texas
## 31 31 0 Idaho
## 32 32 0 Nebraska
## 33 33 0 Kentucky
## 34 34 0 Montana
## 35 35 0 Ohio
## 36 36 0 Utah
## 37 37 0 Indiana
## 38 38 0 Kansas
## 39 39 0 Connecticut
## 40 40 0 Pennsylvania
## 41 41 0 Hawaii
## 42 42 0 West Virginia
## 43 43 0 Maine
## 44 44 0 South Dakota
## 45 45 0 North Dakota
## 46 46 0 Vermont
## 47 47 0 Minnesota
## 48 48 0 Wisconsin
## 49 49 0 Iowa
## 50 50 0 New Hampshire
##
## $leaf_labels
## NULL
##
## $class
## [1] "hclust"
##
## attr(,"class")
## [1] "dendro"
ggdendrogram(hcdata, rotate = TRUE) +
labs(title = "Dendrogram in ggplot2")
使用ggraph包绘制聚类树图
# 安装并加载所需的R包
#install.packages("ggraph")
library(ggraph)
## Warning: package 'ggraph' was built under R version 3.6.3
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(tidyverse)
## -- Attaching packages ------------------------------------- tidyverse 1.2.1 --
## √ tibble 2.1.3 √ purrr 0.3.2
## √ tidyr 1.1.2 √ dplyr 1.0.2
## √ readr 1.3.1 √ stringr 1.4.0
## √ tibble 2.1.3 √ forcats 0.4.0
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## -- Conflicts ---------------------------------------- tidyverse_conflicts() --
## x dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
## x purrr::compose() masks igraph::compose()
## x tidyr::crossing() masks igraph::crossing()
## x dplyr::filter() masks stats::filter()
## x dplyr::groups() masks igraph::groups()
## x dplyr::lag() masks stats::lag()
## x purrr::simplify() masks igraph::simplify()
library(RColorBrewer)
theme_set(theme_void())
## Warning: New theme missing the following elements: axis.title.x,
## axis.title.x.top, axis.title.y, axis.title.y.right, axis.text.x,
## axis.text.x.top, axis.text.y, axis.text.y.right, axis.ticks, axis.line,
## axis.line.x, axis.line.y, legend.background, legend.margin, legend.spacing,
## legend.spacing.x, legend.spacing.y, legend.key, legend.key.height,
## legend.key.width, legend.text.align, legend.title.align, legend.direction,
## legend.justification, legend.box.margin, legend.box.background,
## legend.box.spacing, panel.background, panel.border, panel.spacing.x,
## panel.spacing.y, panel.grid, panel.grid.minor, plot.background,
## strip.background, strip.placement, strip.text.x, strip.text.y
# 构建示例数据
# data: edge list
d1 <- data.frame(from="origin", to=paste("group", seq(1,7), sep=""))
head(d1)
## from to
## 1 origin group1
## 2 origin group2
## 3 origin group3
## 4 origin group4
## 5 origin group5
## 6 origin group6
d2 <- data.frame(from=rep(d1$to, each=7), to=paste("subgroup", seq(1,49), sep="_"))
head(d2)
## from to
## 1 group1 subgroup_1
## 2 group1 subgroup_2
## 3 group1 subgroup_3
## 4 group1 subgroup_4
## 5 group1 subgroup_5
## 6 group1 subgroup_6
edges <- rbind(d1, d2)
edges
## from to
## 1 origin group1
## 2 origin group2
## 3 origin group3
## 4 origin group4
## 5 origin group5
## 6 origin group6
## 7 origin group7
## 8 group1 subgroup_1
## 9 group1 subgroup_2
## 10 group1 subgroup_3
## 11 group1 subgroup_4
## 12 group1 subgroup_5
## 13 group1 subgroup_6
## 14 group1 subgroup_7
## 15 group2 subgroup_8
## 16 group2 subgroup_9
## 17 group2 subgroup_10
## 18 group2 subgroup_11
## 19 group2 subgroup_12
## 20 group2 subgroup_13
## 21 group2 subgroup_14
## 22 group3 subgroup_15
## 23 group3 subgroup_16
## 24 group3 subgroup_17
## 25 group3 subgroup_18
## 26 group3 subgroup_19
## 27 group3 subgroup_20
## 28 group3 subgroup_21
## 29 group4 subgroup_22
## 30 group4 subgroup_23
## 31 group4 subgroup_24
## 32 group4 subgroup_25
## 33 group4 subgroup_26
## 34 group4 subgroup_27
## 35 group4 subgroup_28
## 36 group5 subgroup_29
## 37 group5 subgroup_30
## 38 group5 subgroup_31
## 39 group5 subgroup_32
## 40 group5 subgroup_33
## 41 group5 subgroup_34
## 42 group5 subgroup_35
## 43 group6 subgroup_36
## 44 group6 subgroup_37
## 45 group6 subgroup_38
## 46 group6 subgroup_39
## 47 group6 subgroup_40
## 48 group6 subgroup_41
## 49 group6 subgroup_42
## 50 group7 subgroup_43
## 51 group7 subgroup_44
## 52 group7 subgroup_45
## 53 group7 subgroup_46
## 54 group7 subgroup_47
## 55 group7 subgroup_48
## 56 group7 subgroup_49
# We can add a second data frame with information for each node!
name <- unique(c(as.character(edges$from), as.character(edges$to)))
vertices <- data.frame(
name=name,
group=c( rep(NA,8) , rep( paste("group", seq(1,7), sep=""), each=7)),
cluster=sample(letters[1:4], length(name), replace=T),
value=sample(seq(10,30), length(name), replace=T)
)
head(vertices)
## name group cluster value
## 1 origin <NA> d 18
## 2 group1 <NA> c 17
## 3 group2 <NA> c 15
## 4 group3 <NA> b 23
## 5 group4 <NA> b 28
## 6 group5 <NA> d 30
# Create a graph object
mygraph <- graph_from_data_frame( edges, vertices=vertices)
mygraph
## IGRAPH f42fbd3 DN-- 57 56 --
## + attr: name (v/c), group (v/c), cluster (v/c), value (v/n)
## + edges from f42fbd3 (vertex names):
## [1] origin->group1 origin->group2 origin->group3
## [4] origin->group4 origin->group5 origin->group6
## [7] origin->group7 group1->subgroup_1 group1->subgroup_2
## [10] group1->subgroup_3 group1->subgroup_4 group1->subgroup_5
## [13] group1->subgroup_6 group1->subgroup_7 group2->subgroup_8
## [16] group2->subgroup_9 group2->subgroup_10 group2->subgroup_11
## [19] group2->subgroup_12 group2->subgroup_13 group2->subgroup_14
## [22] group3->subgroup_15 group3->subgroup_16 group3->subgroup_17
## + ... omitted several edges
# 使用ggraph函数绘制聚类树图
ggraph(mygraph, layout = 'dendrogram') +
geom_edge_diagonal()
# 绘制圆形的聚类树
ggraph(mygraph, layout = 'dendrogram', circular = TRUE) +
geom_edge_diagonal()
# 添加节点的标签,形状和信息
ggraph(mygraph, layout = 'dendrogram') +
geom_edge_diagonal() +
geom_node_text(aes( label=name, filter=leaf, color=group) , angle=90 , hjust=1, nudge_y=-0.1) +
geom_node_point(aes(filter=leaf, size=value, color=group) , alpha=0.6) +
ylim(-.6, NA) +
theme(legend.position="none")
# 构建测试数据集
# create a data frame giving the hierarchical structure of your individuals
d1=data.frame(from="origin", to=paste("group", seq(1,10), sep=""))
d2=data.frame(from=rep(d1$to, each=10), to=paste("subgroup", seq(1,100), sep="_"))
edges=rbind(d1, d2)
# create a vertices data.frame. One line per object of our hierarchy
vertices = data.frame(
name = unique(c(as.character(edges$from), as.character(edges$to))) ,
value = runif(111)
)
# Let's add a column with the group of each name. It will be useful later to color points
vertices$group = edges$from[ match( vertices$name, edges$to ) ]
#Let's add information concerning the label we are going to add: angle, horizontal adjustement and potential flip
#calculate the ANGLE of the labels
vertices$id=NA
myleaves=which(is.na( match(vertices$name, edges$from) ))
nleaves=length(myleaves)
vertices$id[ myleaves ] = seq(1:nleaves)
vertices$angle= 90 - 360 * vertices$id / nleaves
# calculate the alignment of labels: right or left
# If I am on the left part of the plot, my labels have currently an angle < -90
vertices$hjust<-ifelse( vertices$angle < -90, 1, 0)
# flip angle BY to make them readable
vertices$angle<-ifelse(vertices$angle < -90, vertices$angle+180, vertices$angle)
# 查看测试数据
head(edges)
## from to
## 1 origin group1
## 2 origin group2
## 3 origin group3
## 4 origin group4
## 5 origin group5
## 6 origin group6
head(vertices)
## name value group id angle hjust
## 1 origin 0.47071871 <NA> NA NA NA
## 2 group1 0.35606308 origin NA NA NA
## 3 group2 0.73638466 origin NA NA NA
## 4 group3 0.35833810 origin NA NA NA
## 5 group4 0.09394485 origin NA NA NA
## 6 group5 0.34061367 origin NA NA NA
# Create a graph object
mygraph <- graph_from_data_frame( edges, vertices=vertices )
# Make the plot
ggraph(mygraph, layout = 'dendrogram', circular = TRUE) +
geom_edge_diagonal(colour="grey") + #设置节点边的颜色
# 设置节点的标签,字体大小,文本注释信息
geom_node_text(aes(x = x*1.15, y=y*1.15, filter = leaf, label=name, angle = angle, hjust=hjust*0.4, colour=group), size=2.5, alpha=1) +
# 设置节点的大小,颜色和透明度
geom_node_point(aes(filter = leaf, x = x*1.07, y=y*1.07, colour=group, size=value, alpha=0.2)) +
# 设置颜色的画板
scale_colour_manual(values= rep( brewer.pal(9,"Paired") , 30)) +
# 设置节点大小的范围
scale_size_continuous( range = c(1,10) ) +
theme_void() +
theme(
legend.position="none",
plot.margin=unit(c(0,0,0,0),"cm"),
) +
expand_limits(x = c(-1.3, 1.3), y = c(-1.3, 1.3))
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=Chinese (Simplified)_China.936
## [2] LC_CTYPE=Chinese (Simplified)_China.936
## [3] LC_MONETARY=Chinese (Simplified)_China.936
## [4] LC_NUMERIC=C
## [5] LC_TIME=Chinese (Simplified)_China.936
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] RColorBrewer_1.1-2 forcats_0.4.0 stringr_1.4.0
## [4] dplyr_1.0.2 purrr_0.3.2 readr_1.3.1
## [7] tidyr_1.1.2 tibble_2.1.3 tidyverse_1.2.1
## [10] igraph_1.2.4.1 ggraph_2.0.3 ggplot2_3.2.0
## [13] ggdendro_0.1.22
##
## loaded via a namespace (and not attached):
## [1] ggrepel_0.8.1 Rcpp_1.0.5 lubridate_1.7.4
## [4] lattice_0.20-38 assertthat_0.2.1 digest_0.6.20
## [7] ggforce_0.3.2 R6_2.4.0 cellranger_1.1.0
## [10] backports_1.1.4 evaluate_0.14 httr_1.4.0
## [13] pillar_1.4.2 rlang_0.4.7 lazyeval_0.2.2
## [16] readxl_1.3.1 rstudioapi_0.10 rmarkdown_1.13
## [19] labeling_0.3 polyclip_1.10-0 munsell_0.5.0
## [22] broom_0.5.2 compiler_3.6.0 modelr_0.1.4
## [25] xfun_0.8 pkgconfig_2.0.2 htmltools_0.3.6
## [28] tidyselect_1.1.0 gridExtra_2.3 graphlayouts_0.7.0
## [31] viridisLite_0.3.0 crayon_1.3.4 withr_2.1.2
## [34] MASS_7.3-51.4 grid_3.6.0 nlme_3.1-139
## [37] jsonlite_1.6 gtable_0.3.0 lifecycle_0.2.0
## [40] magrittr_1.5 scales_1.0.0 cli_1.1.0
## [43] stringi_1.4.3 farver_1.1.0 viridis_0.5.1
## [46] xml2_1.2.0 generics_0.0.2 vctrs_0.3.2
## [49] tools_3.6.0 glue_1.4.2 tweenr_1.0.1
## [52] hms_0.4.2 yaml_2.2.0 colorspace_1.4-1
## [55] tidygraph_1.2.0 rvest_0.3.4 knitr_1.23
## [58] haven_2.3.1