In this use case we show how to use tidycenso
to visualize the most important internal migration flows over the last
10 years.
First download the number of people by province of residence in 2011 and 2020.
library(tidycenso)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
df <- get_censo("per.ppal", c("ID_RESI_2011_N2", "ID_RESI_2020_N2"), "SPERSONAS")
head(df)
#> ID_RESI_2011_N2 ID_RESI_2020_N2 SPERSONAS
#> 1 02 Albacete 02 Albacete 324177
#> 2 02 Albacete 03 Alicante/Alacant 3849
#> 3 02 Albacete 04 Almería 366
#> 4 02 Albacete 01 Araba/Álava 72
#> 5 02 Albacete 33 Asturias 165
#> 6 02 Albacete 05 Ávila 42
We then do some data wrangling estimate the net flows, and select the largest ones.
df <-
df %>%
filter(
ID_RESI_2011_N2 != "No consta",
ID_RESI_2020_N2 != "No consta",
) %>%
mutate(across(c(ID_RESI_2020_N2, ID_RESI_2011_N2), ~substr(., 1, 2)))
entries <-
df %>%
rename(to = ID_RESI_2020_N2, from = ID_RESI_2011_N2)
exits <-
df %>%
rename(from = ID_RESI_2020_N2, to = ID_RESI_2011_N2) %>%
mutate(SPERSONAS = -SPERSONAS)
net_flows <-
rbind(entries, exits) %>%
group_by(from, to) %>%
summarise(SPERSONAS = sum(SPERSONAS)) %>%
filter(
SPERSONAS > 0,
!from %in% c("Ex", "No"),
!to %in% c("Ex", "No")
) %>%
ungroup() %>%
slice_max(SPERSONAS, prop = .10)
#> `summarise()` has grouped output by 'from'. You can override using the
#> `.groups` argument.
With the estimated net migration flows, we can now visualize them
We’ll use tidygraph
and ggraph
to create the graph, ggplot2
to
visualize it and mapSpain
to download the geometries of the provinces.
library(tidygraph)
#>
#> Attaching package: 'tidygraph'
#> The following object is masked from 'package:stats':
#>
#> filter
library(ggraph)
#> Loading required package: ggplot2
library(mapSpain)
library(ggplot2)
library(sf)
#> Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.4.0; sf_use_s2() is TRUE
sf_provs <- esp_get_prov() %>%
select(cpro, ine.prov.name)
nodes <-
sf_provs %>%
st_centroid() %>%
st_coordinates() %>%
as.data.frame() %>%
mutate(id = sf_provs$cpro)
#> Warning: st_centroid assumes attributes are constant over geometries
graph <- tbl_graph(nodes, edges = net_flows, node_key = "id", directed = T)
coord_layout <-
create_layout(
graph = graph,
layout = "manual",
x = nodes$X,
y = nodes$Y
)
Once we have created the graph and the layout for it, we can use ggplot2
to
visualise it.
p <-
ggraph(coord_layout) +
geom_sf(data = sf_provs, color = "black", fill = "grey90") +
geom_edge_arc(
aes(
width=SPERSONAS,
alpha = after_stat(index),
color = after_stat(index)
),
lineend = "round", n = 200,
strength = .1) +
scale_edge_width_continuous(range=c(0.1, 1.5)) +
scale_edge_color_gradientn(colors = scales::brewer_pal("seq", direction = 1)(5)[3:5]) +
scale_edge_alpha(range = c(.05, 1)) +
theme_void() +
theme(legend.position = "none")
p