set.seed(321) # For reproducibility
# Loading packages -------------------------------------------------------------
::p_load(
pacman# Easily Install and Load the 'Tidyverse'
tidyverse, # Working with chess PGN notation
rchess, # Improved Text Rendering Support for 'ggplot2'
ggtext, # Using Fonts More Easily in R Graphs
showtext, # Makes theming plots easier
ggeasy, # Parallel processing
furrr, # Interpreted String Literals
glue, # Pixel Filters for "ggplot2" and "grid"
ggfx
)
# Visualization Parameters -----------------------------------------------------
# Plot aesthetics
<- "gray20"
title_col <- "gray20"
subtitle_col <- "gray30"
caption_col <- "gray20"
text_col <- paletteer::paletteer_d("peRReo::don")[c(5,9)]
col_palette
<- c("burlywood3", "burlywood4") # Traditional board colours
chess_tiles
# Icons
<- str_glue("#TidyTuesday: { 2024 } Week { 40 } • Source: Chess Game Dataset (Lichess)<br>")
tt <- str_glue("<span style='font-family:fa6-brands'></span>")
li <- str_glue("<span style='font-family:fa6-brands'></span>")
gh
# Text
<- str_glue("Positional movements of major chess pieces")
title_text <- str_glue("{tt} {li} c-monaghan • {gh} c-monaghan • #rstats #ggplot2")
caption_text # Fonts
font_add("fa6-brands", here::here("fonts/6.4.2/Font Awesome 6 Brands-Regular-400.otf"))
font_add_google("Oswald", regular.wt = 400, family = "title")
font_add_google("Noto Sans", regular.wt = 400, family = "caption")
showtext_auto(enable = TRUE)
# Plot theme
theme_set(theme_void(base_size = 14, base_family = "text"))
theme_update(
plot.title.position = "plot",
plot.caption.position = "plot",
legend.position = 'plot',
panel.grid = element_blank(),
strip.text = element_text(size = 20, face = "bold",
margin = margin(0, 0, 2, 0)),
axis.text = element_blank(),
axis.ticks = element_blank()
)
# Variables --------------------------------------------------------------------
# Paths
<- "posts/2024/"
path <- "2024-09-28-TT-W40/"
folder
# White Pieces
<- c("a1 Rook", "b1 Knight", "c1 Bishop", "White Queen",
white_pieces "White King", "f1 Bishop", "g1 Knight", "h1 Rook")
# Reading in data --------------------------------------------------------------
<- tidytuesdayR::tt_load(2024, week = 40)
data <- data$chess data
This document analyzes a dataset of chess games from Lichess provided by #TidyTuesday. It contains over 20,000 games, including information such as player ratings, move sequences, and other metrics. The main focus of this analysis will be on the specific moves played during each game.
Setting up
Chess Data
The dataset includes the move sequences made by each player, represented as a string of moves. Below is a preview of the first two rows of the dataset.
%>%
data select(game_id, moves) %>%
mutate(game_id = seq(1:nrow(data))) %>%
rename(ID = game_id, Moves = moves) %>%
head(n = 2)
# A tibble: 2 × 2
ID Moves
<int> <chr>
1 1 d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4
2 2 d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6 Qe5+ Nxe5 c4 Bb4+
Each row displays the move sequence for a game, starting with White’s move. For example, the Slav Defense (1. d4 d5 2. c4 c6) or the Nimzowitsch Defense (1. d4 Nc6 2. e4 e5). Chess moves are typically recorded using Portable Game Notation (PGN), which makes it easy to replicate games.
PGN Conversion
To facilitate further analysis, we will convert the raw move strings into the PGN format, using a custom convert_to_pgn()
function that I wrote.
# Convert moves into PGN format ------------------------------------------------
<- function(moves, game_id) {
convert_to_pgn
# Split the moves string into a list of individual moves
<- strsplit(moves, " ")[[1]]
move_list
# Initialize an empty string for the PGN format
<- ""
pgn
# Loop through the moves two at a time (each move is a pair: white and black)
for (i in seq(1, length(move_list), by = 2)) {
<- (i + 1) / 2 # Move number calculation
move_number
# Add both white's and black's moves, if available
if (i < length(move_list)) {
<- paste0(pgn, move_number, ". ", move_list[i], " ", move_list[i+1], " ")
pgn else { # In case the game ends on white's move (no black move)
} <- paste0(pgn, move_number, ". ", move_list[i])
pgn
}
}
# Returning pgn string
return(pgn)
}
# Converting moves into pgn format
<- data %>%
chess_games select(game_id, moves) %>%
mutate(
game_id = seq(1:nrow(data)),
moves = mapply(convert_to_pgn, moves, game_id))
# Displaying converted dataset
%>%
chess_games rename(ID = game_id, Moves = moves) %>%
head(n = 2)
# A tibble: 2 × 2
ID Moves
<int> <chr>
1 1 "1. d4 d5 2. c4 c6 3. cxd5 e6 4. dxe6 fxe6 5. Nf3 Bb4+ 6. Nc3 Ba5 7. Bf…
2 2 "1. d4 Nc6 2. e4 e5 3. f4 f6 4. dxe5 fxe5 5. fxe5 Nxe5 6. Qd4 Nc6 7. Qe…
Extracting game history
Even with parallel processing the below code takes extremely long to run when rendering in quarto (4+ hrs). In a normal R session it takes about 20 minutes. I am not quite sure as to why the time difference is so extreme. However, to make my life a little easier and so I’m not waiting for hours when I need to re-render this document I will be reading in the data that this code outputs from an excel file.
However, for those interested I have left the parallel processing code below
Now that the data is in PGN format, we can use the history_detail()
function from the rhcess
package to extract the game history. However, because the dataset contains over 20,000 rows, processing every game sequentially would be time-consuming. To speed up this process, we will implement parallel processing using the furrr
package.
# Function to extract game history
<- function(p) {
process_moves <- Chess$new() # Initialize a new chess object
chss $load_pgn(p) # Load the PGN notation into the chess object
chss$history_detail() # Extract detailed history of the game
chss
}
plan(multisession, workers = parallel::detectCores() - 1)
# Parallel processing: Convert PGN format to game history
<- chess_games %>%
chess_games mutate(data = future_map(moves, process_moves)) %>%
select(-moves) %>%
unnest(cols = c(data))
The game history data contains 1,393,586 observations on both white and black’s moves. For the purpose of analysis we are only interested in plotting white’s moves. Additionally, we will sample 25,000 moves from white so as not to create an over-populated graph.
# White pieces
<- c(
white_pieces "a1 Rook", "b1 Knight", "c1 Bishop", "White Queen",
"White King", "f1 Bishop", "g1 Knight", "h1 Rook")
<- chess_games %>%
chess_games filter(piece %in% white_pieces) %>%
sample_n(25000)
<- readxl::read_xlsx(path = here::here("data/chess_history.xlsx"))
chess_games
%>%
chess_games select(game_id, piece, from, to) %>%
head(n = 4)
# A tibble: 4 × 4
game_id piece from to
<dbl> <chr> <chr> <chr>
1 7298 b1 Knight b1 d2
2 16254 g1 Knight e5 c4
3 19492 h1 Rook h1 e1
4 10827 White Queen b6 e3
Positional Movements
Creating a chess board
First, we will create a chessboard using ggplot
. We can do this using the .chessboard()
function, again from the rchess
package. This function generates the grid and coordinates for each square which can piped into ggplot.
# Creating a chess board -------------------------------------------------------
<- rchess:::.chessboarddata() %>%
board tibble() %>%
select(cell, col, row, x, y, cc)
%>%
board head(n = 4)
# A tibble: 4 × 6
cell col row x y cc
<chr> <chr> <int> <int> <int> <chr>
1 a1 a 1 1 1 b
2 b1 b 1 2 1 w
3 c1 c 1 3 1 b
4 d1 d 1 4 1 w
Adding board movements
Next, we will join the chessboard data with the game data to calculate the origin and destination of each piece movement. This allows us to visualize the trajectories of pieces as they move across the board.
# Join board data with game data to get move origin and destination
<- chess_games %>%
chess_games left_join(board %>% rename(from = cell, x.from = x, y.from = y), by = "from") %>%
left_join(board %>%
rename(to = cell, x.to = x, y.to = y) %>%
select(-cc, -col, -row), # Exclude unnecessary columns
by = "to") %>%
mutate(x_gt_y = abs(x.to - x.from) > abs(y.to - y.from), # Check if x movement is greater than y
xy_sign = sign((x.to - x.from)*(y.to - y.from)) == 1, # Check if X and Y move in the same direction
x_gt_y_equal_xy_sign = x_gt_y == xy_sign) # Check if both conditions hold
# Previewing
%>%
chess_games select(piece, from, to, x.from, y.from, x.to, y.to) %>%
head(n = 4)
# A tibble: 4 × 7
piece from to x.from y.from x.to y.to
<chr> <chr> <chr> <int> <int> <int> <int>
1 b1 Knight b1 d2 2 1 4 2
2 g1 Knight e5 c4 5 5 3 4
3 h1 Rook h1 e1 8 1 5 1
4 White Queen b6 e3 2 6 5 3
Plotting piece movements
Now we can visualize their paths on a chessboard.
<- chess_games %>%
p ggplot() +
# Adding board data
geom_tile(data = board, aes(x, y, fill = cc)) +
# Adding piece movement
geom_curve(
data = chess_games %>% filter(x_gt_y_equal_xy_sign),
aes(x = x.from, y = y.from, xend = x.to, yend = y.to),
position = position_jitter(width = 0.2, height = 0.2),
curvature = 0.50, angle = -45, alpha = 0.02, color = "white",
linewidth = 1.02) +
geom_curve(
data = chess_games %>% filter(!x_gt_y_equal_xy_sign),
aes(x = x.from, y = y.from, xend = x.to, yend = y.to),
position = position_jitter(width = 0.2, height = 0.2),
curvature = -0.50, angle = 45, alpha = 0.02, color = "white",
linewidth = 1.02) +
# Labs
labs(
title = title_text,
caption = caption_text,
x = NULL,
y = NULL) +
# Scales
scale_fill_manual(values = chess_tiles) +
coord_equal() +
# Facet
facet_wrap(~ factor(piece, levels = white_pieces), ncol = 4) +
# Theme
theme(
plot.title = element_text(
size = 50,
family = "title",
face = "bold",
colour = title_col,
lineheight = 1.1,
hjust = 0.5,
margin = margin(t = 5, b = 5)),
plot.caption = element_markdown(
size = rel(1.25),
family = "caption",
colour = caption_col,
lineheight = 1.1,
hjust = 0.5,
margin = margin(t = 5, b = 5))
)
p
Saving
# Saving plot
ggsave(
filename = here::here(path, folder, "tt_2024_w40.png"),
plot = p,
width = 7.5,
height = 5,
units = "in",
dpi = 320
)
# Thumbnail
::image_read(here::here(path, folder, "tt_2024_w40.png")) |>
magick::image_resize(geometry = "800") |>
magick::image_write(here::here(path, "thumbnails/tt_2024_w40_thumb.png")) magick
Appendix
─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.4.1 (2024-06-14 ucrt)
os Windows 11 x64 (build 22631)
system x86_64, mingw32
ui RTerm
language (EN)
collate English_Ireland.utf8
ctype English_Ireland.utf8
tz Europe/Dublin
date 2024-10-09
pandoc 3.2 @ C:/Coding/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
quarto 1.5.57
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.4.1)
forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.4.1)
furrr * 0.3.1 2022-08-15 [1] CRAN (R 4.4.1)
future * 1.34.0 2024-07-29 [1] CRAN (R 4.4.1)
ggeasy * 0.1.4 2023-03-12 [1] CRAN (R 4.4.1)
ggfx * 1.0.1 2022-08-22 [1] CRAN (R 4.4.1)
ggplot2 * 3.5.1 2024-04-23 [1] CRAN (R 4.4.1)
ggtext * 0.1.2 2022-09-16 [1] CRAN (R 4.4.1)
glue * 1.7.0 2024-01-09 [1] CRAN (R 4.4.1)
lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.4.1)
purrr * 1.0.2 2023-08-10 [1] CRAN (R 4.4.1)
rchess * 0.1 2024-09-26 [1] Github (jbkunst/rchess@7982953)
readr * 2.1.5 2024-01-10 [1] CRAN (R 4.4.1)
sessioninfo * 1.2.2 2021-12-06 [1] CRAN (R 4.4.1)
showtext * 0.9-7 2024-03-02 [1] CRAN (R 4.4.1)
showtextdb * 3.0 2020-06-04 [1] CRAN (R 4.4.1)
stringr * 1.5.1 2023-11-14 [1] CRAN (R 4.4.1)
sysfonts * 0.8.9 2024-03-02 [1] CRAN (R 4.4.1)
tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.4.1)
tidyr * 1.3.1 2024-01-24 [1] CRAN (R 4.4.1)
tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.4.1)
[1] C:/Coding/R-4.4.1/library
──────────────────────────────────────────────────────────────────────────────
Citation
@online{monaghan2024,
author = {Monaghan, Cormac},
title = {Visualising Positional Moves in Chess},
date = {2024-09-28},
url = {https://c-monaghan.github.io/posts/2024/2024-09-28-TT-W40/},
langid = {en}
}