Analysis

Load Packages

# Load Packages
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the SEN enrolment table; the columns are level, type, start_year and
# no_of_students
df <- readr::read_csv(file = "data/integrated_Education_EN copy.csv")
Rows: 108 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): level, type
dbl (2): start_year, no_of_students

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Data Analysis

1. What are the most common types of SEN students?

Creating dataframe for total number of primary students over the years

# Primary level only: add up no_of_students for every SEN type across all
# rows in the data, then rank the types from largest to smallest total
ccp <- df |>
  dplyr::filter(level == "Primary") |>
  dplyr::group_by(level, type) |>
  dplyr::summarise(n = sum(no_of_students), .groups = "drop") |>
  dplyr::arrange(dplyr::desc(n))

# Preview the six largest totals
head(ccp, n = 6L)
# A tibble: 6 × 3
  level   type      n
  <chr>   <chr> <dbl>
1 Primary SpLD  72470
2 Primary ASD   41430
3 Primary AD/HD 34500
4 Primary SLI   19940
5 Primary ID     6220
6 Primary HI     2220

Creating a bar graph

# Horizontal bar chart of the primary-level totals held in `ccp`, with bars
# ordered by total and one Set3 colour per SEN type
library(ggplot2)
library(forcats)

ccp |>
  ggplot(aes(x = fct_reorder(type, n), y = n, fill = type)) +
  geom_col() +
  labs(title = "Number of Students with SEN Conditions (Primary Level)",
       x = "Type of SEN Conditions",
       y = "Number of Students with that specific SEN from 2019 to 2024") +
  theme_bw() +
  coord_flip() +
  # Labels are drawn in black just past the end of each bar. White text placed
  # inside the bar spills onto the white panel on the short bars and cannot be
  # read there.
  geom_text(aes(label = n), vjust = 0.5, hjust = -0.1, color = "black") +
  scale_fill_brewer(palette = "Set3") +
  # Leave room beyond the longest bar so its label is not clipped
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.2)))

Installing Patchwork Package

# Install patchwork only when it is missing, so that re-running or
# re-rendering the analysis does not download the package every time
options(repos = c(CRAN = "https://cloud.r-project.org/"))
if (!requireNamespace("patchwork", quietly = TRUE)) {
  install.packages("patchwork")
}

The downloaded binary packages are in
    /var/folders/ws/7cptw1bn6zg08wbvmf213h280000gn/T//RtmpJF1pdg/downloaded_packages
# Loading patchwork package
library(patchwork)

Creating dataframe for total number of secondary students over the years

# Secondary level only: total no_of_students per SEN type over all rows in
# the data, ranked from the most to the least common type
ccs <- df |>
  dplyr::filter(level == "Secondary") |>
  dplyr::group_by(level, type) |>
  dplyr::summarise(m = sum(no_of_students), .groups = "drop") |>
  dplyr::arrange(dplyr::desc(m))

# Preview the six largest totals
head(ccs, n = 6L)
# A tibble: 6 × 3
  level     type      m
  <chr>     <chr> <dbl>
1 Secondary SpLD  78180
2 Secondary AD/HD 54190
3 Secondary ASD   33800
4 Secondary MI     5390
5 Secondary ID     4910
6 Secondary SLI    3940

Creating a bar graph

# Horizontal bar chart of the secondary-level totals held in `ccs`, with bars
# ordered by total and one Set3 colour per SEN type
library(ggplot2)
library(forcats)

ccs |>
  ggplot(aes(x = fct_reorder(type, m), y = m, fill = type)) +
  geom_col() +
  labs(title = "Number of Students with SEN Conditions (Secondary Level)",
       x = "Type of SEN Conditions",
       y = "Number of Students from 2019 to 2024") +
  theme_bw() +
  coord_flip() +
  # Labels are drawn in black just past the end of each bar. White text placed
  # inside the bar spills onto the white panel on the short bars and cannot be
  # read there.
  geom_text(aes(label = m), vjust = 0.5, hjust = -0.1, color = "black") +
  scale_fill_brewer(palette = "Set3") +
  # Keep the 0-80000 axis limits, but add room past the upper limit so the
  # label of the longest bar is not clipped
  scale_y_continuous(limits = c(0, 80000),
                     expand = expansion(mult = c(0.05, 0.2)))

Merging the primary and secondary bar graphs to showcase the most common types of SEN students

# Stack the primary and secondary bar charts into a single figure and save it
library(ggplot2)
library(forcats)
library(patchwork)

## Bar order for the primary chart: types sorted by ascending total, so the
## largest bar ends up at the top once coord_flip() is applied
type_order <- ccp |>
  group_by(type) |>
  summarise(total = sum(n)) |>
  arrange(total) |>
  pull(type)

# Set factor levels for primary data frame
ccp$type <- factor(ccp$type, levels = type_order)

# Bar order for the secondary chart, taken from the secondary totals (each
# chart is ranked on its own counts, so the two orders can differ)
ccs_order <- ccs |>
  group_by(type) |>
  summarise(total = sum(m)) |>
  arrange(total) |>
  pull(type)

# Set factor levels for secondary data frame
ccs$type <- factor(ccs$type, levels = ccs_order)

## One Set3 colour per SEN type. The names cover the types of BOTH levels so
## that a type present only in the secondary data still receives a fill; when
## both levels share the same types this equals the primary ordering.
all_types <- union(type_order, ccs_order)
color_palette <- RColorBrewer::brewer.pal(n = length(all_types), name = "Set3")

# Create a named vector for consistent color mapping
color_mapping <- setNames(color_palette, all_types)

## Common upper axis limit for both panels: at least 80000, raised
## automatically if a total exceeds it (bars beyond the limits are dropped)
shared_y_max <- max(80000, ccp$n, ccs$m)

## Primary Level Graph without legend
p1 <- ccp |>
  ggplot(aes(x = type, y = n, fill = type)) +
  geom_col(width = 0.8) +
  labs(title = "Number of Students with SEN Conditions (Primary Level)",
       x = "Type of SEN Conditions",
       y = "   ") +
  theme_bw() +
  coord_flip() +
  # Small black value labels placed outside the end of each bar
  geom_text(aes(label = n), vjust = 0.5, hjust = -0.5, color = "black",
            size = 2) +
  scale_fill_manual(values = color_mapping, guide = "none") +
  scale_y_continuous(limits = c(0, shared_y_max))

# Secondary Level Graph (carries the only legend of the combined figure)
p2 <- ccs |>
  ggplot(aes(x = type, y = m, fill = type)) +
  geom_col(width = 0.8) +
  labs(title = "Number of Students with SEN Conditions (Secondary Level)",
       x = "Type of SEN Conditions",
       y = "Number of Students with that specific SEN from 2019 to 2024") +
  theme_bw() +
  coord_flip() +
  # Small black value labels placed outside the end of each bar
  geom_text(aes(label = m), vjust = 0.5, hjust = -0.5, color = "black",
            size = 2) +
  scale_fill_manual(values = color_mapping) +
  scale_y_continuous(limits = c(0, shared_y_max)) +
  theme(legend.position = "right",
        legend.box = "vertical",
        legend.margin = margin(t = -100))  # Negative top margin lifts the legend

## Stack the two plots in one column and add an overall title and caption.
## The legend shown is the one drawn by p2; p1 has its legend switched off.
combined_plot <- p1 + p2 +
  plot_layout(ncol = 1) +
  plot_annotation(title = "Students with SEN Conditions from 2019 to 2024",
                  caption = "Source: DATA.GOV.HK: Chan Lok Yiu Chloe")

combined_plot

# Name the plot explicitly instead of relying on whichever plot was last drawn
ggsave("bar_ps_graph.png", plot = combined_plot)
Saving 7 x 5 in image

2. When did the number of SEN students begin to increase rapidly?

Creating dataframe for primary SEN students over the years

# Primary rows only. count() keeps one row per distinct
# (type, start_year, no_of_students) combination, sorted by those columns,
# and appends `n`, the number of source rows in each combination
pri <- df |>
  dplyr::filter(level == "Primary") |>
  dplyr::count(type, start_year, no_of_students)

# Preview the first six rows
head(pri, n = 6L)
# A tibble: 6 × 4
  type  start_year no_of_students     n
  <chr>      <dbl>          <dbl> <int>
1 AD/HD       2019           5500     1
2 AD/HD       2020           6030     1
3 AD/HD       2021           5830     1
4 AD/HD       2022           5750     1
5 AD/HD       2023           5670     1
6 AD/HD       2024           5720     1

Creating dataframe for secondary SEN students over the years

# Secondary rows only. count() keeps one row per distinct
# (type, start_year, no_of_students) combination, sorted by those columns,
# and appends `n`, the number of source rows in each combination
sec <- df |>
  dplyr::filter(level == "Secondary") |>
  dplyr::count(type, start_year, no_of_students)

# Preview the first six rows
head(sec, n = 6L)
# A tibble: 6 × 4
  type  start_year no_of_students     n
  <chr>      <dbl>          <dbl> <int>
1 AD/HD       2019           7660     1
2 AD/HD       2020           8550     1
3 AD/HD       2021           9000     1
4 AD/HD       2022           9290     1
5 AD/HD       2023           9650     1
6 AD/HD       2024          10040     1

Interactive line graph to explain when the number of Primary SEN students began to increase

# Interactive line graph for the number of primary SEN students from 2019-2024
library(ggplot2)
library(dplyr)
library(plotly)

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
library(RColorBrewer)
library(colorspace)  

## Build the plotting table from `pri`: one row per distinct
## (start_year, no_of_students, type). The student numbers themselves are in
## no_of_students; the `n` column added by count() only counts rows.
data <- pri |>
  count(start_year, no_of_students, type)

## One Set3 colour per SEN type, darkened by 30% before use
original_palette <- brewer.pal(n = length(unique(data$type)), name = "Set3")
darkened_palette <- darken(original_palette, amount = 0.3)

## Dotted line plus points per SEN type, year on x and student count on y
p <- ggplot(data, aes(x = start_year, y = no_of_students, color = type, group = type)) +
  # `linewidth` replaces `size` for lines, which is deprecated since
  # ggplot2 3.4.0
  geom_line(linewidth = 1, linetype = "dotted") +
  geom_point(size = 2) +
  scale_color_manual(values = darkened_palette) +
  labs(title = "Number of Primary SEN Students by Type and Year",
       x = "Year",
       y = "Number of SEN Students") +
  theme_bw() +
  theme(legend.position = "right")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
## Turn the static ggplot `p` into a plotly widget whose hover text lists the
## year, the student count and the SEN type
interactive_plot1 <- ggplotly(
  p,
  tooltip = c("start_year", "no_of_students", "type")
)

# Attach the data-source caption as an annotation. x and y are expressed in
# 'paper' coordinates (xref/yref), and no arrow is drawn.
interactive_plot1 <- plotly::layout(
  interactive_plot1,
  annotations = list(
    text = "Source: DATA.GOV.HK: Chan Lok Yiu Chloe",
    x = 1.1,
    y = -0.12,
    xref = 'paper',
    yref = 'paper',
    showarrow = FALSE,
    font = list(size = 10)
  )
)

# Render the widget
interactive_plot1

Interactive line graph to explain when the number of Secondary SEN students began to increase

library(ggplot2)
library(dplyr)
library(plotly)
library(RColorBrewer)
library(colorspace)  

## Build the plotting table from `sec`: one row per distinct
## (start_year, no_of_students, type). The student numbers themselves are in
## no_of_students; the `n` column added by count() only counts rows.
data <- sec |>
  count(start_year, no_of_students, type)

## One Set3 colour per SEN type, darkened by 30% before use
original_palette <- brewer.pal(n = length(unique(data$type)), name = "Set3")
darkened_palette <- darken(original_palette, amount = 0.3)

## Dotted line plus points per SEN type, year on x and student count on y
p <- ggplot(data, aes(x = start_year, y = no_of_students, color = type, group = type)) +
  # `linewidth` replaces `size` for lines, which is deprecated since
  # ggplot2 3.4.0
  geom_line(linewidth = 1, linetype = "dotted") +
  geom_point(size = 2) +
  scale_color_manual(values = darkened_palette) +
  labs(title = "Number of Secondary SEN Students by Type and Year",
       x = "Year",
       y = "Number of SEN Students") +
  theme_bw() +
  theme(legend.position = "right")

## Turn the static ggplot `p` into a plotly widget whose hover text lists the
## year, the student count and the SEN type
interactive_plot2 <- ggplotly(
  p,
  tooltip = c("start_year", "no_of_students", "type")
)

## Attach the data-source caption as an annotation. x and y are expressed in
## 'paper' coordinates (xref/yref), and no arrow is drawn.
interactive_plot2 <- plotly::layout(
  interactive_plot2,
  annotations = list(
    text = "Source: DATA.GOV.HK: Chan Lok Yiu Chloe",
    x = 1.1,
    y = -0.12,
    xref = 'paper',
    yref = 'paper',
    showarrow = FALSE,
    font = list(size = 10)
  )
)

## Render the widget
interactive_plot2