library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(canpumf)
options(canpumf.cache_path = Sys.getenv("COMPILE_VIG_CANPUMF"))

The LFS is one of the most-used PUMF series, since January 2021 the LFS PUMF is now easily available for direct download instead of needing to request it via EFT. This makes it very easy to integrate the LFS into reproducible workflows.

The canpumf package has two functions to facilitate access to the LFS PUMF. The first lists all LFS pumf versions that are available for direct download.

list_canpumf_collection() |> 
  filter(Acronym=="LFS")
#> # A tibble: 20 × 5
#>    Title               Acronym Version `Survey Number` url                      
#>    <chr>               <chr>   <chr>   <chr>           <chr>                    
#>  1 Labour Force Survey LFS     2024-02 3701            https://www150.statcan.g…
#>  2 Labour Force Survey LFS     2024-01 3701            https://www150.statcan.g…
#>  3 Labour Force Survey LFS     2023    3701            https://www150.statcan.g…
#>  4 Labour Force Survey LFS     2022    3701            https://www150.statcan.g…
#>  5 Labour Force Survey LFS     2021    3701            https://www150.statcan.g…
#>  6 Labour Force Survey LFS     2020    3701            https://www150.statcan.g…
#>  7 Labour Force Survey LFS     2019    3701            https://www150.statcan.g…
#>  8 Labour Force Survey LFS     2018    3701            https://www150.statcan.g…
#>  9 Labour Force Survey LFS     2017    3701            https://www150.statcan.g…
#> 10 Labour Force Survey LFS     2016    3701            https://www150.statcan.g…
#> 11 Labour Force Survey LFS     2015    3701            https://www150.statcan.g…
#> 12 Labour Force Survey LFS     2014    3701            https://www150.statcan.g…
#> 13 Labour Force Survey LFS     2013    3701            https://www150.statcan.g…
#> 14 Labour Force Survey LFS     2012    3701            https://www150.statcan.g…
#> 15 Labour Force Survey LFS     2011    3701            https://www150.statcan.g…
#> 16 Labour Force Survey LFS     2010    3701            https://www150.statcan.g…
#> 17 Labour Force Survey LFS     2009    3701            https://www150.statcan.g…
#> 18 Labour Force Survey LFS     2008    3701            https://www150.statcan.g…
#> 19 Labour Force Survey LFS     2007    3701            https://www150.statcan.g…
#> 20 Labour Force Survey LFS     2006    3701            https://www150.statcan.g…

The second one fetches and loads the LFS data. For example, to download the LFS pumf for 2022 we use

lfs_2022 <- get_pumf("LFS","2022")

By default the data is stored in the temporary session path, optionally we can set the canpumf.cache_path option to a path to permanently cache the data.

To convert to human readable column lables and column names we use the label_pumf_data function.

lfs_2022 <- lfs_2022 |> label_pumf_data() 

With this we can do some simple descriptive analysis. We could use the add_bootstrap_weights function to add bootstrap wegihts if desired. We focus in on February 2022 and add boodstrap weights. By default this only adds 16 weights, for more serious applications we would want to add more weights.

lfs_2022_02_data <- lfs_2022 |> 
  filter(`Survey month`=="February") |>
  add_bootstrap_weights("Standard final weight",seed=42)

For this vignette we look at gender-specific labour fource status statistics for the 20 to 64 year old population, computing age-adjusted rates to even out age-specific effects.

data <- lfs_2022_02_data %>%
  filter(substr(`Five-year age group of respondent`,0,2) %in% seq(20,60,5)) %>%
  filter(`Labour force status`!="Not in labour force") %>%
  group_by(`Labour force status`,`Five-year age group of respondent`,`Sex of respondent`,
           `Marital status of respondent`) %>%
  summarise(across(matches("Standard final weight|BSW\\d+"),sum),.groups="drop") %>%
  pivot_longer(matches("Standard final weight|BSW\\d+"),names_to="Weight",values_to="Count") %>%
  group_by(`Five-year age group of respondent`,`Sex of respondent`,
           `Marital status of respondent`, Weight) %>%
  mutate(Share=ifelse(Count==0,0,Count/sum(Count))) %>%
  ungroup()

data_age_adjusted <- data %>%
  left_join((.) %>% 
              group_by(`Five-year age group of respondent`,`Sex of respondent`,Weight) %>%
              summarize(Count=sum(Count),.groups="drop") %>%
              group_by(`Sex of respondent`,Weight) %>%
              mutate(P_age__sex=Count/sum(Count)) %>%
              ungroup() %>%
              select(`Sex of respondent`,`Five-year age group of respondent`,Weight,P_age__sex),
            by=c("Sex of respondent","Five-year age group of respondent","Weight")) %>%
  group_by(`Sex of respondent`,`Labour force status`,`Marital status of respondent`, Weight) %>%
  summarise(age_adjusted=sum(Share*P_age__sex),.groups="drop")
  
data_age_adjusted %>%
  filter(`Labour force status`=="Unemployed") %>%
ggplot(aes(x=age_adjusted, y=`Marital status of respondent`, fill=`Sex of respondent`)) +
  geom_boxplot() +
  geom_point(shape=21,data=~filter(.,Weight=="Standard final weight"),position=position_dodge(width=0.75)) +
  scale_x_continuous(labels=scales::percent) +
  labs(title="Unemployment rates of 20 to 64 year olds in February 2022",
       x="Age-adjusted unemployment rate",
       caption="StatCan LFS PUMF 2022-02")

data2 <- lfs_2022_02_data %>%
  filter(substr(`Five-year age group of respondent`,0,2) %in% seq(20,60,5)) %>%
  group_by(`Labour force status`,`Five-year age group of respondent`,`Sex of respondent`,
           `Marital status of respondent`) %>%
  summarise(across(matches("Standard final weight|BSW\\d+"),sum),.groups="drop") %>%
  pivot_longer(matches("Standard final weight|BSW\\d+"),names_to="Weight",values_to="Count") %>%
  group_by(`Five-year age group of respondent`,`Sex of respondent`,
           `Marital status of respondent`, Weight) %>%
  mutate(Share=ifelse(Count==0,0,Count/sum(Count))) %>%
  ungroup()

data_age_adjusted2 <- data2 %>%
  left_join((.) %>% 
              group_by(`Five-year age group of respondent`,`Sex of respondent`,Weight) %>%
              summarize(Count=sum(Count),.groups="drop") %>%
              group_by(`Sex of respondent`,Weight) %>%
              mutate(P_age__sex=Count/sum(Count)) %>%
              ungroup() %>%
              select(`Sex of respondent`,`Five-year age group of respondent`,Weight,P_age__sex),
            by=c("Sex of respondent","Five-year age group of respondent","Weight")) %>%
  group_by(`Sex of respondent`,`Labour force status`,`Marital status of respondent`, Weight) %>%
  summarise(age_adjusted=sum(Share*P_age__sex),.groups="drop")
  
data_age_adjusted2 %>%
  filter(`Labour force status`=="Not in labour force") %>%
ggplot(aes(x=1-age_adjusted, y=`Marital status of respondent`, fill=`Sex of respondent`)) +
  geom_boxplot() +
  geom_point(shape=21,data=~filter(.,Weight=="Standard final weight"),position=position_dodge(width=0.75)) +
  scale_x_continuous(labels=scales::percent) +
  labs(title="Labour force participation rates of 20 to 64 year olds in February 2022",
       x="Age-adjusted participation rate",
       caption="StatCan LFS PUMF 2022-02")

data_age_adjusted2 %>%
  filter(`Labour force status`=="Employed, at work") %>%
ggplot(aes(x=age_adjusted, y=`Marital status of respondent`, fill=`Sex of respondent`)) +
  geom_boxplot() +
  geom_point(shape=21,data=~filter(.,Weight=="Standard final weight"),position=position_dodge(width=0.75)) +
  scale_x_continuous(labels=scales::percent) +
  labs(title="Share of 20 to 64 year olds working in February 2022",
       x="Age-adjusted share at work",
       caption="StatCan LFS PUMF 2022-02")