Skip to content

Instantly share code, notes, and snippets.

@iangow
Last active February 28, 2025 20:27
Show Gist options
  • Save iangow/a7f61813c4d82844d0a4c7d5e8e3e5bc to your computer and use it in GitHub Desktop.
Save iangow/a7f61813c4d82844d0a4c7d5e8e3e5bc to your computer and use it in GitHub Desktop.
Code to download spreadsheet from Jobs and Skills.
library(readxl)
library(httr2)
library(tidyverse)

# Define file URL and destination path
url <- "https://www.jobsandskills.gov.au/sites/default/files/2024-12/occupation_profiles_data_-_november_2024.xlsx"
destfile <- "occupation_profiles_data.xlsx"

# Create request with headers
resp <- request(url) |>
  req_headers(
    `User-Agent` = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:135.0) Gecko/20100101 Firefox/135.0",
    Referer = "https://www.jobsandskills.gov.au/data/occupation-and-industry-profiles/occupations/2243-economists",
  ) |>
  req_perform()

# Write the response body (file content) to a local file
writeBin(resp$body, destfile)

table_1 <- read_excel(destfile, sheet = "Table_1", skip = 5, na = "N/A")
table_1
#> # A tibble: 1,236 × 8
#>    `ANZSCO Code` Occupation    Employed `Part-time share (%)` `Female share (%)`
#>            <dbl> <chr>         <chr>                    <dbl>              <dbl>
#>  1          1111 Chief Execut… 55800                       16                 30
#>  2          1112 General Mana… 71500                        9                 30
#>  3          1113 Legislators   3100                        19                 34
#>  4          1211 Aquaculture … 1800                        19                 24
#>  5          1212 Crop Farmers  37600                       19                 23
#>  6          1213 Livestock Fa… 80400                       30                 34
#>  7          1214 Mixed Crop a… 27000                       10                 20
#>  8          1311 Advertising,… 166300                      11                 48
#>  9          1321 Corporate Se… 23900                       24                 66
#> 10          1322 Finance Mana… 75100                        9                 44
#> # ℹ 1,226 more rows
#> # ℹ 3 more variables: `Median weekly earnings` <dbl>, `Median age` <dbl>,
#> #   `Annual employment growth` <dbl>

table_2 <- read_excel(destfile, sheet = "Table_2", skip = 5, na = "N/A")
table_2
#> # A tibble: 1,236 × 3
#>    `ANZSCO Code` Occupation                                      Description    
#>            <dbl> <chr>                                           <chr>          
#>  1          1111 Chief Executives and Managing Directors         Chief Executiv…
#>  2          1112 General Managers                                General Manage…
#>  3          1113 Legislators                                     Legislators re…
#>  4          1211 Aquaculture Farmers                             Aquaculture Fa…
#>  5          1212 Crop Farmers                                    Crop Farmers p…
#>  6          1213 Livestock Farmers                               Livestock Farm…
#>  7          1214 Mixed Crop and Livestock Farmers                Mixed Crop and…
#>  8          1311 Advertising, Public Relations and Sales Manager Advertising, P…
#>  9          1321 Corporate Services Managers                     Corporate Serv…
#> 10          1322 Finance Managers                                Finance Manage…
#> # ℹ 1,226 more rows

table_3 <- read_excel(destfile, sheet = "Table_3", skip = 5, na = "N/A")
table_3
#> # A tibble: 8,113 × 3
#>    `ANZSCO Code` Occupation                              Tasks                  
#>            <dbl> <chr>                                   <chr>                  
#>  1          1111 Chief Executives and Managing Directors Determining objectives…
#>  2          1111 Chief Executives and Managing Directors Providing overall dire…
#>  3          1111 Chief Executives and Managing Directors Authorising material, …
#>  4          1111 Chief Executives and Managing Directors Monitoring and evaluat…
#>  5          1111 Chief Executives and Managing Directors Consulting with senior…
#>  6          1111 Chief Executives and Managing Directors Preparing, or arrangin…
#>  7          1111 Chief Executives and Managing Directors Representing the organ…
#>  8          1111 Chief Executives and Managing Directors Selecting and approvin…
#>  9          1111 Chief Executives and Managing Directors Ensuring the organisat…
#> 10          1112 General Managers                        Planning policy, and s…
#> # ℹ 8,103 more rows

table_4 <- read_excel(destfile, sheet = "Table_4", skip = 5, na = "N/A")
table_4
#> # A tibble: 1,236 × 6
#>    `ANZSCO Code` Occupation        Share of workers who…¹ Average full-time ho…²
#>            <dbl> <chr>                              <dbl>                  <dbl>
#>  1          1111 Chief Executives…                     84                     50
#>  2          1112 General Managers                      91                     47
#>  3          1113 Legislators                           81                     52
#>  4          1211 Aquaculture Farm…                     81                     46
#>  5          1212 Crop Farmers                          81                     51
#>  6          1213 Livestock Farmers                     70                     55
#>  7          1214 Mixed Crop and L…                     90                     55
#>  8          1311 Advertising, Pub…                     89                     44
#>  9          1321 Corporate Servic…                     76                     44
#> 10          1322 Finance Managers                      91                     45
#> # ℹ 1,226 more rows
#> # ℹ abbreviated names: ¹​`Share of workers who work full-time hours (%)`,
#> #   ²​`Average full-time hours worked per week`
#> # ℹ 2 more variables: `Median full-time earnings per week` <dbl>,
#> #   `Median full-time hourly earnings` <dbl>

table_5 <- read_excel(destfile, sheet = "Table_5", skip = 5, na = "N/A")
table_5
#> # A tibble: 3,708 × 3
#>    `ANZSCO Code` Occupation                              `Industry (ranked)`    
#>            <dbl> <chr>                                   <chr>                  
#>  1          1111 Chief Executives and Managing Directors Professional, Scientif…
#>  2          1111 Chief Executives and Managing Directors Manufacturing          
#>  3          1111 Chief Executives and Managing Directors Health Care and Social…
#>  4          1112 General Managers                        Health Care and Social…
#>  5          1112 General Managers                        Manufacturing          
#>  6          1112 General Managers                        Professional, Scientif…
#>  7          1113 Legislators                             Public Administration …
#>  8          1113 Legislators                             <NA>                   
#>  9          1113 Legislators                             <NA>                   
#> 10          1211 Aquaculture Farmers                     Agriculture, Forestry …
#> # ℹ 3,698 more rows

table_6 <- read_excel(destfile, sheet = "Table_6", skip = 5, na = "N/A")
table_6 |>
  pivot_longer(cols = -1:-2) |>
  mutate(name = str_trim(str_replace(name, "\\s.*$", "")),
         value = value / 100)
#> # A tibble: 9,888 × 4
#>    `ANZSCO Code` Occupation                              name  value
#>            <dbl> <chr>                                   <chr> <dbl>
#>  1          1111 Chief Executives and Managing Directors NSW   0.334
#>  2          1111 Chief Executives and Managing Directors VIC   0.276
#>  3          1111 Chief Executives and Managing Directors QLD   0.172
#>  4          1111 Chief Executives and Managing Directors SA    0.06 
#>  5          1111 Chief Executives and Managing Directors WA    0.108
#>  6          1111 Chief Executives and Managing Directors TAS   0.02 
#>  7          1111 Chief Executives and Managing Directors NT    0.014
#>  8          1111 Chief Executives and Managing Directors ACT   0.017
#>  9          1112 General Managers                        NSW   0.286
#> 10          1112 General Managers                        VIC   0.284
#> # ℹ 9,878 more rows

table_7 <- read_excel(destfile, sheet = "Table_7", skip = 5, na = "N/A")
table_7 |>
  pivot_longer(cols = -1:-2) |>
  mutate(name = str_trim(str_replace(name, "\\s.*$", "")),
         value = value / 100)
#> # A tibble: 9,888 × 4
#>    `ANZSCO Code` Occupation                              name    value
#>            <dbl> <chr>                                   <chr>   <dbl>
#>  1          1111 Chief Executives and Managing Directors 15 - 19 0    
#>  2          1111 Chief Executives and Managing Directors 20 - 24 0.005
#>  3          1111 Chief Executives and Managing Directors 25 - 34 0.069
#>  4          1111 Chief Executives and Managing Directors 35 - 44 0.226
#>  5          1111 Chief Executives and Managing Directors 45 - 54 0.333
#>  6          1111 Chief Executives and Managing Directors 55 - 59 0.147
#>  7          1111 Chief Executives and Managing Directors 60 - 64 0.105
#>  8          1111 Chief Executives and Managing Directors 65 +    0.114
#>  9          1112 General Managers                        15 - 19 0.001
#> 10          1112 General Managers                        20 - 24 0.008
#> # ℹ 9,878 more rows

table_8 <- read_excel(destfile, sheet = "Table_8", skip = 5, na = "N/A")
table_8 |>
  pivot_longer(cols = -1:-2) |>
  mutate(name = str_trim(str_replace(name, "\\s.*$", "")),
         value = value / 100)
#> # A tibble: 8,652 × 4
#>    `ANZSCO Code` Occupation                              name              value
#>            <dbl> <chr>                                   <chr>             <dbl>
#>  1          1111 Chief Executives and Managing Directors Post Graduate/ G… 0.253
#>  2          1111 Chief Executives and Managing Directors Bachelor degree   0.315
#>  3          1111 Chief Executives and Managing Directors Advanced Diploma… 0.118
#>  4          1111 Chief Executives and Managing Directors Certificate III/… 0.118
#>  5          1111 Chief Executives and Managing Directors Year 12           0.102
#>  6          1111 Chief Executives and Managing Directors Year 11           0.019
#>  7          1111 Chief Executives and Managing Directors Year 10 and below 0.042
#>  8          1112 General Managers                        Post Graduate/ G… 0.236
#>  9          1112 General Managers                        Bachelor degree   0.3  
#> 10          1112 General Managers                        Advanced Diploma… 0.152
#> # ℹ 8,642 more rows

table_9 <- read_excel(destfile, sheet = "Table_9", skip = 5, na = "N/A")
# Need some clean-up below
table_9 |>
  pivot_longer(cols = -1) |>
  mutate(name = str_trim(str_replace(name, "\\s.*$", "")))
#> # A tibble: 42 × 3
#>    `All Occupations`   name            value
#>    <chr>               <chr>           <dbl>
#>  1 4-digit occupations "Female"         48  
#>  2 4-digit occupations "Median"       1697  
#>  3 4-digit occupations "Median"         39  
#>  4 4-digit occupations "Share"          70  
#>  5 4-digit occupations "Average"        44  
#>  6 4-digit occupations "Median"         43  
#>  7 4-digit occupations "Age\r\n15-19"    5.2
#>  8 4-digit occupations "Age\r\n20-24"    9.1
#>  9 4-digit occupations "Age\r\n25-34"   22.7
#> 10 4-digit occupations "Age\r\n35-44"   22.3
#> # ℹ 32 more rows

Created on 2025-02-28 with reprex v2.1.1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment