library(readxl)
library(httr2)
library(tidyverse)
# Define file URL and destination path
url <- "https://www.jobsandskills.gov.au/sites/default/files/2024-12/occupation_profiles_data_-_november_2024.xlsx"
destfile <- "occupation_profiles_data.xlsx"
# Create request with headers
resp <- request(url) |>
req_headers(
`User-Agent` = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:135.0) Gecko/20100101 Firefox/135.0",
Referer = "https://www.jobsandskills.gov.au/data/occupation-and-industry-profiles/occupations/2243-economists",
) |>
req_perform()
# Write the response body (file content) to a local file
writeBin(resp$body, destfile)
table_1 <- read_excel(destfile, sheet = "Table_1", skip = 5, na = "N/A")
table_1
#> # A tibble: 1,236 × 8
#> `ANZSCO Code` Occupation Employed `Part-time share (%)` `Female share (%)`
#> <dbl> <chr> <chr> <dbl> <dbl>
#> 1 1111 Chief Execut… 55800 16 30
#> 2 1112 General Mana… 71500 9 30
#> 3 1113 Legislators 3100 19 34
#> 4 1211 Aquaculture … 1800 19 24
#> 5 1212 Crop Farmers 37600 19 23
#> 6 1213 Livestock Fa… 80400 30 34
#> 7 1214 Mixed Crop a… 27000 10 20
#> 8 1311 Advertising,… 166300 11 48
#> 9 1321 Corporate Se… 23900 24 66
#> 10 1322 Finance Mana… 75100 9 44
#> # ℹ 1,226 more rows
#> # ℹ 3 more variables: `Median weekly earnings` <dbl>, `Median age` <dbl>,
#> # `Annual employment growth` <dbl>
table_2 <- read_excel(destfile, sheet = "Table_2", skip = 5, na = "N/A")
table_2
#> # A tibble: 1,236 × 3
#> `ANZSCO Code` Occupation Description
#> <dbl> <chr> <chr>
#> 1 1111 Chief Executives and Managing Directors Chief Executiv…
#> 2 1112 General Managers General Manage…
#> 3 1113 Legislators Legislators re…
#> 4 1211 Aquaculture Farmers Aquaculture Fa…
#> 5 1212 Crop Farmers Crop Farmers p…
#> 6 1213 Livestock Farmers Livestock Farm…
#> 7 1214 Mixed Crop and Livestock Farmers Mixed Crop and…
#> 8 1311 Advertising, Public Relations and Sales Manager Advertising, P…
#> 9 1321 Corporate Services Managers Corporate Serv…
#> 10 1322 Finance Managers Finance Manage…
#> # ℹ 1,226 more rows
table_3 <- read_excel(destfile, sheet = "Table_3", skip = 5, na = "N/A")
table_3
#> # A tibble: 8,113 × 3
#> `ANZSCO Code` Occupation Tasks
#> <dbl> <chr> <chr>
#> 1 1111 Chief Executives and Managing Directors Determining objectives…
#> 2 1111 Chief Executives and Managing Directors Providing overall dire…
#> 3 1111 Chief Executives and Managing Directors Authorising material, …
#> 4 1111 Chief Executives and Managing Directors Monitoring and evaluat…
#> 5 1111 Chief Executives and Managing Directors Consulting with senior…
#> 6 1111 Chief Executives and Managing Directors Preparing, or arrangin…
#> 7 1111 Chief Executives and Managing Directors Representing the organ…
#> 8 1111 Chief Executives and Managing Directors Selecting and approvin…
#> 9 1111 Chief Executives and Managing Directors Ensuring the organisat…
#> 10 1112 General Managers Planning policy, and s…
#> # ℹ 8,103 more rows
table_4 <- read_excel(destfile, sheet = "Table_4", skip = 5, na = "N/A")
table_4
#> # A tibble: 1,236 × 6
#> `ANZSCO Code` Occupation Share of workers who…¹ Average full-time ho…²
#> <dbl> <chr> <dbl> <dbl>
#> 1 1111 Chief Executives… 84 50
#> 2 1112 General Managers 91 47
#> 3 1113 Legislators 81 52
#> 4 1211 Aquaculture Farm… 81 46
#> 5 1212 Crop Farmers 81 51
#> 6 1213 Livestock Farmers 70 55
#> 7 1214 Mixed Crop and L… 90 55
#> 8 1311 Advertising, Pub… 89 44
#> 9 1321 Corporate Servic… 76 44
#> 10 1322 Finance Managers 91 45
#> # ℹ 1,226 more rows
#> # ℹ abbreviated names: ¹`Share of workers who work full-time hours (%)`,
#> # ²`Average full-time hours worked per week`
#> # ℹ 2 more variables: `Median full-time earnings per week` <dbl>,
#> # `Median full-time hourly earnings` <dbl>
table_5 <- read_excel(destfile, sheet = "Table_5", skip = 5, na = "N/A")
table_5
#> # A tibble: 3,708 × 3
#> `ANZSCO Code` Occupation `Industry (ranked)`
#> <dbl> <chr> <chr>
#> 1 1111 Chief Executives and Managing Directors Professional, Scientif…
#> 2 1111 Chief Executives and Managing Directors Manufacturing
#> 3 1111 Chief Executives and Managing Directors Health Care and Social…
#> 4 1112 General Managers Health Care and Social…
#> 5 1112 General Managers Manufacturing
#> 6 1112 General Managers Professional, Scientif…
#> 7 1113 Legislators Public Administration …
#> 8 1113 Legislators <NA>
#> 9 1113 Legislators <NA>
#> 10 1211 Aquaculture Farmers Agriculture, Forestry …
#> # ℹ 3,698 more rows
table_6 <- read_excel(destfile, sheet = "Table_6", skip = 5, na = "N/A")
table_6 |>
pivot_longer(cols = -1:-2) |>
mutate(name = str_trim(str_replace(name, "\\s.*$", "")),
value = value / 100)
#> # A tibble: 9,888 × 4
#> `ANZSCO Code` Occupation name value
#> <dbl> <chr> <chr> <dbl>
#> 1 1111 Chief Executives and Managing Directors NSW 0.334
#> 2 1111 Chief Executives and Managing Directors VIC 0.276
#> 3 1111 Chief Executives and Managing Directors QLD 0.172
#> 4 1111 Chief Executives and Managing Directors SA 0.06
#> 5 1111 Chief Executives and Managing Directors WA 0.108
#> 6 1111 Chief Executives and Managing Directors TAS 0.02
#> 7 1111 Chief Executives and Managing Directors NT 0.014
#> 8 1111 Chief Executives and Managing Directors ACT 0.017
#> 9 1112 General Managers NSW 0.286
#> 10 1112 General Managers VIC 0.284
#> # ℹ 9,878 more rows
table_7 <- read_excel(destfile, sheet = "Table_7", skip = 5, na = "N/A")
table_7 |>
pivot_longer(cols = -1:-2) |>
mutate(name = str_trim(str_replace(name, "\\s.*$", "")),
value = value / 100)
#> # A tibble: 9,888 × 4
#> `ANZSCO Code` Occupation name value
#> <dbl> <chr> <chr> <dbl>
#> 1 1111 Chief Executives and Managing Directors 15 - 19 0
#> 2 1111 Chief Executives and Managing Directors 20 - 24 0.005
#> 3 1111 Chief Executives and Managing Directors 25 - 34 0.069
#> 4 1111 Chief Executives and Managing Directors 35 - 44 0.226
#> 5 1111 Chief Executives and Managing Directors 45 - 54 0.333
#> 6 1111 Chief Executives and Managing Directors 55 - 59 0.147
#> 7 1111 Chief Executives and Managing Directors 60 - 64 0.105
#> 8 1111 Chief Executives and Managing Directors 65 + 0.114
#> 9 1112 General Managers 15 - 19 0.001
#> 10 1112 General Managers 20 - 24 0.008
#> # ℹ 9,878 more rows
table_8 <- read_excel(destfile, sheet = "Table_8", skip = 5, na = "N/A")
table_8 |>
pivot_longer(cols = -1:-2) |>
mutate(name = str_trim(str_replace(name, "\\s.*$", "")),
value = value / 100)
#> # A tibble: 8,652 × 4
#> `ANZSCO Code` Occupation name value
#> <dbl> <chr> <chr> <dbl>
#> 1 1111 Chief Executives and Managing Directors Post Graduate/ G… 0.253
#> 2 1111 Chief Executives and Managing Directors Bachelor degree 0.315
#> 3 1111 Chief Executives and Managing Directors Advanced Diploma… 0.118
#> 4 1111 Chief Executives and Managing Directors Certificate III/… 0.118
#> 5 1111 Chief Executives and Managing Directors Year 12 0.102
#> 6 1111 Chief Executives and Managing Directors Year 11 0.019
#> 7 1111 Chief Executives and Managing Directors Year 10 and below 0.042
#> 8 1112 General Managers Post Graduate/ G… 0.236
#> 9 1112 General Managers Bachelor degree 0.3
#> 10 1112 General Managers Advanced Diploma… 0.152
#> # ℹ 8,642 more rows
table_9 <- read_excel(destfile, sheet = "Table_9", skip = 5, na = "N/A")
# Need some clean-up below
table_9 |>
pivot_longer(cols = -1) |>
mutate(name = str_trim(str_replace(name, "\\s.*$", "")))
#> # A tibble: 42 × 3
#> `All Occupations` name value
#> <chr> <chr> <dbl>
#> 1 4-digit occupations "Female" 48
#> 2 4-digit occupations "Median" 1697
#> 3 4-digit occupations "Median" 39
#> 4 4-digit occupations "Share" 70
#> 5 4-digit occupations "Average" 44
#> 6 4-digit occupations "Median" 43
#> 7 4-digit occupations "Age\r\n15-19" 5.2
#> 8 4-digit occupations "Age\r\n20-24" 9.1
#> 9 4-digit occupations "Age\r\n25-34" 22.7
#> 10 4-digit occupations "Age\r\n35-44" 22.3
#> # ℹ 32 more rows
Created on 2025-02-28 with reprex v2.1.1