Created
May 26, 2025 02:50
-
-
Save larsvilhuber/d212f924b1b96ebe2a547fe7b1d2c1bd to your computer and use it in GitHub Desktop.
Day 1 Cleaning code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
* Day 1 cleaning code.
* Builds a two-column country-code lookup table, attaches 3-letter codes to both
* country fields of the TED sample, aggregates contract values by country pair,
* and merges the result with the CEPII distance data.

* --- Step 1: country-code lookup table -------------------------------------
import delimited "data/country-codes.csv", varnames(1) case(preserve) encoding("utf-8") clear
keep ISO31661Alpha2 ISO31661Alpha3
* Save to the same path that both merges below read from. `save` does not
* create directories, so make sure the target folder exists first
* (`capture` ignores the error raised when it already exists).
capture mkdir "data/derived"
save "data/derived/country-codes.dta", replace

* --- Step 2: TED sample ------------------------------------------------------
import delimited "data/ted-sample.csv", varnames(1) case(preserve) encoding("utf-8") clear
keep ID_NOTICE_CAN YEAR CAE_NAME ISO_COUNTRY_CODE VALUE_EURO WIN_NAME WIN_COUNTRY_CODE

* Look up the 3-letter code for ISO_COUNTRY_CODE and store it as iso_d.
* keep(master match) retains every TED row, matched or not, and discards
* lookup rows that have no TED counterpart.
rename ISO_COUNTRY_CODE ISO31661Alpha2
merge m:1 ISO31661Alpha2 using "data/derived/country-codes.dta", nogenerate keep(master match)
rename ISO31661Alpha3 iso_d
* Free the merge-key name so it can be reused for the second lookup.
drop ISO31661Alpha2

* Same lookup for WIN_COUNTRY_CODE, stored as iso_o.
rename WIN_COUNTRY_CODE ISO31661Alpha2
merge m:1 ISO31661Alpha2 using "data/derived/country-codes.dta", nogenerate keep(master match)
rename ISO31661Alpha3 iso_o

* Romania country code has changed between datasets
replace iso_o = "ROM" if iso_o == "ROU"
replace iso_d = "ROM" if iso_d == "ROU"

* --- Step 3: aggregate by country pair and add distances ---------------------
* One row per (iso_o, iso_d): total VALUE_EURO and N = number of rows with a
* non-missing VALUE_EURO. All other variables are dropped by collapse.
collapse (sum) VALUE_EURO (count) N = VALUE_EURO, by(iso_o iso_d)

* there are 1,782 unmatched observations, but forget about them for now
* keep(match) drops every pair that is not present in both datasets.
merge m:1 iso_o iso_d using "data/dist_cepii.dta", nogenerate keep(match)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment