Created
December 17, 2024 14:08
-
-
Save knbknb/ab2ff6e38efd70d0277c0be6fcdb64ba to your computer and use it in GitHub Desktop.
".sh for data" Cheatsheet: curl, in2csv, csvkit...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
curl -L -O -C - myurl
# - L: follow redirects
# - O: save under the original remote filename
# - C -: continue an interrupted download ("-" lets curl work out the resume offset itself)
wget -bc myurl
# -b : run in the background
# -c : continue a partially downloaded file
in2csv: convert Excel (and other tabular formats) to CSV
# Print all sheet names (works only for Excel files)
in2csv --names MyFile.xlsx
in2csv MyData.xlsx --sheet "Sheet1" > MyData.csv
# (when the workbook has only one sheet, --sheet is unnecessary)
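csvlook MyData.csv
csvcut --names MyData.csv # print column names; -n works, too
csvstat MyData.csv
csvcut -c 1,3 MyData.csv # select columns by index; no space after the comma
csvcut -c "Close,Open" MyData.csv # select columns by name; no space after the comma
csvcut -c "Close","Open" MyData.csv # separately quoted names (still no space) also work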
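csvgrep -c <column> {-m <string> | -r <regex> | -f <file>} MyData.csv # -c is required; use exactly one of these three
# -m: literal string to search for in the column
# -r: regex pattern to filter
# -f: path to a file; a row matches if its cell value equals one of the file's lines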
csvstack: stack multiple CSV files
# - do this only if the files have the same columns
# -g: add an extra column called "group",
# prepended as the first column, holding one label per input file, e.g. the filename
# -n: do not use "group" as the default column name, but your own, e.g. "source"
csvstack -g "mydata1,mydata2" MyData1.csv MyData2.csv > MyData.csv
csvsort -c 2 MyData.csv # sort by the second column
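# four slashes = absolute path: "sqlite:///" followed by "/path/to/db" (three slashes = relative path)
sql2csv --db "sqlite:////myDB.db" \
  --query "Select * from table1"
# $HOME already starts with "/", so "sqlite:///$HOME/..." expands to four slashes
sql2csv -l --db "sqlite:///$HOME/code/git/football-data-collection/football-data-small.sqlite" \
  --query "select * from Match" | csvlook
# -l: linenumbers (adds a column of line numbers to the output)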
# use two files for input when joining data
csvsql --query "$join_query" \
  Spotify_MusicAttributes.csv \
  Spotify_Popularity.csv > Spotify_FullData.csv
# creates a new table in the database (--db is required) and inserts the rows of MyData.csv
csvsql --db ... MyData.csv --insert
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment