adewale · May 29, 2020 22:47
diff --git a/gistfile1.txt b/gistfile1.txt
 =Convert JavaScript data into JSON (strip the "window.YTD.tweet.part0 = " assignment prefix)
 sed -e 's/window.YTD.tweet.part0 = //' ./data/tweet.js > ./data/tweet.json

 =Convert the JSON array into newline-delimited JSON (one tweet per line)
 cat data/tweet.json | jq -c '.[]' > newline.json

 =Extract all the tweets that match the desired format
 grep  'Theory:' newline.json  > theories.json

 =Extract the text and URL of each matching tweet. This is where we lose data, either because some tweets contain newlines (verify by comparing the number of tweets in both files) or because some tweets are retweets.
 cat theories.json | jq -r '.[].full_text' > theories.txt
 cat theories.json | jq -r '.[].id_str, .[].full_text' > theories.txt
 cat theories.json | jq -r '.[].id_str, .[].full_text, "\n"'
 cat theories.json | jq -r '"https://twitter.com/ade_oshineye/status/\(.[].id_str)", .[].full_text, "\n"' > theories.txt
 cat theories.json | jq -r '.[].full_text, "https://twitter.com/ade_oshineye/status/\(.[].id_str)", "\n"'  > theories.txt
	=Convert JavaScript data into JSON (strip the "window.YTD.tweet.part0 = " assignment prefix)
	sed -e 's/window.YTD.tweet.part0 = //' ./data/tweet.js > ./data/tweet.json

	=Convert the JSON array into newline-delimited JSON (one tweet per line)
	cat data/tweet.json | jq -c '.[]' > newline.json

	=Extract all the tweets that match the desired format
	grep 'Theory:' newline.json > theories.json

	=Extract the text and URL of each matching tweet. This is where we lose data, either because some tweets contain newlines (verify by comparing the number of tweets in both files) or because some tweets are retweets.
	cat theories.json | jq -r '.[].full_text' > theories.txt
	cat theories.json | jq -r '.[].id_str, .[].full_text' > theories.txt
	cat theories.json | jq -r '.[].id_str, .[].full_text, "\n"'
	cat theories.json | jq -r '"https://twitter.com/ade_oshineye/status/\(.[].id_str)", .[].full_text, "\n"' > theories.txt
	cat theories.json | jq -r '.[].full_text, "https://twitter.com/ade_oshineye/status/\(.[].id_str)", "\n"' > theories.txt