+----------------------------------------------------------------+----------------------------------------------------------------------+
| Task                                                           | One-liner                                                            |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Count the number of lines in a file                            | wc -l filename                                                       |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Show the field names, one per line, with line numbers          | head -1 filename | tr '\t' '\n' | nl  (tab-delimited file)           |
| (helpful when a new file has many fields and you want to       | head -1 filename | tr ',' '\n' | nl  (comma-delimited file)          |
| get the lay of the land)                                       | head -1 filename | tr ' ' '\n' | nl  (space-delimited file)          |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Page through the file with line numbers showing                | nl filename | less  (or less -N filename)                            |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Show the first line                                            | head -1 filename                                                     |
| Show the first few lines                                       | head filename                                                        |
| Show the last few lines                                        | tail filename                                                        |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Show line #4212                                                | sed '4212q;d' filename                                               |
| (very useful when you are trying to load the file into a       | (quits after printing the 4212th line; very considerate              |
| database and the load fails at line #4212)                     | if your file has a million lines!)                                   |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Show lines with "foo" in any field                             | grep 'foo' filename                                                  |
| Show lines with "foo" in any field, ignoring case              | grep -i 'foo' filename                                               |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Show lines with "foo" in field #18                             | awk -F'\t' '$18 == "foo"' filename  (tab-delimited file)             |
|                                                                | awk -F, '$18 == "foo"' filename  (comma-delimited file)              |
|                                                                | awk '$18 == "foo"' filename  (space-delimited file)                  |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Select every 10th row and save into a new file                 | awk 'NR%10 == 0' filename > newfile                                  |
| (great for sampling and train-test splitting)                  | (see the split sketch after the table)                               |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Show rows that have fewer fields than the header row           | awk 'NR==1 {x=NF}; NF < x' filename                                  |
| (to check if any rows are incomplete)                          | (set -F as above for tab- or comma-delimited files)                  |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Remove lines with "foo" in any field and save into a new file  | sed '/foo/d' filename > newfile                                      |
| Remove lines with "foo" in field #18 and save into a new file  | awk -F'\t' '$18 != "foo"' filename > newfile  (tab-delimited file)   |
|                                                                | awk -F, '$18 != "foo"' filename > newfile  (comma-delimited file)    |
|                                                                | awk '$18 != "foo"' filename > newfile  (space-delimited file)        |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Remove the first line and save into a new file                 | sed '1d' filename > newfile                                          |
| (great for stripping a header row before further processing)   |                                                                      |
| Remove the first 8 lines and save the rest into a new file     | sed '1,8d' filename > newfile                                        |
| Remove line #42 and save the rest into a new file              | sed '42d' filename > newfile                                         |
| Remove lines 233 to 718 and save the rest into a new file      | sed '233,718d' filename > newfile                                    |
| Remove the last line and save the rest into a new file         | sed '$d' filename > newfile                                          |
| Remove the last 8 lines and save the rest into a new file      | sed -e :a -e '$d;N;2,8ba' -e 'P;D' filename > newfile                |
|                                                                | (Ugh! See the note after the table for an alternative)               |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Remove blank lines from the file and save into a new file      | sed '/^$/d' filename > newfile                                       |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Remove duplicate lines and save into a new file                | awk '!seen[$0]++' filename > newfile                                 |
|                                                                | (preserves the original order)                                       |
|                                                                | sort -u filename > newfile                                           |
|                                                                | (if the original order doesn't matter)                               |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Remove lines with a missing value in field #18 and             | awk -F'\t' '$18 != ""' filename > newfile  (tab-delimited file)      |
| save into a new file                                           | awk -F, '$18 != ""' filename > newfile  (comma-delimited file)       |
|                                                                | awk '$18 != ""' filename > newfile  (space-delimited file)           |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Show just column #42                                           | cut -f42 filename  (tab-delimited file)                              |
|                                                                | cut -d, -f42 filename  (comma-delimited file)                        |
|                                                                | cut -d' ' -f42 filename  (space-delimited file)                      |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Show the unique values in column #42 with counts               | cut -f42 filename | sort | uniq -c  (tab-delimited file)             |
| (a histogram, sorta. Useful for listing the distinct           | cut -d, -f42 filename | sort | uniq -c  (comma-delimited file)       |
| values in a categorical field)                                 | cut -d' ' -f42 filename | sort | uniq -c  (space-delimited file)     |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Remove the 1st field and save into a new file                  | cut -f2- filename > newfile                                          |
| Remove field #42 and save the rest into a new file             | cut -f1-41,43- filename > newfile                                    |
| Remove fields #19-42 and save the rest into a new file         | cut -f1-18,43- filename > newfile                                    |
| (the above 3 examples assume a tab-delimited file; for         |                                                                      |
| comma- and space-delimited files, add -d as shown in the       |                                                                      |
| earlier cut examples)                                          |                                                                      |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Stack files row-wise                                           | cat file1 file2 > newfile                                            |
| (useful if you have two or more files with the same columns    |                                                                      |
| and you need to stack them vertically)                         |                                                                      |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Stack files column-wise                                        | paste file1 file2 > newfile                                          |
| (useful if you have two or more files with the same rows       |                                                                      |
| and you need to combine them side-by-side)                     |                                                                      |
+----------------------------------------------------------------+----------------------------------------------------------------------+
| Randomly shuffle the rows of a file and save to a new file     | awk 'BEGIN{srand();}{print rand()"\t"$0}' filename |                 |
|                                                                | sort -k1 -n | cut -f2- > newfile                                     |
+----------------------------------------------------------------+----------------------------------------------------------------------+
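
The sketches below exercise several of these one-liners. They assume a hypothetical tab-delimited file named sample.tsv, built here with made-up values purely for illustration; substitute your own file and field numbers.

    # Build a tiny tab-delimited sample file: a header row plus five data rows.
    printf 'id\tname\tcity\n'  > sample.tsv
    printf '1\tann\tnyc\n'    >> sample.tsv
    printf '2\tbob\tsfo\n'    >> sample.tsv
    printf '3\tcal\tnyc\n'    >> sample.tsv
    printf '4\tdee\tsfo\n'    >> sample.tsv
    printf '5\ted\tnyc\n'     >> sample.tsv

    wc -l sample.tsv                         # 6 sample.tsv  (header + 5 data rows)
    head -1 sample.tsv | tr '\t' '\n' | nl   # lists the 3 field names, numbered
    sed '3q;d' sample.tsv                    # prints line 3 only: 2  bob  sfo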
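
A note on quoting, since it is the most common way these awk one-liners break: -F'\t' must keep its quotes, because with a bare -F\t the shell eats the backslash and awk ends up splitting on the letter t. On the sample file above:

    awk -F'\t' '$3 == "nyc"' sample.tsv             # rows whose 3rd field is nyc
    awk -F'\t' '$3 != "nyc"' sample.tsv > rest.tsv  # the other rows, saved off
    awk -F\t   '$3 == "nyc"' sample.tsv             # wrong: splits on "t", matches nothing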
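
The every-10th-row entry doubles as a crude positional train-test split: NR%10 == 0 keeps roughly 10% of the rows and the negated test keeps the rest. A sketch (data.tsv, test.tsv, and train.tsv are illustrative names; strip the header first so it does not land in either split):

    sed '1d' filename > data.tsv           # drop the header row first
    awk 'NR%10 == 0' data.tsv > test.tsv   # every 10th row, ~10% of the rows
    awk 'NR%10 != 0' data.tsv > train.tsv  # everything else, ~90%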
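
The missing-value filter compares the field against the empty string rather than using a bare truth test like '$18', because awk also treats a field holding a literal 0 as false, and a truth test would throw those rows away too. On the 3-field sample file, filtering on field 3:

    awk -F'\t' '$3 != ""' sample.tsv > complete.tsv  # keep rows where field 3 is present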
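
The dedup one-liner awk '!seen[$0]++' is terse enough to deserve a gloss: seen[$0] counts how many times the whole line has appeared, the ++ increments that count after it is tested, and the leading ! makes the pattern true only on a line's first appearance, so only first occurrences are printed.

    printf 'a\nb\na\na\nc\nb\n' | awk '!seen[$0]++'  # prints a, b, c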
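
The shuffle one-liner works by tagging each row with a random key, sorting on that key, and cutting the key back off. If GNU coreutils is available, shuf does the same job in one step:

    awk 'BEGIN{srand();}{print rand()"\t"$0}' sample.tsv | sort -k1 -n | cut -f2- > shuffled.tsv
    shuf sample.tsv > shuffled.tsv   # GNU coreutils equivalent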
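
And the promised note on removing the last 8 lines: GNU head (though not the BSD/macOS one) accepts a negative count, which is a much gentler alternative to the sed loop:

    head -n -8 filename > newfile   # GNU head only: everything except the last 8 lines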