Created
March 9, 2018 06:10
-
-
Save samba/e7d341242df3f2b404f296c65ac09c52 to your computer and use it in GitHub Desktop.
Extract a subset of named fields from a CSV file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Loads a CSV file, assuming it has header names, and prints only the specified columns.
# ... like selecting two columns of a 7-column table.
#
# Arguments:
# - field names, comma-separated
# - input filename
#
# Output:
# - the header row for the specified columns, and those fields for all input records.
#
# Usage:
#   python csvextract.py "fieldName1,fieldName2" ./source.csv > output.csv
#
#
#
import csv
import fileinput
import sys
def parse_field_expression(expr):
    """Split a comma-separated field expression into a list of field names.

    Args:
        expr: Field names joined by commas, e.g. ``"fieldName1,fieldName2"``.

    Returns:
        The field names in the order they were written. Names are returned
        exactly as typed (surrounding whitespace is kept), so each one must
        match the CSV header text character for character.
    """
    return expr.split(',')
def perform_extract(field_expr, filename):
    """Yield one dict per CSV record, holding only the requested fields.

    The input is parsed with ``csv.DictReader``, so its first row is used as
    the header that names each column.

    Args:
        field_expr: Comma-separated field names, as accepted by
            ``parse_field_expression``.
        filename: Input source handed unchanged to ``fileinput.input``.

    Yields:
        A dict mapping each requested field name to that record's value.

    Raises:
        KeyError: If a requested field name is not a key of a parsed row.
    """
    fields = parse_field_expression(field_expr)
    source = fileinput.input(filename)
    try:
        reader = csv.DictReader(source)
        for row in reader:
            yield dict((name, row[name]) for name in fields)
    finally:
        # Runs on exhaustion, on error, and when the generator is closed
        # early, so the input handle is always released.
        source.close()
def main(args):
    """Write the selected CSV columns, header row first, to standard output.

    Nothing is written when the input yields no records.

    Args:
        args: Command-line arguments without the program name:
            ``[field_expression, input_filename]``. Extra items are ignored.

    Raises:
        SystemExit: If fewer than two arguments are supplied.
        ValueError: Re-raised from the CSV writer after the offending record
            has been echoed to standard error.
    """
    if len(args) < 2:
        raise SystemExit(
            'usage: csvextract.py "fieldName1,fieldName2" ./source.csv')

    writer = None
    for record in perform_extract(args[0], args[1]):
        if writer is None:
            # Take the column order from the request itself rather than from
            # the record's key order, which is not guaranteed on older
            # interpreters.
            fieldnames = parse_field_expression(args[0])
            writer = csv.DictWriter(sys.stdout, fieldnames=fieldnames)
            writer.writeheader()
        try:
            writer.writerow(record)
        except ValueError:
            # Show which record could not be written, then let the original
            # exception (and its traceback) propagate.
            sys.stderr.write(repr(record) + '\n')
            raise
# Run the extractor only when executed as a script, passing the command-line
# arguments without the program name.
if __name__ == '__main__':
    main(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment