Last active
August 29, 2015 14:21
-
-
Save jBenes/76d1746c7a2642d847af to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import argparse | |
from collections import defaultdict | |
def load_routes(routes_file=None):
    """Load source -> destinations routes from a tab-separated file.

    Each non-blank line must contain at least two tab-separated fields; the
    first is used as the route source and the second as its destination.
    Any further fields on the line are ignored.

    Args:
        routes_file: Path of the routes file. When falsy (``None`` or an
            empty string) no file is read.

    Returns:
        A ``defaultdict(set)`` mapping each source to the set of its
        destinations (empty when no file was given).

    Raises:
        ValueError: If a non-blank line has fewer than two fields.
    """
    routes = defaultdict(set)
    if not routes_file:
        return routes

    with open(routes_file) as lines:
        for line_no, line in enumerate(lines, 1):
            fields = line.replace("\r", "").replace("\n", "").split("\t")
            if len(fields) < 2:
                if not fields[0].strip():
                    # Blank (e.g. trailing) line: nothing to load.
                    continue
                raise ValueError(
                    "%s:%d: expected 'src<TAB>dst', got %r"
                    % (routes_file, line_no, line)
                )
            src, dst = fields[:2]
            routes[src].add(dst)
    return routes
def replace_row(prepared_rows, src, dst, row_id, reserved_rows, count_replaced):
    """Write a route into the nearest unreserved row at or before ``row_id``.

    Scans downwards from ``row_id`` until it finds an index that is not in
    ``reserved_rows``, then overwrites fields 143 and 146 of that row with
    ``src`` and ``dst`` (the row is modified in place).

    Args:
        prepared_rows: List of rows (mutable sequences), indexed by row_id.
        src: Value to store in field 143 of the chosen row.
        dst: Value to store in field 146 of the chosen row.
        row_id: Index in ``prepared_rows`` at which the search starts.
        reserved_rows: Collection of row indices that must not be
            overwritten; it is compared directly against ``row_id``.
        count_replaced: Number of rows replaced so far.

    Returns:
        A ``(row_id, count_replaced)`` tuple: the index just below the row
        that was overwritten (where the next search should start) and the
        incremented replacement counter.

    Raises:
        IndexError: If no unreserved row exists at or before ``row_id``, or
            if ``row_id`` is past the end of ``prepared_rows``.
    """
    while row_id >= 0 and row_id in reserved_rows:
        row_id -= 1

    if row_id < 0:
        # Without this guard a negative index would wrap around to the end
        # of the list and silently overwrite rows that must be kept.
        raise IndexError(
            "no unreserved row left to hold route %s -> %s" % (src, dst)
        )

    target = prepared_rows[row_id]
    target[143] = src
    target[146] = dst
    return row_id - 1, count_replaced + 1
def process_file(source_file, destination_file, routes_file=None):
    """Renumber a '^'-delimited file and inject routes that it is missing.

    Every row read from ``source_file`` gets field 1 set to its 1-based
    sequence number, field 152 set to ``2`` and field 189 set to ``'OW'``.
    Fields 143 and 146 are treated as the row's route (source, destination):
    a row whose route appears in the routes file is marked as reserved and
    that route is considered satisfied. Each route that no row satisfied is
    then written over an unreserved row, working backwards from the last
    row. The resulting rows are written to ``destination_file``.

    Args:
        source_file: Path of the '^'-delimited input file.
        destination_file: Path of the '^'-delimited output file.
        routes_file: Optional path of a tab-separated routes file, passed to
            ``load_routes``.

    Raises:
        IndexError: If a row has fewer than 190 fields, or if there is no
            row available to hold a missing route.
    """
    print('process_file: start source_file %s, destination_file %s'
          % (source_file, destination_file))
    routes = load_routes(routes_file)

    prepared_rows = []
    # Holds *indices* into prepared_rows (not the 1-based ids stored in
    # field 1), because replace_row checks list indices against this set.
    reserved_rows = set()

    # Binary mode is what the Python 2 csv module expects for its files.
    with open(source_file, "rb") as source:
        for index, row in enumerate(csv.reader(source, delimiter='^')):
            row[1] = index + 1
            row[152] = 2
            row[189] = 'OW'
            src, dst = row[143], row[146]
            if src in routes and dst in routes[src]:
                reserved_rows.add(index)
                # Route is already present in the data; do not inject it.
                routes[src].remove(dst)
            prepared_rows.append(row)

    # Inject the remaining routes starting from the last row.
    row_id = len(prepared_rows) - 1
    count_replaced = 0
    for src, dsts in routes.items():
        for dst in dsts:
            row_id, count_replaced = replace_row(
                prepared_rows, src, dst, row_id, reserved_rows, count_replaced
            )

    # Open the destination only once all rows are ready, so a failure above
    # does not leave a truncated output file behind.
    with open(destination_file, 'wb') as destination:
        csv.writer(destination, delimiter='^').writerows(prepared_rows)

    print('done %s' % count_replaced)
if __name__ == "__main__":
    # Command-line entry point: parse the file paths and run process_file.
    parser = argparse.ArgumentParser()
    # Source and destination are mandatory: process_file opens both, so let
    # argparse report a missing one instead of failing later inside open().
    parser.add_argument("--source_file", help="csv file to process", type=str,
                        required=True)
    parser.add_argument("--destination_file", help="where to store this shit",
                        type=str, required=True)
    parser.add_argument("--routes_file", help="prior routes", type=str)
    args = parser.parse_args()
    process_file(args.source_file, args.destination_file, args.routes_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment