Created
September 18, 2024 14:29
-
-
Save ronaldbradford/102aa689789ef98a548f64b2ea7b0725 to your computer and use it in GitHub Desktop.
Diff two csv files with two columns of name/value pairs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import sys | |
# Function to check if a value is numeric
def is_numeric(value):
    """Return True if ``value`` can be converted to an integer by ``int()``.

    Only integer conversion is attempted, so a string such as ``"1.5"`` is
    reported as non-numeric.

    Args:
        value: The object to test, typically a string read from a CSV cell.

    Returns:
        bool: True when ``int(value)`` succeeds, False otherwise.
    """
    try:
        int(value)
    except (ValueError, TypeError, OverflowError):
        # ValueError: text that is not an integer literal (e.g. "abc", "").
        # TypeError: objects int() cannot accept at all (e.g. None, a list).
        # OverflowError: float infinity.
        return False
    return True
# Load CSV data into a dictionary, ignoring non-numeric values
def load_csv(file_name):
    """Read name/value pairs from a CSV file into a dictionary.

    The first row is always discarded as a header. For every following row
    the first column is used as the name and the second column as the value.
    Rows with fewer than two columns (including blank lines) and rows whose
    value is not an integer are skipped. If a name occurs more than once,
    the last occurrence wins.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        dict: Mapping of name (str) to value (int).

    Raises:
        OSError: If the file cannot be opened.
    """
    data = {}
    # newline='' lets the csv module do its own line-ending handling.
    with open(file_name, 'r', newline='') as f:
        reader = csv.reader(f)
        # The default of None keeps an empty file from raising StopIteration.
        next(reader, None)
        for row in reader:
            # csv.reader yields [] for blank lines; also guard against rows
            # that have a name but no value column.
            if len(row) < 2:
                continue
            name, value = row[0], row[1]
            try:
                data[name] = int(value)
            except ValueError:
                # Non-integer values are ignored.
                continue
    return data
# Compare the two datasets and output differences to stdout
def compare_csv(file1, file2):
    """Print the entries whose values differ between two CSV files.

    Both files are loaded with ``load_csv``. A header line is printed first,
    then one tab-separated line for each name that is present in both files
    with different values, showing both values and ``value2 - value1``.
    Names found in only one of the files are not reported.

    Args:
        file1: Path of the first CSV file.
        file2: Path of the second CSV file.
    """
    first = load_csv(file1)
    second = load_csv(file2)

    print(f"{'Name':<10}\t{'Value1':<10}\t{'Value2':<10}\t{'Difference':<10}")

    # Walk the first file's entries in order and report only the changed ones
    for name, value1 in first.items():
        if name not in second:
            continue
        value2 = second[name]
        if value1 == value2:
            # Unchanged values are not part of the diff
            continue
        diff = value2 - value1
        print(f"{name:<10}\t{value1:<10}\t{value2:<10}\t{diff:<10}")
if __name__ == "__main__":
    # Exactly two CSV paths are expected after the script name.
    if len(sys.argv) != 3:
        # Use print() (Python has no printf builtin) with an f-string so the
        # script name is interpolated; usage errors go to stderr.
        print(f"Usage: python {sys.argv[0]} <file1.csv> <file2.csv>", file=sys.stderr)
        sys.exit(1)
    file1 = sys.argv[1]  # First CSV file path
    file2 = sys.argv[2]  # Second CSV file path
    compare_csv(file1, file2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment