Last active
May 29, 2025 16:28
-
-
Save highoncarbs/49e3e5f56fb80bec03fbaf6ce4fe4048 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import re | |
# Load the CSV, forcing 'phone' to be read as text. Left to type inference,
# an all-digit column that also has blank cells is parsed as float64, so
# 9876543210 becomes 9876543210.0 and gains a stray trailing digit once
# non-digit characters are stripped during formatting.
df = pd.read_csv('input.csv', dtype={'phone': str})

# Drop rows whose phone is missing or contains only whitespace.
has_phone = df['phone'].notna() & (df['phone'].astype(str).str.strip() != '')
df = df[has_phone]
def format_indian_number(number):
    """Normalize a phone number to 12 digits with the Indian country code.

    Args:
        number: Raw phone value. Anything that is not already a string is
            converted with ``str`` before the digits are extracted.

    Returns:
        ``'91'`` followed by 10 digits, or ``None`` when the input does not
        reduce to a 10-digit number, optionally prefixed by a trunk ``0``
        or by the country code ``91``.
    """
    # A digits-only CSV column that contains blanks is read by pandas as
    # float64, so a value may arrive as 9876543210.0. str() would keep the
    # ".0" and the digit filter below would turn it into a bogus extra zero.
    if isinstance(number, float) and number.is_integer():
        number = int(number)

    digits = re.sub(r'\D', '', str(number))  # keep digits only

    # Drop the leading trunk prefix, e.g. 09876543210 -> 9876543210.
    if len(digits) == 11 and digits.startswith('0'):
        digits = digits[1:]

    if len(digits) == 10:
        return '91' + digits
    if len(digits) == 12 and digits.startswith('91'):
        return digits
    return None
def format_name(name):
    """Clean a name and truncate it to its first three words.

    Args:
        name: Raw name value. Missing values (``None`` or NaN) are accepted.

    Returns:
        The first three whitespace-separated words joined by single spaces,
        with every character that is neither a word character nor
        whitespace removed. Underscores count as word characters and are
        kept. Missing input yields an empty string.
    """
    # str(NaN) is the literal text 'nan', which would otherwise be exported
    # as if it were a real name.
    if pd.isna(name):
        return ''
    cleaned = re.sub(r'[^\w\s]', '', str(name))
    return ' '.join(cleaned.split()[:3])
# Format both columns in one step. assign() returns a new frame, so nothing
# is written into the filtered selection produced earlier; plain column
# assignment on that selection makes pandas emit SettingWithCopyWarning.
df = df.assign(
    phone=df['phone'].apply(format_indian_number),
    name=df['name'].apply(format_name),
)

# Phones that failed validation come back as None; drop those rows.
df = df[df['phone'].notna()]

# Only the cleaned name and phone columns are exported.
df_final = df[['name', 'phone']]

# Write the result without the index column.
df_final.to_csv('output.csv', index=False)
print('Filtered and formatted CSV saved as output.csv.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment