Skip to content

Instantly share code, notes, and snippets.

@highoncarbs
Last active May 29, 2025 16:28
Show Gist options
  • Save highoncarbs/49e3e5f56fb80bec03fbaf6ce4fe4048 to your computer and use it in GitHub Desktop.
Save highoncarbs/49e3e5f56fb80bec03fbaf6ce4fe4048 to your computer and use it in GitHub Desktop.
import pandas as pd
import re
# Load CSV file
# Read 'phone' as text: with dtype inference a numeric column that contains
# blanks becomes float64 (9876543210 -> 9876543210.0) and leading zeros are
# lost, which corrupts the digit-based validation applied later.
df = pd.read_csv('input.csv', dtype={'phone': str})
# Remove rows where phone is empty (missing, or whitespace only)
phone_text = df['phone'].astype(str).str.strip()
df = df[df['phone'].notna() & (phone_text != '')]
# Function to format and validate Indian phone numbers
def format_indian_number(number):
    """Normalise a phone value to ``91`` followed by a 10-digit number.

    Args:
        number: Raw phone value. Any type is accepted; it is converted with
            ``str`` and every non-digit character is discarded.

    Returns:
        A 12-digit string starting with ``91`` when the input reduces to
        10 digits, to 11 digits with a leading ``0`` (trunk prefix), or to
        12 digits already starting with ``91``. ``None`` otherwise.
    """
    # A whole-number float (e.g. 9876543210.0 from a float64 column) would
    # stringify with a trailing ".0", leaving a spurious extra digit once
    # non-digits are stripped. Convert it to int first.
    if isinstance(number, float) and number.is_integer():
        number = int(number)

    digits = re.sub(r'\D', '', str(number))  # Remove non-digit characters

    # Drop a single leading trunk '0' (0XXXXXXXXXX -> XXXXXXXXXX).
    if len(digits) == 11 and digits.startswith('0'):
        digits = digits[1:]

    if len(digits) == 10:
        return '91' + digits
    if len(digits) == 12 and digits.startswith('91'):
        return digits
    return None
# Function to format names
def format_name(name):
    """Strip punctuation from a name and keep at most its first three words.

    Args:
        name: Raw name value. Non-string values are converted with ``str``.

    Returns:
        The cleaned name with words joined by single spaces. A missing
        value (``None`` or a float NaN) yields an empty string instead of
        the literal text ``"None"`` / ``"nan"``.
    """
    # NaN is the only float that compares unequal to itself.
    if name is None or (isinstance(name, float) and name != name):
        return ''

    cleaned = re.sub(r'[^\w\s]', '', str(name))  # Remove punctuation
    # split() with no argument also collapses runs of whitespace.
    return ' '.join(cleaned.split()[:3])  # Keep only first 3 words
# Apply formatting and validation
# Build a new frame with assign() instead of writing columns into the
# boolean-filtered `df`; assigning into a filtered frame can trigger
# pandas' SettingWithCopyWarning.
df = df.assign(
    phone=df['phone'].apply(format_indian_number),
    name=df['name'].apply(format_name),
)
# Remove rows with invalid phone numbers (format_indian_number gave None)
df = df.dropna(subset=['phone'])
# Keep only 'name' and 'phone' columns
df_final = df[['name', 'phone']]
# Save to new CSV file
df_final.to_csv('output.csv', index=False)
print('Filtered and formatted CSV saved as output.csv.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment