Created
February 11, 2023 05:48
-
-
Save iam-mhaseeb/e480ee4edaabc1413284882c0035963b to your computer and use it in GitHub Desktop.
The simplest resume parser in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import json | |
# define a function to parse the resume | |
# Entry-line patterns for the two list sections, paired with the dictionary
# keys that their capture groups populate (in group order).
_EDUCATION_ENTRY = re.compile(r'^([\w\s]+),\s([\w\s]+),\s(\d{4})$')
_EDUCATION_KEYS = ('degree', 'field_of_study', 'graduation_year')
_EXPERIENCE_ENTRY = re.compile(r'^([\w\s]+),\s([\w\s]+),\s(\d{4})-(\d{4})$')
_EXPERIENCE_KEYS = ('position', 'company', 'start_year', 'end_year')


def parse_resume(resume_text):
    """Parse a plain-text resume and return the result as a JSON string.

    The text is scanned line by line (each line stripped of surrounding
    whitespace). The first match found for each of name, email and phone is
    kept. A line starting with ``Education`` or ``Experience`` opens a
    section; the consecutive lines that follow are read as entries of the
    form ``degree, field, YYYY`` or ``position, company, YYYY-YYYY``. The
    first line that is not a valid entry closes the section and is then
    processed like any other line (so it may itself be a heading or carry
    contact details).

    Args:
        resume_text: The full resume as a single string with ``\\n``
            line separators.

    Returns:
        A JSON document (``indent=4``) with the keys ``name``, ``email``,
        ``phone`` (strings, empty when not found), ``education`` and
        ``experience`` (lists of dictionaries of strings).
    """
    name = ''
    email = ''
    phone = ''
    education = []
    experience = []

    # Currently open section as (entry pattern, keys, destination list), or
    # None when the scanner is outside any section. Tracking this explicitly
    # replaces the previous nested `next(lines)` loops, which raised
    # TypeError because `lines` was a list rather than an iterator.
    section = None

    for raw_line in resume_text.split('\n'):
        line = raw_line.strip()

        if section is not None:
            pattern, keys, bucket = section
            entry = pattern.search(line)
            if entry:
                # Entries are tested before headings on purpose: a position
                # such as "Experienced Engineer, ..." also matches the
                # "Experience" heading pattern.
                bucket.append(dict(zip(keys, entry.groups())))
                continue
            # Not an entry: close the section and fall through so this line
            # is still examined for contact details and headings.
            section = None

        # extract the name
        if not name:
            # NOTE(review): group(1) excludes the final word of the line
            # ("John Doe" -> "John"); kept as-is to preserve existing output.
            name_match = re.search(r'^([\w\s]+)\s+[\w\s]+$', line)
            if name_match:
                name = name_match.group(1)

        # extract the email address
        if not email:
            email_match = re.search(r'[\w\.-]+@[\w\.-]+', line)
            if email_match:
                email = email_match.group(0)

        # extract the phone number
        if not phone:
            phone_match = re.search(r'\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}', line)
            if phone_match:
                phone = phone_match.group(0)

        # open a section when its heading is seen
        if re.search(r'^Education.*$', line):
            section = (_EDUCATION_ENTRY, _EDUCATION_KEYS, education)
        elif re.search(r'^Experience.*$', line):
            section = (_EXPERIENCE_ENTRY, _EXPERIENCE_KEYS, experience)

    # store the parsed information in a dictionary
    parsed_resume = {
        'name': name,
        'email': email,
        'phone': phone,
        'education': education,
        'experience': experience,
    }

    # return the parsed information as a JSON string
    return json.dumps(parsed_resume, indent=4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment