Harshdeep Singh HarshSingh16

Data Science Professional. Connect with me on LinkedIn: https://www.linkedin.com/in/harshsingh12/

HarshSingh16 / feature.py

Created February 25, 2019 06:48

	# Hashed categorical feature columns, one per categorical input feature.
	# Every column is built with categorical_column_with_hash_bucket, keyed by
	# the feature name and given a fixed number of hash buckets.
	# NOTE(review): the bucket counts look sized to the number of categories;
	# distinct values may still share a hash bucket - confirm that is acceptable.
	_hash_bucket_column = tf.feature_column.categorical_column_with_hash_bucket

	education1 = _hash_bucket_column("education", hash_bucket_size=16)
	workclass1 = _hash_bucket_column("workclass", hash_bucket_size=10)
	# NOTE(review): "martial1" is presumably a typo for "marital"; the name is
	# kept because other code may refer to it.
	martial1 = _hash_bucket_column("marital_status", hash_bucket_size=7)
	occupation1 = _hash_bucket_column("occupation", hash_bucket_size=14)
	relationship1 = _hash_bucket_column("relationship", hash_bucket_size=6)
	race1 = _hash_bucket_column("race", hash_bucket_size=5)
	gender1 = _hash_bucket_column("gender", hash_bucket_size=2)
	native_country1 = _hash_bucket_column("native_country", hash_bucket_size=60)

HarshSingh16 / CleaningData.py

Created February 14, 2019 09:58

	# Doing a first cleaning of the texts
	# clean_text lower-cases `text` and then expands a few English contractions
	# with re.sub ("i'm" -> "i am", "he's" -> "he is", "'ll" -> " will", ...).
	# NOTE(review): this listing stops after the "'ll" substitution and shows no
	# return statement; the rest of the function is presumably cut off - confirm
	# against the full file.
	# NOTE(review): the body lines have lost their indentation in this listing,
	# and `re` must be imported elsewhere in the file.
	def clean_text(text):
	# Lower-case first so the lowercase patterns below can match.
	text = text.lower()
	text = re.sub(r"i'm", "i am", text)
	text = re.sub(r"he's", "he is", text)
	text = re.sub(r"she's", "she is", text)
	text = re.sub(r"that's", "that is", text)
	text = re.sub(r"what's", "what is", text)
	text = re.sub(r"where's", "where is", text)
	# Any remaining "'ll" suffix becomes " will" (e.g. "you'll" -> "you will").
	text = re.sub(r"\'ll", " will", text)

HarshSingh16 / Conversations and QuestionsAnswers.py

Created February 14, 2019 09:26

	# Build, for every conversation record, the list of ids it refers to.
	# The last element of `conversations` is skipped. The ids come from the final
	# ' +++$+++ '-separated field: its first and last characters are dropped,
	# quotes and spaces are removed, and the remainder is split on commas.
	conversations_ids = []
	for conversation in conversations[:-1]:
	    id_field = conversation.rsplit(' +++$+++ ', 1)[-1]
	    bare_ids = id_field[1:-1].replace("'", "").replace(" ", "")
	    conversations_ids.append(bare_ids.split(','))

	# Getting separately the questions and the answers
	# `questions` and `answers` start out empty; the loop over `conversations_ids`
	# presumably fills them.
	# NOTE(review): the loop body is not shown in this listing - confirm against
	# the full file.
	questions = []
	answers = []
	for conversation in conversations_ids:

HarshSingh16 / SortingQuestionAnswers.py

Created February 8, 2019 23:16

	# Order the question/answer pairs by question length, shortest first.
	# Only questions of length 1 to 24 are kept; empty or longer questions (and
	# their answers) are left out. Pairs of equal length keep their input order.
	sorted_clean_questions = []
	sorted_clean_answers = []
	for length in range(1, 25):
	    for index, question in enumerate(questions_int_sequence):
	        if len(question) != length:
	            continue
	        sorted_clean_questions.append(question)
	        # The answer at the same position belongs to this question.
	        sorted_clean_answers.append(answers_int_sequence[index])

HarshSingh16 / Seq_integers.py

Created February 8, 2019 21:24

	# Converting questions into sequences of integers.
	# Each whitespace-separated word of a question is replaced by its id from
	# dict_word2integer; a word without an id is replaced by the "<OUT>" marker.
	# NOTE(review): the marker is stored as the literal string "<OUT>", not as an
	# integer id - confirm that downstream code expects a string here.
	# The per-question list is built inline rather than in a variable named
	# `int`, which would rebind the builtin for the rest of the script.
	questions_int_sequence = [
	    [dict_word2integer.get(word, "<OUT>") for word in question.split()]
	    for question in Clean_questions
	]

HarshSingh16 / AddingEOS.py

Created February 8, 2019 20:31

	# Append the " <EOS>" end-of-sequence marker to every cleaned answer.
	# A comprehension keeps the iteration variable out of module scope, so it
	# cannot overwrite the separate `answers` list used elsewhere in the script.
	new_clean_answers = [answer + " <EOS>" for answer in Clean_answers]

HarshSingh16 / InverseDict.py

Created February 8, 2019 19:36

	# Reverse lookup table (integer id -> word), built by swapping each
	# key/value pair of dict_word2integer.
	dict_integer2word = dict(
	    (index, word) for word, index in dict_word2integer.items()
	)

HarshSingh16 / InverseDict.py

Created February 8, 2019 19:35

	# Reverse lookup table (integer id -> word), built by swapping each
	# key/value pair of dict_word2integer.
	dict_integer2word = dict(
	    (index, word) for word, index in dict_word2integer.items()
	)

HarshSingh16 / word2integermapping.py

Created February 8, 2019 19:01

	# Map every sufficiently frequent word to a unique integer id.
	# Only words whose count in word2count is strictly greater than `threshold`
	# receive an id; ids are handed out consecutively from 0 in the iteration
	# order of word2count. `word_number` ends up as the number of ids assigned.
	threshold = 20
	word_number = 0
	dict_word2integer = {}
	for word, frequency in word2count.items():
	    # Compare against `threshold` so that changing the constant above
	    # actually changes the cut-off.
	    if frequency > threshold:
	        dict_word2integer[word] = word_number
	        word_number += 1

HarshSingh16 / Tokens.py

Created February 8, 2019 18:55

	# Register the special tokens in dict_integer.
	# Each token is stored with the value len(dict_integer) + 1, computed just
	# before the token is inserted.
	# NOTE(review): dict_integer is not defined in this listing, and the "+ 1"
	# skips one id if the existing ids run from 0 to len - 1 - confirm both.
	Tokens = ["<PAD>", "<SOS>", "<EOS>", "<OUT>"]
	for token in Tokens:
	    next_id = len(dict_integer) + 1
	    dict_integer[token] = next_id