''' we're solving fizzbuzz with a little help from the web '''
from html.parser import HTMLParser
from urllib.request import Request, urlopen
import re
import time

base_url = 'https://www.answers.com/Q/'
tag_name = 'div'
class_name = 'answer-body'
delimiter = '_'

# HTML void elements never have a closing tag. They must not be tracked on
# the nesting stack, otherwise a bare <br> inside an answer would keep the
# parser "inside" that answer for the rest of the document.
_VOID_ELEMENTS = frozenset((
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
    'link', 'meta', 'param', 'source', 'track', 'wbr',
))


class SearchParser(HTMLParser):
    ''' Parses search results from the web

    Collects the text of every ``tag_name`` element whose class attribute
    contains ``class_name`` into ``self.answers`` and scores that text for
    yes/no signal words.
    '''

    def __init__(self):
        super().__init__()

        # track if the data we're encountering is part of an answer. we have
        # to do this because there can be nested tags inside the answer tag
        self.in_answer = False

        # keep track of nested (non-void) tags opened inside the answer
        self.tag_stack = []

        # a list of all the answers found, one string per answer tag
        self.answers = []

        # words that indicate whether the answer is yes or no
        self.positive_signs = [
            'yes',
            'exactly',
            'is divisible',
        ]
        self.negative_signs = [
            'no',
            'not',
            'decimal',
        ]

    def handle_starttag(self, tag, attrs):
        ''' identify the tag that we expect to contain the answer '''
        if tag == tag_name:
            for name, value in attrs:
                # value is None for a valueless attribute (<div class>)
                if name == 'class' and value and class_name in value:
                    self.in_answer = True
                    self.answers.append('')
                    break

        # void elements are never closed, so they are not part of the nesting
        if self.in_answer and tag not in _VOID_ELEMENTS:
            self.tag_stack.append(tag)

    def handle_endtag(self, tag):
        ''' behavior on encountering a close tag '''
        # outside an answer there is nothing to unwind; a void element
        # (reached via <br/> or a stray </br>) was never pushed
        if not self.in_answer or tag in _VOID_ELEMENTS:
            return

        if self.tag_stack:
            self.tag_stack.pop()

        # if the stack is empty, we're done with that answer
        if not self.tag_stack:
            self.in_answer = False

    def handle_data(self, data):
        ''' extract the answer, if we're in an answer tag '''
        if self.in_answer:
            self.answers[-1] += data

    def conclude_yes_no(self):
        ''' looks through all the answers to conclude either true or false

        Returns True only when the summed score of all answers is positive.
        '''
        total = sum(
            self.score_answer(answer.lower().strip())
            for answer in self.answers
        )
        return total > 0

    def score_answer(self, answer):
        ''' evaluates an answer to determine if it's a yes or a no

        Returns the number of positive sign matches minus the number of
        negative sign matches found in ``answer``.
        '''
        positives = self._count_signs(self.positive_signs, answer)
        negatives = self._count_signs(self.negative_signs, answer)
        return positives - negatives

    @staticmethod
    def _count_signs(signs, text):
        ''' count whole-word occurrences of any of the sign phrases in text '''
        if not signs:
            return 0
        pattern = '|'.join(r'\b%s\b' % re.escape(sign) for sign in signs)
        return len(re.findall(pattern, text))


def query_search_engine(numerator, denominator):
    ''' ask a search engine if these numbers are divisible

    Returns False when the request was redirected to a different url,
    otherwise the yes/no conclusion drawn from the answers on the page.
    Errors raised by ``urlopen`` are not caught here.
    '''
    question = 'is %d divisible by %d' % (numerator, denominator)
    question = question.replace(' ', delimiter)
    url = '%s%s' % (base_url, question)

    request = Request(url)
    request.add_header(
        'User-Agent',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) '
        'Gecko/20100101 Firefox/72.0')

    # the context manager closes the connection once we are done with it
    with urlopen(request) as response:
        redirected = url != response.url
        # NOTE(review): 'unicode-escape' reads the bytes as Latin-1 and
        # expands backslash escapes; presumably chosen on purpose for this
        # site's markup -- confirm before switching to the declared charset
        text = '' if redirected else response.read().decode('unicode-escape')

    # politeness -- wait a few seconds before the next query
    time.sleep(3)

    if redirected:
        return False

    parser = SearchParser()
    parser.feed(text)
    return parser.conclude_yes_no()


def main():
    ''' print fizzbuzz for 1..100, asking the web about every divisor '''
    divisors = [(3, 'fizz'), (5, 'buzz')]
    for i in range(1, 101):
        output = ''
        for divisor, word in divisors:
            if query_search_engine(i, divisor):
                output += word
        print(output or i)


if __name__ == '__main__':
    main()
    # cool