Skip to content

Instantly share code, notes, and snippets.

@2018kguo
Created April 18, 2021 19:23
Show Gist options
  • Save 2018kguo/a77b143fd87955eb27d9ba09ef9df01e to your computer and use it in GitHub Desktop.
Save 2018kguo/a77b143fd87955eb27d9ba09ef9df01e to your computer and use it in GitHub Desktop.
web scraper aiohttp example
async def fetchHTML(self, url: str, session: ClientSession) -> str:
    """Issue a GET request for ``url`` and return the response body as text.

    Args:
        url: Address to request.
        session: Client session used to send the request.

    Returns:
        The decoded response body. An empty body is returned as-is, after a
        rate-limit notice has been printed.
    """
    # ``async with`` guarantees the response (and its pooled connection) is
    # released even when reading the body raises.
    async with session.request(method="GET", url=url) as resp:
        html = await resp.text()
    if not html:
        # An empty body is treated as the signal that requests are throttled.
        print("Hit rate limit for LinkedIn requests")
    return html
async def gatherTopLevelSearchForLocations(self) -> List[str]:
    """Fetch the LinkedIn job-search page for each entry in ``self.locations``.

    The first element of ``self.searchTerms`` is used as the keyword for
    every request. Pages are fetched one at a time over a single shared
    client session.

    Returns:
        The HTML returned by ``fetchHTML`` for each location, in the same
        order as ``self.locations``.
    """
    # Imported locally so the method does not rely on the file's import block.
    from urllib.parse import quote, urlencode

    htmlList = []
    async with aiohttp.ClientSession() as session:
        for location in self.locations:
            # Percent-encode both values so characters such as "&", "#", "+"
            # or "=" inside a keyword/location cannot corrupt the query.
            query = urlencode(
                {"keywords": self.searchTerms[0], "location": location},
                quote_via=quote,
            )
            requestStr = "https://www.linkedin.com/jobs/search?" + query
            # NOTE(review): requests are awaited sequentially; asyncio.gather
            # would overlap them but may hit the rate limit sooner - confirm
            # before changing.
            html = await self.fetchHTML(requestStr, session)
            htmlList.append(html)
    return htmlList
def getTopLevelSearch(self) -> List[str]:
    """Run ``gatherTopLevelSearchForLocations`` to completion synchronously.

    Returns:
        Whatever ``gatherTopLevelSearchForLocations`` returns (one HTML
        string per location).
    """
    # asyncio.run is deliberately avoided because of this issue:
    # https://github.com/aio-libs/aiohttp/issues/4324
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current event loop (e.g. called from a non-main thread, or on a
        # Python version that no longer creates one implicitly): create one
        # and register it so later calls reuse it. It is intentionally left
        # open, matching the behaviour of the implicit loop.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop.run_until_complete(self.gatherTopLevelSearchForLocations())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment