Skip to content

Instantly share code, notes, and snippets.

@QiZ213
Created May 3, 2016 19:20
Show Gist options
  • Save QiZ213/5b03b41f15c019f00e322dca026bae96 to your computer and use it in GitHub Desktop.
Save QiZ213/5b03b41f15c019f00e322dca026bae96 to your computer and use it in GitHub Desktop.
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

# Input keyword tables.  Raw strings keep the Windows backslashes literal:
# without the ``r`` prefix, ``\U`` is parsed as a unicode escape on Python 3
# and the file fails to compile.  The resulting path text is unchanged.
# NOTE(review): ``how`` is not referenced again in the visible part of this
# file -- confirm whether it is still needed.
how = pd.read_csv(r'C:\Users\Qi\Downloads\howkeywordgt2000.csv')
na = pd.read_csv(r'C:\Users\Qi\Desktop\k2015.csv')

# Shared browser session used by every helper and by the main loop below.
driver = webdriver.Chrome(executable_path=r"C:\Python27\Scripts\chromedriver.exe")
driver.set_window_size(4, 3)

# Open one hard-coded query page before the per-keyword loop runs.
url = "https://www.semrush.com/info/how%20to%20boil%20eggs+(source)"
driver.get(url)
def check_exist():
    """Tell whether the loaded page has an element at ``#rso > div.g.mnr-c.g-blk``.

    The lookup runs against the module-level ``driver``.

    Returns:
        bool: True when the element is found, False when Selenium raises
        ``NoSuchElementException``.

    Note:
        ``check_exist`` is defined again further down this file; when the
        file is executed top to bottom the last definition is the one used.
    """
    selector = '#rso > div.g.mnr-c.g-blk'
    try:
        driver.find_element_by_css_selector(selector)
    except NoSuchElementException:
        return False
    else:
        return True
def check_available():
    """Tell whether the loaded page has an element at ``#rso > div.srg``.

    The lookup runs against the module-level ``driver``.

    Returns:
        bool: True when the element is found, False when Selenium raises
        ``NoSuchElementException``.

    Note:
        ``check_available`` is defined again further down this file with
        the same selector.
    """
    selector = '#rso > div.srg'
    try:
        driver.find_element_by_css_selector(selector)
    except NoSuchElementException:
        return False
    else:
        return True
def appendurl(URL):
    """Append the ``href`` of the link matched by the selector to ``URL``.

    The lookup runs against the module-level ``driver``.

    Args:
        URL: list that is extended in place with the link's ``href``, or
            with the string ``"NA"`` when no element matches.

    Returns:
        None.

    Note:
        ``appendurl`` is defined again further down this file with
        different selectors; the last definition is the one in effect when
        the file runs top to bottom.
    """
    selector = '#rso > div.g.mnr-c.g-blk > div.kp-blk._Z7._Rqb._RJe > div > div._OKe > ol > div.g > div > h3 > a'
    try:
        href = driver.find_element_by_css_selector(selector).get_attribute('href')
    except NoSuchElementException:
        # Fix: the fallback previously appended to the global ``fURL``
        # instead of the list passed in, so any other caller-supplied list
        # would silently miss its "NA" entry.
        href = "NA"
    URL.append(href)
# 201504
def check_exist():
    """Tell whether the loaded page has an element at ``#rso > li``.

    The lookup runs against the module-level ``driver``.

    Returns:
        bool: True when the element is found, False when Selenium raises
        ``NoSuchElementException``.

    Note:
        This replaces the ``check_exist`` defined earlier in the file and
        is itself followed by another definition further down.
    """
    css = '#rso > li'
    try:
        driver.find_element_by_css_selector(css)
    except NoSuchElementException:
        found = False
    else:
        found = True
    return found
def check_available():
    """Tell whether the loaded page has an element at ``#rso > div.srg``.

    The lookup runs against the module-level ``driver``.

    Returns:
        bool: True when the element is found, False when Selenium raises
        ``NoSuchElementException``.

    Note:
        This replaces the ``check_available`` defined earlier in the file
        and is itself followed by another definition further down.
    """
    css = '#rso > div.srg'
    try:
        driver.find_element_by_css_selector(css)
    except NoSuchElementException:
        found = False
    else:
        found = True
    return found
def appendurl(URL):
    """Append the ``href`` of the link matched by the selector to ``URL``.

    The lookup runs against the module-level ``driver``.

    Args:
        URL: list that is extended in place with the link's ``href``, or
            with the string ``"NA"`` when no element matches.

    Returns:
        None.

    Note:
        This replaces the ``appendurl`` defined earlier in the file and is
        itself followed by another definition further down.
    """
    selector = '#rso > li.g.mnr-c.g-blk > div.kp-blk._Z7._Rqb._RJe > div > div._OKe > ol > li.g > div > h3 > a'
    try:
        href = driver.find_element_by_css_selector(selector).get_attribute('href')
    except NoSuchElementException:
        # Fix: the fallback previously appended to the global ``fURL``
        # instead of the list passed in, so any other caller-supplied list
        # would silently miss its "NA" entry.
        href = "NA"
    URL.append(href)
#201510
def check_exist():
    """Tell whether the loaded page has an element at ``#rso > li``.

    The lookup runs against the module-level ``driver``.

    Returns:
        bool: True when the element is found, False when Selenium raises
        ``NoSuchElementException``.

    Note:
        Earlier definitions of ``check_exist`` exist in this file; this is
        the last one in the visible part of it.
    """
    try:
        driver.find_element_by_css_selector('#rso > li')
    except NoSuchElementException:
        return False
    else:
        return True
def check_available():
    """Tell whether the loaded page has an element at ``#rso > div.srg``.

    The lookup runs against the module-level ``driver``.

    Returns:
        bool: True when the element is found, False when Selenium raises
        ``NoSuchElementException``.

    Note:
        Earlier definitions of ``check_available`` exist in this file; this
        is the last one in the visible part of it.
    """
    try:
        driver.find_element_by_css_selector('#rso > div.srg')
    except NoSuchElementException:
        return False
    else:
        return True
def appendurl(URL):
    """Append the ``href`` of the link matched by the selector to ``URL``.

    The lookup runs against the module-level ``driver``.

    Args:
        URL: list that is extended in place with the link's ``href``, or
            with the string ``"NA"`` when no element matches.

    Returns:
        None.

    Note:
        Earlier definitions of ``appendurl`` exist in this file; this is
        the last one in the visible part of it.
    """
    selector = '#rso > li > div.kp-blk._Z7._Rqb._RJe > div > div._OKe > ol > div.g.featured-snippet > div > h3 > a'
    try:
        href = driver.find_element_by_css_selector(selector).get_attribute('href')
    except NoSuchElementException:
        # Fix: the fallback previously appended to the global ``fURL``
        # instead of the list passed in, so any other caller-supplied list
        # would silently miss its "NA" entry.
        href = "NA"
    URL.append(href)
# Per-keyword results, appended in the same order as ``na.keyword`` so they
# can be attached to ``na`` as new columns afterwards.
feature = []
fURL = []

# NOTE(review): the URL below targets the ``/201504/`` path, while the
# helper definitions that come last in this file sit under the ``#201510``
# comment -- confirm the intended helper set is the one in effect.
for row in na.keyword:
    url = "https://www.semrush.com/201504/info/" + row + "+(source)"
    driver.get(url)
    # Fixed pause after navigation before the page is queried.
    time.sleep(5)
    if check_exist():
        feature.append(True)
        appendurl(fURL)  # appends the link's href, or "NA" if none matched
        # Parenthesised print emits the same text on Python 2 and, unlike
        # the bare print statement, also parses on Python 3.
        print(True)
    elif check_available():
        feature.append(False)
        fURL.append("NA")
        print(False)
    else:
        # Neither selector matched; record the row as undetermined.
        feature.append("NA")
        fURL.append("NA")
        print('NA')

na['feature'] = feature
na['url'] = fURL

# ``na`` comes from ``pd.read_csv``; ``na2`` is kept as a module-level name
# for anything that may refer to it later.
na2 = pd.DataFrame(na)
na2.to_csv('example.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment