Created
May 3, 2016 19:20
-
-
Save QiZ213/5b03b41f15c019f00e322dca026bae96 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
# Load the two keyword tables from local CSV files.
# Raw string literals keep the Windows backslashes literal: in a plain string
# '\U...' is rejected as a malformed unicode escape on Python 3, and other
# backslash pairs could silently turn into control characters.
how = pd.read_csv(r'C:\Users\Qi\Downloads\howkeywordgt2000.csv')
na = pd.read_csv(r'C:\Users\Qi\Desktop\k2015.csv')
from selenium import webdriver | |
import time | |
# Start a Chrome session through the chromedriver binary at a fixed local path.
driver = webdriver.Chrome(
    executable_path=r"C:\Python27\Scripts\chromedriver.exe"
)

# Request a 4 x 3 window (presumably to keep the browser out of the way --
# TODO confirm the intent).
driver.set_window_size(4, 3)

# Open one sample page up front; the `url` name is reused by the main loop.
url = "https://www.semrush.com/info/how%20to%20boil%20eggs+(source)"
driver.get(url)
from selenium.common.exceptions import NoSuchElementException | |
def check_exist():
    """Report whether the loaded page has a ``#rso > div.g.mnr-c.g-blk`` element.

    The lookup runs against whatever page the module-level ``driver``
    currently shows.  Presumably the selector marks the featured/answer
    block of the result page -- confirm against the page markup.

    NOTE: later ``check_exist`` definitions in this file rebind the name, so
    this version is only in effect until they are executed.

    Returns:
        bool: True if the selector matches an element, False if Selenium
        raises NoSuchElementException.
    """
    try:
        driver.find_element_by_css_selector('#rso > div.g.mnr-c.g-blk')
    except NoSuchElementException:
        return False
    else:
        return True
def check_available():
    """Report whether the loaded page has a ``#rso > div.srg`` element.

    The lookup runs against the page currently shown by the module-level
    ``driver``.  Presumably ``div.srg`` is the regular results container --
    confirm against the page markup.

    Returns:
        bool: True if the selector matches an element, False if Selenium
        raises NoSuchElementException.
    """
    selector = '#rso > div.srg'
    try:
        driver.find_element_by_css_selector(selector)
        return True
    except NoSuchElementException:
        return False
def appendurl(URL):
    """Append the link of the matched result anchor to ``URL``, or "NA".

    Looks up a single ``<a>`` element on the page currently shown by the
    module-level ``driver`` and appends its ``href`` attribute to ``URL``.
    When the element is not present, the placeholder string "NA" is
    appended instead, so ``URL`` always grows by exactly one entry.

    Args:
        URL: list that receives the result; it is mutated in place.

    Returns:
        None.
    """
    selector = '#rso > div.g.mnr-c.g-blk > div.kp-blk._Z7._Rqb._RJe > div > div._OKe > ol > div.g > div > h3 > a'
    try:
        href = driver.find_element_by_css_selector(selector).get_attribute('href')
    except NoSuchElementException:
        # Write the placeholder to the caller's list rather than to the
        # module-level fURL, so the function works for any list passed in.
        URL.append("NA")
    else:
        URL.append(href)
# 201504
def check_exist():
    """Report whether the loaded page has an element matching ``#rso > li``.

    The lookup runs against the page currently shown by the module-level
    ``driver``.

    NOTE: this rebinds any earlier ``check_exist`` in the file, and a later
    definition with the same name replaces it in turn.

    Returns:
        bool: True if the selector matches an element, False if Selenium
        raises NoSuchElementException.
    """
    found = True
    try:
        driver.find_element_by_css_selector('#rso > li')
    except NoSuchElementException:
        found = False
    return found
def check_available():
    """Report whether the loaded page has a ``#rso > div.srg`` element.

    The lookup runs against the page currently shown by the module-level
    ``driver``.

    Returns:
        bool: True if the selector matches an element, False if Selenium
        raises NoSuchElementException.
    """
    try:
        driver.find_element_by_css_selector('#rso > div.srg')
    except NoSuchElementException:
        return False
    else:
        return True
def appendurl(URL):
    """Append the link of the matched result anchor to ``URL``, or "NA".

    Looks up a single ``<a>`` element (``li``-based markup variant) on the
    page currently shown by the module-level ``driver`` and appends its
    ``href`` attribute to ``URL``.  When the element is not present, the
    placeholder string "NA" is appended instead, so ``URL`` always grows by
    exactly one entry.

    Args:
        URL: list that receives the result; it is mutated in place.

    Returns:
        None.
    """
    selector = '#rso > li.g.mnr-c.g-blk > div.kp-blk._Z7._Rqb._RJe > div > div._OKe > ol > li.g > div > h3 > a'
    try:
        href = driver.find_element_by_css_selector(selector).get_attribute('href')
    except NoSuchElementException:
        # Write the placeholder to the caller's list rather than to the
        # module-level fURL, so the function works for any list passed in.
        URL.append("NA")
    else:
        URL.append(href)
# 201510
def check_exist():
    """Report whether the loaded page has an element matching ``#rso > li``.

    The lookup runs against the page currently shown by the module-level
    ``driver``.  This definition rebinds any earlier ``check_exist`` in the
    file.

    Returns:
        bool: True if the selector matches an element, False if Selenium
        raises NoSuchElementException.
    """
    selector = '#rso > li'
    try:
        driver.find_element_by_css_selector(selector)
        return True
    except NoSuchElementException:
        return False
def check_available():
    """Report whether the loaded page has a ``#rso > div.srg`` element.

    The lookup runs against the page currently shown by the module-level
    ``driver``.  This definition rebinds any earlier ``check_available`` in
    the file.

    Returns:
        bool: True if the selector matches an element, False if Selenium
        raises NoSuchElementException.
    """
    present = True
    try:
        driver.find_element_by_css_selector('#rso > div.srg')
    except NoSuchElementException:
        present = False
    return present
def appendurl(URL):
    """Append the link of the featured-snippet anchor to ``URL``, or "NA".

    Looks up a single ``<a>`` element under a ``div.g.featured-snippet``
    node on the page currently shown by the module-level ``driver`` and
    appends its ``href`` attribute to ``URL``.  When the element is not
    present, the placeholder string "NA" is appended instead, so ``URL``
    always grows by exactly one entry.

    Args:
        URL: list that receives the result; it is mutated in place.

    Returns:
        None.
    """
    selector = '#rso > li > div.kp-blk._Z7._Rqb._RJe > div > div._OKe > ol > div.g.featured-snippet > div > h3 > a'
    try:
        href = driver.find_element_by_css_selector(selector).get_attribute('href')
    except NoSuchElementException:
        # Write the placeholder to the caller's list rather than to the
        # module-level fURL, so the function works for any list passed in.
        URL.append("NA")
    else:
        URL.append(href)
# Visit one page per keyword in `na` and record two parallel columns:
#   feature -- True  if check_exist() matched,
#              False if only check_available() matched,
#              "NA"  if neither matched;
#   fURL    -- the link collected by appendurl(), or "NA".
feature = []
fURL = []

for row in na.keyword:
    url = "https://www.semrush.com/201504/info/" + row + "+(source)"
    driver.get(url)
    # Fixed pause after navigation before the DOM is queried.
    time.sleep(5)

    if check_exist():
        feature.append(True)
        appendurl(fURL)
        print(True)
    else:
        # No match for check_exist(): distinguish "page has the results
        # container" (False) from "nothing recognisable" ("NA").
        feature.append(False if check_available() else "NA")
        fURL.append("NA")
        print(feature[-1])

# Attach the collected columns to the keyword table and write it out.
na['feature'] = feature
na['url'] = fURL
na2 = pd.DataFrame(na)
na2.to_csv('example.csv')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment