Created
April 4, 2018 18:17
-
-
Save QuantumCalzone/7c728be7cd44e732e7487ffa0ff90a54 to your computer and use it in GitHub Desktop.
Scrapes your saved Facebook links, cleans them up, and exports them as a list to a txt file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
from selenium.webdriver.common.keys import Keys | |
from selenium.common.exceptions import TimeoutException | |
from selenium.webdriver.common.by import By | |
import selenium.webdriver.support.ui as ui | |
import selenium.webdriver.support.expected_conditions as EC | |
import os | |
import time | |
import sys | |
import bs4 | |
from urllib.parse import unquote | |
import unittest, time, re | |
from bs4 import BeautifulSoup as soup | |
# Output file that will receive one cleaned link per line.
# NOTE: despite the original comment, this is a plain text file, not a CSV.
filename = "SavedFacebookLinks.text"
# f is the conventional name for a file writer; "w" truncates any existing file.
# UTF-8 is forced so non-ASCII characters in scraped links don't raise
# UnicodeEncodeError under Windows' default cp1252 codec.
f = open(filename, "w", encoding="utf-8")

print("\n")
# Number of scroll-to-bottom iterations to perform on the saved-links page.
loops = int(input("Enter loops: "))
print("\n")
# Seconds to wait between scrolls so lazily-loaded content can appear.
sleepTime = int(input("Enter sleepTime: "))

# Printed at the very end as a completion marker.
debug = "Done!"
class Sel(unittest.TestCase):
    """Selenium-driven scraper for https://www.facebook.com/saved/.

    Reuses an existing Chrome profile (so the Facebook login is already
    present), scrolls the saved-links page ``loops`` times to trigger lazy
    loading, parses the final DOM with BeautifulSoup, cleans each saved
    link, and writes one link per line to the module-level file ``f``.
    """

    def setUp(self):
        # Chrome settings: tolerate certificate/SSL errors on loaded pages.
        options = webdriver.ChromeOptions()
        options.add_argument("--ignore-certificate-errors")
        options.add_argument("--ignore-ssl-errors")
        # Load the real user profile so the existing Facebook session is reused.
        options.add_argument(r"--user-data-dir=C:\Users\georg_000\AppData\Local\Google\Chrome\User Data\Default")
        # fullscreen (disabled)
        # options.add_argument("--start-maximized")
        # Raw string: in a non-raw literal, sequences like "\P" and "\D" are
        # invalid escapes (DeprecationWarning) and e.g. "\t" would silently
        # corrupt the path.
        chromedriver = r"C:\Python27\DownloadedTools\chromedriver_win32\chromedriver.exe"
        os.environ["webdriver.chrome.driver"] = chromedriver
        self.driver = webdriver.Chrome(options=options, executable_path=chromedriver)
        self.verificationErrors = []
        self.accept_next_alert = True

    def tearDown(self):
        # Always release the browser process, even if test_sel raised —
        # without this, every failed run leaks a Chrome instance.
        self.driver.quit()

    def test_sel(self):
        """Scroll, scrape, clean, and persist the saved Facebook links."""
        driver = self.driver
        driver.get("https://www.facebook.com/saved/")
        # driver.find_element_by_link_text("All").click()
        print("\n")
        time.sleep(sleepTime)
        # Bug fix: range(1, loops) performed only loops-1 scrolls while the
        # progress message claimed "loop i / loops"; include the final loop.
        for i in range(1, loops + 1):
            print("loop " + str(i) + " / " + str(loops))
            time.sleep(sleepTime)
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        html_source = driver.page_source
        # data = html_source.encode('utf-8')
        # html parsing
        page_soup = soup(html_source, "html.parser")
        # find_all is the non-deprecated bs4 spelling of findAll.
        # NOTE(review): the class string "_4bl9 _5yjp" is a Facebook-internal
        # CSS class and may break whenever Facebook redeploys — verify.
        savedLinks = page_soup.find_all("div", {"class": "_4bl9 _5yjp"})
        print("\n")
        for savedLink in savedLinks:
            anchor = savedLink.a
            # Robustness: skip containers without a usable <a href=...>
            # instead of crashing with AttributeError/KeyError.
            if anchor is None or not anchor.has_attr("href"):
                continue
            print(anchor)
            link = unquote(anchor["href"])
            # Clean up the links: strip Facebook's redirect wrapper and
            # bare-domain prefix.
            link = link.replace("https://l.facebook.com/l.php?u=", "")
            link = link.replace("https://facebook.com", "")
            # Relative /videos/ paths (no dot => no external domain) are
            # Facebook-hosted videos; restore the absolute URL.
            if "/videos/" in link and "." not in link:
                link = "https://facebook.com" + link
            # Clean up the links: drop the "&h=..." tracking signature and
            # any remaining query string (raw strings for the regexes).
            link = re.sub(r"&h=?.*", "", link)
            link = re.sub(r"\?(.*)", "", link)
            print(link)
            print("\n")
            f.write(link + "\n")
        print("\n")
        print("savedLinks: " + str(len(savedLinks)))
        f.close()
        print("\n")
        print(debug)
# Script entry point: hand control to the unittest runner, which will
# instantiate Sel, call setUp, run test_sel, and report the result.
if __name__ == "__main__":
    unittest.main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment