Last active
January 8, 2021 10:03
-
-
Save PKartavkin/75fa29138cb5c74a159f2ccacd73d35d to your computer and use it in GitHub Desktop.
Proxy scraper for https://free-proxy-list.net
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This script retrieves all proxy addresses from https://free-proxy-list.net (all pages). | |
Proxy list can be used for ip rotation to get around bot protection. | |
""" | |
from selenium import webdriver | |
from bs4 import BeautifulSoup | |
class Proxy:
    """A single proxy server entry scraped from free-proxy-list.net."""

    def __init__(self, ip, code, type, https):
        # NOTE: parameter name `type` shadows the builtin, but is kept
        # unchanged for backward compatibility with existing callers.
        self.ip = ip        # "host:port" string
        self.code = code    # country-code table column
        self.type = type    # anonymity-level table column
        self.https = https  # HTTPS-support table column

    def __repr__(self):
        # Debug-friendly representation showing all four fields.
        return (f"{type(self).__name__}(ip={self.ip!r}, code={self.code!r}, "
                f"type={self.type!r}, https={self.https!r})")
class ProxyScrapper:
    """Scrapes every page of https://free-proxy-list.net with headless Chrome."""

    def init(self):
        """Start a headless Chrome driver (images disabled to speed up loads)."""
        chrome_options = webdriver.ChromeOptions()
        # Disable image loading and shrink the disk cache: only the HTML matters.
        prefs = {'profile.managed_default_content_settings.images': 2, 'disk-cache-size': 4096}
        chrome_options.add_experimental_option('prefs', prefs)
        chrome_options.add_argument("--headless")
        self.driver = webdriver.Chrome(executable_path='drivers/chromedriver', options=chrome_options)
        self.driver.implicitly_wait(10)

    def tear_down(self):
        """Shut down the browser and end the WebDriver session."""
        # quit() (not close()) also terminates the chromedriver process;
        # close() only closes the window and leaks the driver process.
        self.driver.quit()

    def _is_next_disabled(self):
        """Return True when the paginator's "Next" button is inactive (last page)."""
        next_container = self.driver.find_element_by_css_selector('#proxylisttable_next')
        return 'disabled' in next_container.get_attribute("class")

    def _parse_current_page(self, proxy_list):
        """Append one Proxy per table row on the currently loaded page."""
        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        rows = soup.findAll('tr', {'class': 'odd'}) + soup.findAll('tr', {'class': 'even'})
        for row in rows:
            cells = row.findAll('td')
            # Columns: 0=IP, 1=port, 2=country code, 4=anonymity, 6=HTTPS.
            # Use .string so Proxy fields hold text, not bs4 Tag objects.
            proxy_list.append(Proxy(cells[0].string + ':' + cells[1].string,
                                    cells[2].string, cells[4].string, cells[6].string))

    def scrape_proxies(self):
        """Walk every page of the proxy table and return a list of Proxy objects."""
        self.driver.get('https://free-proxy-list.net')
        proxy_list = []
        while True:
            # Parse BEFORE checking the button so the final page is included
            # (the original loop exited without parsing the last page).
            self._parse_current_page(proxy_list)
            if self._is_next_disabled():
                break
            self.driver.find_element_by_css_selector('#proxylisttable_next>a').click()
        return proxy_list

    def get_proxy_list(self):
        """Scrape proxies with a fresh driver; the driver is always torn down."""
        self.init()
        result = []
        try:
            result = self.scrape_proxies()
        finally:
            self.tear_down()
        return result
if __name__ == '__main__':
    # Guarded entry point: run the scraper and print one "ip:port" per line,
    # without launching a browser as a side effect of a mere import.
    proxy_scrapper = ProxyScrapper()
    for proxy in proxy_scrapper.get_proxy_list():
        print(proxy.ip)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment