ProxyRequestsSession
是一个旨在通过代理简化HTTP请求的Python类。它会自动从https://www.sslproxies.org/
获取代理列表,并在每个请求时随机选择一个代理。如果某个代理失败,它会重试,直至达到最大尝试次数限制。
特性
- 自动获取并轮换免费代理列表中的代理。
- 支持GET和POST请求。
- 支持日志记录,以追踪代理使用情况和错误。
- 可自定义的超时和最大尝试次数设置。
环境要求
- Python 3
- requests 库
- beautifulsoup4 库
使用方法
首先,确保你安装了所需的库:
pip install requests beautifulsoup4
使用示例
from proxy_requests_session import ProxyRequestsSession
# 创建一个 ProxyRequestsSession 实例
proxy_requests = ProxyRequestsSession()
# 执行一个GET请求
response = proxy_requests.get('http://ifconfig.me')
print(response.text)
# 如有需要,执行一个POST请求
# response = proxy_requests.post('http://example.com', data={'key': 'value'})
# print(response.text)
代码
import requests
from bs4 import BeautifulSoup
import random
import logging
class ProxyRequestsSession:
    """HTTP session that routes requests through free proxies.

    Proxies are scraped once from https://www.sslproxies.org/ at
    construction time. Each request picks a proxy at random; a proxy that
    fails to connect is removed from the pool and another attempt is made,
    up to ``max_attempts`` times.
    """

    PROXY_LIST_URL = 'https://www.sslproxies.org/'

    def __init__(self):
        self.session = requests.Session()
        self.logger = logging.getLogger(__name__)
        # NOTE(review): basicConfig in a library class configures the whole
        # process's logging; kept for backward compatibility.
        logging.basicConfig(level=logging.INFO)
        self.proxies = self.get_proxies()

    def get_proxies(self):
        """Scrape the proxy table and return a list of 'http://ip:port' URLs.

        Returns an empty list if the expected table is not present
        (e.g. the page layout changed).

        Raises:
            requests.HTTPError: if the proxy-list site returns an error status.
        """
        response = self.session.get(self.PROXY_LIST_URL)
        # Fail loudly on a bad status instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        proxies = []
        table = soup.find("table", class_="table-striped")
        if table is None:
            # Page layout changed; original code crashed with AttributeError here.
            self.logger.warning("Proxy table not found at %s", self.PROXY_LIST_URL)
            return proxies
        # Skip the header row; each data row is ip | port | ...
        for row in table.find_all('tr')[1:]:
            tds = row.find_all('td')
            try:
                ip = tds[0].text.strip()
                port = tds[1].text.strip()
                proxies.append(f'http://{ip}:{port}')
            except IndexError:
                # Malformed row (e.g. a spacer); ignore it.
                continue
        return proxies

    def request(self, method, url, headers=None, data=None, json=None, timeout=5, max_attempts=5):
        """Send *method* *url* through a random proxy, retrying on failure.

        Args:
            method: HTTP verb, e.g. 'GET' or 'POST'.
            url: Target URL.
            headers, data, json: Passed through to ``requests``.
            timeout: Per-attempt timeout in seconds.
            max_attempts: Maximum number of proxies to try.

        Returns:
            The ``requests.Response`` from the first successful attempt.

        Raises:
            ConnectionError: if the proxy pool is empty or every attempt fails.
        """
        for _ in range(max_attempts):
            if not self.proxies:
                raise ConnectionError("Proxy list is empty. Exiting.")
            proxy = random.choice(self.proxies)
            # Lazy %-formatting: avoids building the string when INFO is disabled.
            self.logger.info("Trying Proxy: %s", proxy)
            proxies_dict = {
                'http': proxy,
                'https': proxy,
            }
            try:
                return self.session.request(
                    method, url, headers=headers, proxies=proxies_dict,
                    data=data, json=json, timeout=timeout,
                )
            except (requests.ConnectionError, requests.Timeout):
                self.logger.error("Failed to connect using proxy: %s", proxy)
                # Drop the dead proxy so it is not picked again.
                self.proxies.remove(proxy)
        # BUG FIX: the original fell off the loop and returned None, making
        # callers crash later on `response.text`; raise explicitly instead.
        raise ConnectionError(f"All {max_attempts} proxy attempts failed for {url}.")

    def get(self, url, headers=None, timeout=5, max_attempts=20):
        """Perform a GET request through a randomly chosen proxy."""
        return self.request('GET', url, headers=headers, timeout=timeout, max_attempts=max_attempts)

    def post(self, url, headers=None, data=None, json=None, timeout=5, max_attempts=20):
        """Perform a POST request through a randomly chosen proxy."""
        return self.request('POST', url, headers=headers, data=data, json=json, timeout=timeout, max_attempts=max_attempts)
if __name__ == '__main__':
    # Example usage: fetch this machine's apparent public IP through a proxy.
    session = ProxyRequestsSession()
    result = session.get('http://ifconfig.me')
    print(result.text)