ProxyRequestsSession是一个旨在通过代理简化HTTP请求的Python类。它会自动从https://www.sslproxies.org/获取代理列表,并在每个请求时随机选择一个代理。如果某个代理失败,它会重试,直至达到最大尝试次数限制。

特性

  • 自动获取并轮换免费代理列表中的代理。
  • 支持GET和POST请求。
  • 支持日志记录,以追踪代理使用情况和错误。
  • 可自定义的超时和最大尝试次数设置。

环境要求

  • Python 3
  • requests
  • beautifulsoup4

安装

首先,确保你安装了所需的库:

pip install requests beautifulsoup4

使用方法

from proxy_requests_session import ProxyRequestsSession

# 创建一个 ProxyRequestsSession 实例
proxy_requests = ProxyRequestsSession()

# 执行一个GET请求
response = proxy_requests.get('http://ifconfig.me')
print(response.text)

# 如有需要,执行一个POST请求
# response = proxy_requests.post('http://example.com', data={'key': 'value'})
# print(response.text)

代码

import requests
from bs4 import BeautifulSoup
import random
import logging

class ProxyRequestsSession:
    """HTTP session that routes each request through a randomly chosen free proxy.

    The proxy pool is scraped once from https://www.sslproxies.org/ at
    construction time. A proxy that fails (connection error or timeout) is
    removed from the pool, and the request is retried with another proxy
    until ``max_attempts`` is reached.
    """

    def __init__(self):
        self.session = requests.Session()
        # Set up logging BEFORE fetching proxies so the fetch step can log.
        # (Original assigned self.logger after get_proxies(), leaving the
        # fetch phase unable to use it.)
        self.logger = logging.getLogger(__name__)
        logging.basicConfig(level=logging.INFO)
        self.proxies = self.get_proxies()

    def get_proxies(self):
        """Scrape the free proxy list and return it as ``http://ip:port`` URLs.

        Returns:
            list[str]: proxy URLs; empty if the expected table is missing
            (e.g. the site layout changed or the request was blocked),
            instead of crashing with AttributeError as the original did.
        """
        url = 'https://www.sslproxies.org/'
        response = self.session.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        table = soup.find("table", class_="table-striped")
        if table is None:
            # Site layout changed or we were served an error page; degrade
            # to an empty pool rather than raising inside __init__.
            return []
        proxies = []
        for row in table.find_all('tr')[1:]:  # skip the header row
            tds = row.find_all('td')
            try:
                ip = tds[0].text.strip()
                port = tds[1].text.strip()
                proxies.append(f'http://{ip}:{port}')
            except IndexError:
                # Malformed row (e.g. spacer) — skip it.
                continue
        return proxies

    def request(self, method, url, headers=None, data=None, json=None, timeout=5, max_attempts=5):
        """Issue an HTTP request through a random proxy, retrying on failure.

        Args:
            method: HTTP verb, e.g. 'GET' or 'POST'.
            url: target URL.
            headers, data, json: passed through to requests.Session.request.
            timeout: per-attempt timeout in seconds.
            max_attempts: maximum number of proxies to try.

        Returns:
            requests.Response from the first successful attempt.

        Raises:
            ConnectionError: if the proxy pool is empty, or if every attempt
            failed. (BUG FIX: the original fell off the end of the loop and
            implicitly returned None, making callers crash on response.text.)
        """
        for attempt in range(max_attempts):
            if not self.proxies:
                raise ConnectionError("Proxy list is empty. Exiting.")
            proxy = random.choice(self.proxies)
            # Lazy %-style args avoid formatting when INFO is disabled.
            self.logger.info("Trying Proxy: %s", proxy)
            # Route both plain and TLS traffic through the same proxy.
            proxies_dict = {
                'http': proxy,
                'https': proxy,
            }
            try:
                return self.session.request(method, url, headers=headers, proxies=proxies_dict, data=data, json=json, timeout=timeout)
            except (requests.ConnectionError, requests.Timeout):
                self.logger.error("Failed to connect using proxy: %s", proxy)
                # Dead proxy — drop it so we never pick it again.
                self.proxies.remove(proxy)
        raise ConnectionError(f"All {max_attempts} proxy attempts failed for {url}.")

    def get(self, url, headers=None, timeout=5, max_attempts=20):
        """GET ``url`` through a random proxy (see ``request``)."""
        return self.request('GET', url, headers=headers, timeout=timeout, max_attempts=max_attempts)

    def post(self, url, headers=None, data=None, json=None, timeout=5, max_attempts=20):
        """POST to ``url`` through a random proxy (see ``request``)."""
        return self.request('POST', url, headers=headers, data=data, json=json, timeout=timeout, max_attempts=max_attempts)

if __name__ == '__main__':
    # Example usage: fetch our apparent public IP through a random proxy.
    session = ProxyRequestsSession()
    result = session.get('http://ifconfig.me')
    print(result.text)