12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667 |
- import os
- import requests
- from datetime import datetime
- from urllib.parse import urlparse
- from utils.logger_helper import LoggerHelper
- from utils.config_helper import ConfigHelper
class FileHelper:
    """Download remote files into a per-day folder under the attachment path.

    The attachment root is read from the ``save.attach_file_path`` config key
    and falls back to ``DEFAULT_ATTACH_PATH`` when the key is not set.
    """

    logger = LoggerHelper.get_logger()
    config = ConfigHelper()
    DEFAULT_ATTACH_PATH = "./attaches/"
    # Seconds to wait for the server to connect / send data before giving up.
    DEFAULT_TIMEOUT = 30.0

    # Pool of ten distinct User-Agent strings; one is picked per download.
    _USER_AGENTS = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/91.0.4472.124 Safari/605.1.15",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59 Safari/537.36",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1",
        "Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1",
        "Mozilla/5.0 (Linux; Android 11; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Mobile Safari/537.36",
    )

    def __init__(self):
        """Resolve the attachment root from config and normalise its slashes."""
        path = self.config.get("save.attach_file_path", self.DEFAULT_ATTACH_PATH)
        self._attach_file_path = self._normalize_path(path)

    @staticmethod
    def _normalize_path(path: str) -> str:
        """Return *path* with backslashes turned into ``/`` and ``//`` collapsed."""
        return path.replace("\\", "/").replace("//", "/")

    def download_remote_file(
        self, file_url: str, file_name: str, timeout: float = DEFAULT_TIMEOUT
    ) -> str | None:
        """Download *file_url* and store it under today's attachment folder.

        The file is saved as ``<HHMMSSmmm>@<file_name>`` inside a
        ``YYYY-MM-DD`` sub-directory of the attachment root.

        Args:
            file_url: URL to fetch with an HTTP GET (redirects are followed).
            file_name: Name to store the file under. Only its final path
                component is used, so separators or ``..`` in the name cannot
                redirect the write to another directory.
            timeout: Seconds passed to ``requests.get`` as its ``timeout``.

        Returns:
            The slash-normalised path of the saved file, or ``None`` when the
            request or the write fails (the error is logged).
        """
        self.logger.info(f"下载远程文件: {file_url} 文件名:{file_name}")

        # Sample the clock once so the date folder and the time prefix agree
        # even when the call happens right at midnight.
        now = datetime.now()
        # "%f" is microseconds; dropping the last three digits keeps milliseconds.
        current_timestamp = now.strftime("%H%M%S%f")[:-3]

        # Keep only the last path component of the requested name.
        safe_name = os.path.basename(str(file_name).replace("\\", "/"))
        file_name = f"{current_timestamp}@{safe_name}"

        file_path = os.path.join(self._attach_file_path, now.strftime("%Y-%m-%d"))
        # exist_ok avoids the check-then-create race between concurrent downloads.
        os.makedirs(file_path, exist_ok=True)
        path = self._normalize_path(os.path.join(file_path, file_name))

        # Pick a User-Agent from the length of the (prefixed) file name.
        ua_index = len(file_name) % len(self._USER_AGENTS)

        # Build the Referer from the URL's origin, mapping a "download." host
        # prefix to "www.".
        parsed_url = urlparse(file_url)
        referer = f"{parsed_url.scheme}://{parsed_url.netloc}/".replace("//download.", "//www.")

        # NOTE(review): "br" is advertised below; presumably requests can only
        # decode Brotli when a brotli package is installed -- confirm.
        headers = {
            'User-Agent': self._USER_AGENTS[ua_index],
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7',
            'Referer': referer
        }

        try:
            # Without a timeout a stalled server would block this call forever.
            response = requests.get(
                file_url, headers=headers, allow_redirects=True, timeout=timeout
            )
            response.raise_for_status()
            with open(path, 'wb') as f:
                f.write(response.content)
            self.logger.info(f"文件下载成功: {file_name}")
            return path
        except requests.exceptions.HTTPError as http_err:
            self.logger.error(f"HTTP 错误: {http_err}")
        except Exception as e:
            # Best-effort download: any other failure is logged and reported as None.
            self.logger.error(f"文件下载失败: {file_name}。Exception: {e}")
        return None
|