file_helper.py 3.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. import os
  2. import requests
  3. from datetime import datetime
  4. from urllib.parse import urlparse
  5. from utils.logger_helper import LoggerHelper
  6. from utils.config_helper import ConfigHelper
  7. class FileHelper:
  8. logger = LoggerHelper.get_logger()
  9. config = ConfigHelper()
  10. DEFAULT_ATTACH_PATH = "./attaches/"
  11. def __init__(self):
  12. path = self.config.get("save.attach_file_path", self.DEFAULT_ATTACH_PATH)
  13. path = path.replace("\\", "/")
  14. path = path.replace("//", "/")
  15. self._attach_file_path = path
  16. def download_remote_file(self, file_url, file_name) -> str | None:
  17. self.logger.info(f"下载远程文件: {file_url} 文件名:{file_name}")
  18. current_timestamp = datetime.now().strftime("%H%M%S%f")[:-3] # 取前三位毫秒
  19. file_name = f"{current_timestamp}@{file_name}"
  20. file_path = os.path.join(self._attach_file_path, f'{datetime.now().strftime("%Y-%m-%d")}')
  21. if not os.path.exists(file_path):
  22. os.makedirs(file_path)
  23. path = os.path.join(file_path, file_name)
  24. path = path.replace("\\", "/")
  25. path = path.replace("//", "/")
  26. # 10个不同的 User-Agent
  27. user_agents = [
  28. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
  29. "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
  30. "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
  31. "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
  32. "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/91.0.4472.124 Safari/605.1.15",
  33. "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
  34. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59 Safari/537.36",
  35. "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1",
  36. "Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1",
  37. "Mozilla/5.0 (Linux; Android 11; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Mobile Safari/537.36"
  38. ]
  39. # 根据文件名长度选择一个 User-Agent
  40. ua_index = len(file_name) % len(user_agents)
  41. # 解析 file_url 获取 Referer
  42. parsed_url = urlparse(file_url)
  43. referer = f"{parsed_url.scheme}://{parsed_url.netloc}/".replace("//download.", "//www.")
  44. headers = {
  45. 'User-Agent': user_agents[ua_index],
  46. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
  47. 'Accept-Encoding': 'gzip, deflate, br',
  48. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7',
  49. 'Referer': referer
  50. }
  51. try:
  52. response = requests.get(file_url, headers=headers, allow_redirects=True)
  53. response.raise_for_status()
  54. with open(path, 'wb') as f:
  55. f.write(response.content)
  56. self.logger.info(f"文件下载成功: {file_name}")
  57. return path
  58. except requests.exceptions.HTTPError as http_err:
  59. self.logger.error(f"HTTP 错误: {http_err}")
  60. except Exception as e:
  61. self.logger.error(f"文件下载失败: {file_name}。Exception: {e}")
  62. return None