浏览代码

Update 代码格式化,优化utils包调用

YueYunyun 8 月之前
父节点
当前提交
d437ee1399
共有 30 个文件被更改,包括 1196 次插入和 983 次删除
  1. 3 0
      SourceCode/TenderCrawler/app/__init__.py
  2. 10 0
      SourceCode/TenderCrawler/app/adapters/__init__.py
  3. 78 55
      SourceCode/TenderCrawler/app/adapters/ccgp_data_collection_adapter.py
  4. 55 48
      SourceCode/TenderCrawler/app/adapters/chinabidding_data_collection_adapter.py
  5. 23 26
      SourceCode/TenderCrawler/app/adapters/data_collection_adapter_interface.py
  6. 1 0
      SourceCode/TenderCrawler/app/config.yml
  7. 7 0
      SourceCode/TenderCrawler/app/drivers/__init__.py
  8. 34 39
      SourceCode/TenderCrawler/app/drivers/driver_creator.py
  9. 36 34
      SourceCode/TenderCrawler/app/jobs/data_clean.py
  10. 23 26
      SourceCode/TenderCrawler/app/jobs/data_collector.py
  11. 50 45
      SourceCode/TenderCrawler/app/jobs/data_process.py
  12. 32 24
      SourceCode/TenderCrawler/app/jobs/data_send.py
  13. 99 91
      SourceCode/TenderCrawler/app/jobs/job_runner.py
  14. 13 13
      SourceCode/TenderCrawler/app/main.py
  15. 20 14
      SourceCode/TenderCrawler/app/models/area_email.py
  16. 90 90
      SourceCode/TenderCrawler/app/models/collect_data.py
  17. 106 107
      SourceCode/TenderCrawler/app/models/process_data.py
  18. 99 97
      SourceCode/TenderCrawler/app/models/process_result_data.py
  19. 17 18
      SourceCode/TenderCrawler/app/models/url_setting.py
  20. 26 30
      SourceCode/TenderCrawler/app/stores/default_data_store.py
  21. 28 30
      SourceCode/TenderCrawler/app/stores/mysql_data_store.py
  22. 147 1
      SourceCode/TenderCrawler/app/utils/__init__.py
  23. 34 31
      SourceCode/TenderCrawler/app/utils/ai_helper.py
  24. 11 13
      SourceCode/TenderCrawler/app/utils/config_helper.py
  25. 45 55
      SourceCode/TenderCrawler/app/utils/email_helper.py
  26. 40 29
      SourceCode/TenderCrawler/app/utils/file_helper.py
  27. 29 23
      SourceCode/TenderCrawler/app/utils/logger_helper.py
  28. 30 33
      SourceCode/TenderCrawler/app/utils/mysql_helper.py
  29. 7 9
      SourceCode/TenderCrawler/app/utils/string_helper.py
  30. 3 2
      SourceCode/TenderCrawler/init.sql

+ 3 - 0
SourceCode/TenderCrawler/app/__init__.py

@@ -0,0 +1,3 @@
+import utils
+
+utils.reload_config()

+ 10 - 0
SourceCode/TenderCrawler/app/adapters/__init__.py

@@ -0,0 +1,10 @@
+from data_collection_adapter_interface import IDataCollectionAdapter
+from stores.data_store_interface import IDataStore
+
+
+def collect(adapter: IDataCollectionAdapter, keyword: str, store: IDataStore = None):
+    adapter.collect(keyword, store)
+
+
+def teardown(adapter: IDataCollectionAdapter):
+    adapter.teardown()

+ 78 - 55
SourceCode/TenderCrawler/app/adapters/ccgp_data_collection_adapter.py

@@ -1,23 +1,21 @@
 from time import sleep
 
+from selenium.common.exceptions import TimeoutException, NoSuchElementException
 from selenium.webdriver.common.by import By
-from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.support import expected_conditions as ec
-from selenium.common.exceptions import TimeoutException, NoSuchElementException
+from selenium.webdriver.support.wait import WebDriverWait
 
-from stores.data_store_interface import IDataStore
+import utils
 from adapters.data_collection_adapter_interface import IDataCollectionAdapter
-from utils.file_helper import FileHelper
-
+from stores.data_store_interface import IDataStore
 
 
 class CcgpDataCollectionAdapter(IDataCollectionAdapter):
     """
     中国政府采购网数据采集适配器
     """
-    file_helper = FileHelper()
 
-    def __init__(self, url: str,store:IDataStore=None):
+    def __init__(self, url: str, store: IDataStore = None):
         self._url = url
         self._store = store
         self._driver = None
@@ -25,7 +23,7 @@ class CcgpDataCollectionAdapter(IDataCollectionAdapter):
         self._adapter_type = "ccgp"
 
     def login(self, username: str, password: str) -> None:
-       pass
+        pass
 
     def collect(self, keyword: str, store: IDataStore):
         if store:
@@ -33,7 +31,7 @@ class CcgpDataCollectionAdapter(IDataCollectionAdapter):
         self._keyword = keyword
         items = self._search(keyword)
         self._process_list(items)
-        if self.config.get_bool(self.batch_save_key):
+        if utils.get_config_bool(self.batch_save_key):
             self.store.save_collect_data(True)
 
     def _search(self, keyword: str) -> list:
@@ -41,20 +39,22 @@ class CcgpDataCollectionAdapter(IDataCollectionAdapter):
             if not keyword:
                 raise Exception("搜索关键字不能为空")
             wait = WebDriverWait(self.driver, 10, 1)
-            wait.until(
-                ec.presence_of_element_located((By.ID, "searchForm")))
+            wait.until(ec.presence_of_element_located((By.ID, "searchForm")))
             search_el = self.driver.find_element(By.ID, "kw")
             sleep(2)
             search_el.clear()
             search_el.send_keys(keyword)
             search_btn = self.driver.find_element(
-                By.XPATH, "//form[@id='searchForm']/input[@id='doSearch2']")
+                By.XPATH, "//form[@id='searchForm']/input[@id='doSearch2']"
+            )
             sleep(1)
             search_btn.click()
-            wait.until(ec.presence_of_element_located((By.CLASS_NAME, "vT-srch-result")))
+            wait.until(
+                ec.presence_of_element_located((By.CLASS_NAME, "vT-srch-result"))
+            )
             default_search_txt = "近1周"
-            search_txt = self.config.get(self.search_day_key, default_search_txt)
-            self.logger.info(f"搜索关键字: {keyword},搜索条件: {search_txt}")
+            search_txt = utils.get_config_value(self.search_day_key, default_search_txt)
+            utils.get_logger().info(f"搜索关键字: {keyword},搜索条件: {search_txt}")
             if search_txt != default_search_txt:
                 last_els = self.driver.find_elements(By.XPATH, "//ul[@id='datesel']/li")
                 for last_el in last_els:
@@ -62,40 +62,44 @@ class CcgpDataCollectionAdapter(IDataCollectionAdapter):
                         sleep(1)
                         last_el.click()
                         break
-                wait.until(ec.presence_of_element_located((By.CLASS_NAME, "vT-srch-result")))
+                wait.until(
+                    ec.presence_of_element_located((By.CLASS_NAME, "vT-srch-result"))
+                )
             else:
                 sleep(1)
             try:
-                p_els = self.driver.find_elements(By.XPATH, "//body/div[@class='vT_z']/div/div/p")
+                p_els = self.driver.find_elements(
+                    By.XPATH, "//body/div[@class='vT_z']/div/div/p"
+                )
                 if len(p_els) > 0:
-                    self.logger.info(f" {p_els[0].text}")
+                    utils.get_logger().info(f" {p_els[0].text}")
                 else:
                     a_links = self.driver.find_elements(
-                        By.XPATH, "//div[@class='vT-srch-result-list']/p/a")
+                        By.XPATH, "//div[@class='vT-srch-result-list']/p/a"
+                    )
                     count = len(a_links)
                     if count > 1:
                         count = count - 1
-                    self.logger.info(f"共查询到 {count} 页,每页 20 条")
+                    utils.get_logger().info(f"共查询到 {count} 页,每页 20 条")
             except Exception as e:
-                self.logger.error(f"搜索失败[尝试查询页数]: {e}")
-            items = self.driver.find_elements(By.XPATH,
-                                         "//ul[@class='vT-srch-result-list-bid']/li/a")
+                utils.get_logger().error(f"搜索失败[尝试查询页数]: {e}")
+            items = self.driver.find_elements(
+                By.XPATH, "//ul[@class='vT-srch-result-list-bid']/li/a"
+            )
             return items
         except TimeoutException as e:
             raise Exception(f"搜索失败 [{self._adapter_type}] [超时]: {e}")
         except NoSuchElementException as e:
             raise Exception(f"搜索失败 [{self._adapter_type}] [找不到元素]: {e}")
 
-
-    def _process_list(self,  items: list) -> list:
+    def _process_list(self, items: list) -> list:
         if not items:
             return []
         for item in items:
-            self._process_item( item)
+            self._process_item(item)
         sleep(2)
         next_items = self._next_page()
-        return self._process_list( next_items)
-
+        return self._process_list(next_items)
 
     def _next_page(self) -> list:
         try:
@@ -104,30 +108,33 @@ class CcgpDataCollectionAdapter(IDataCollectionAdapter):
             try:
                 btn = self.driver.find_element(By.XPATH, next_path)
             except NoSuchElementException:
-                self.logger.info(f"翻页结束 [{self._adapter_type}]")
+                utils.get_logger().info(f"翻页结束 [{self._adapter_type}]")
                 return []
             btn.click()
-            self.logger.info(f"跳转到下页: {self.driver.current_url}")
+            utils.get_logger().info(f"跳转到下页: {self.driver.current_url}")
             sleep(5)
-            wait.until(ec.presence_of_element_located((By.CLASS_NAME, "vT-srch-result")))
-            items = self.driver.find_elements(By.XPATH,
-                                         "//ul[@class='vT-srch-result-list-bid']/li/a")
+            wait.until(
+                ec.presence_of_element_located((By.CLASS_NAME, "vT-srch-result"))
+            )
+            items = self.driver.find_elements(
+                By.XPATH, "//ul[@class='vT-srch-result-list-bid']/li/a"
+            )
             return items
         except NoSuchElementException as e:
             raise Exception(f"翻页失败 [{self._adapter_type}] [找不到元素]: {e}")
         except TimeoutException as e:
             raise Exception(f"翻页结束 [{self._adapter_type}] [超时]: {e}")
 
-    def _process_item(self,  item):
+    def _process_item(self, item):
         main_handle = self.driver.current_window_handle
         wait = WebDriverWait(self.driver, 10, 1)
         close = True
         try:
-            url = item.get_attribute('href')
+            url = item.get_attribute("href")
             if self._check_is_collect_by_url(url):
                 close = False
                 return
-            # self.logger.info(f"跳转详情")
+            # utils.get_logger().info(f"跳转详情")
             print(".", end="")
             sleep(1)
             item.click()
@@ -139,25 +146,34 @@ class CcgpDataCollectionAdapter(IDataCollectionAdapter):
                     break
             wait.until(ec.presence_of_element_located((By.TAG_NAME, "body")))
 
-            content = self.driver.find_element(By.XPATH, "//div[@class='vF_deail_maincontent']").text
+            content = self.driver.find_element(
+                By.XPATH, "//div[@class='vF_deail_maincontent']"
+            ).text
             # 排除其他公告
             if self._check_type("其他公告"):
                 self._save_db(url, content, 3, is_invalid=True)
                 return
             # 判断是否为投标公告
-            data_type = 1 if self._check_type("中标公告") or self._check_type("成交公告") or self._check_type(
-                    "终止公告")  else 0
+            data_type = (
+                1
+                if self._check_type("中标公告")
+                or self._check_type("成交公告")
+                or self._check_type("终止公告")
+                else 0
+            )
             if self._check_content(content):
                 attach_str = self._attach_download()
                 self._save_db(url, content, data_type, attach_str)
             else:
                 self._save_db(url, content, data_type, is_invalid=True)
         except TimeoutException as e:
-            self.logger.error(
-                f"采集发生异常 [{self._adapter_type}] Timeout: {self.driver.current_url}。Exception: {e}")
+            utils.get_logger().error(
+                f"采集发生异常 [{self._adapter_type}] Timeout: {self.driver.current_url}。Exception: {e}"
+            )
         except NoSuchElementException as e:
-            self.logger.error(
-                f"采集发生异常 [{self._adapter_type}] NoSuchElement: {self.driver.current_url}。Exception: {e}")
+            utils.get_logger().error(
+                f"采集发生异常 [{self._adapter_type}] NoSuchElement: {self.driver.current_url}。Exception: {e}"
+            )
             raise Exception(f"采集失败 [{self._adapter_type}] [找不到元素]: {e}")
         finally:
             if close:
@@ -165,18 +181,19 @@ class CcgpDataCollectionAdapter(IDataCollectionAdapter):
                 self.driver.close()
                 self.driver.switch_to.window(main_handle)
 
-    def _check_type(self,type_str: str)->bool:
+    def _check_type(self, type_str: str) -> bool:
         links = self.driver.find_elements(By.LINK_TEXT, type_str)
         if len(links) > 0:
-            self.logger.info(f"{type_str}")
+            utils.get_logger().info(f"{type_str}")
             return True
         return False
 
-
     def _attach_download(self):
         paths = []
 
-        attach_els = self.driver.find_elements(By.XPATH, "//td[@class='bid_attachtab_content']/a")
+        attach_els = self.driver.find_elements(
+            By.XPATH, "//td[@class='bid_attachtab_content']/a"
+        )
         attach_2_els = self.driver.find_elements(By.XPATH, "//a[@ignore='1']")
 
         # 合并两个列表
@@ -184,23 +201,29 @@ class CcgpDataCollectionAdapter(IDataCollectionAdapter):
         attach_urls = []
         if len(all_attachments) > 0:
             for attach_el in attach_els:
-                attach_url = attach_el.get_attribute('href')
+                attach_url = attach_el.get_attribute("href")
                 if attach_url not in attach_urls:
                     attach_urls.append(attach_url)
                 else:
-                    self.logger.info(f"重复附件: {attach_url}")
+                    utils.get_logger().info(f"重复附件: {attach_url}")
                     continue
-                file_name = attach_el.text or attach_el.get_attribute('download') or attach_url.split('/')[-1]
+                file_name = (
+                    attach_el.text
+                    or attach_el.get_attribute("download")
+                    or attach_url.split("/")[-1]
+                )
                 if not file_name:
                     continue
                 # 检查 file_name 是否包含文件扩展名
-                if '.' not in file_name:
-                    self.logger.warning(f"文件名 {file_name} 不包含扩展名,跳过下载。")
+                if "." not in file_name:
+                    utils.get_logger().warning(
+                        f"文件名 {file_name} 不包含扩展名,跳过下载。"
+                    )
                     continue
-                path = self.file_helper.download_remote_file(attach_url, file_name)
+                path = utils.download_remote_file(attach_url, file_name)
                 if path:
                     paths.append(path)
         attach_str = ",".join(paths)
         if attach_str:
-            self.logger.info(f"附件下载完成: {attach_str}")
-        return attach_str
+            utils.get_logger().info(f"附件下载完成: {attach_str}")
+        return attach_str

+ 55 - 48
SourceCode/TenderCrawler/app/adapters/chinabidding_data_collection_adapter.py

@@ -1,13 +1,13 @@
 from time import sleep
 
-
+from selenium.common.exceptions import TimeoutException, NoSuchElementException
 from selenium.webdriver.common.by import By
-from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.support import expected_conditions as ec
-from selenium.common.exceptions import TimeoutException, NoSuchElementException
+from selenium.webdriver.support.wait import WebDriverWait
 
-from stores.data_store_interface import IDataStore
+import utils
 from adapters.data_collection_adapter_interface import IDataCollectionAdapter
+from stores.data_store_interface import IDataStore
 
 
 class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
@@ -15,7 +15,7 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
     中国招标网数据采集适配器
     """
 
-    def __init__(self, url: str,store:IDataStore=None):
+    def __init__(self, url: str, store: IDataStore = None):
         self._url = url
         self._store = store
         self._driver = None
@@ -25,7 +25,8 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
     def login(self, username: str, password: str) -> None:
         try:
             login_el = self.driver.find_element(
-                By.XPATH, "//div[@id='loginRight']/a[@class='login']")
+                By.XPATH, "//div[@id='loginRight']/a[@class='login']"
+            )
             login_el.click()
             wait = WebDriverWait(self.driver, 10, 1)
             wait.until(ec.presence_of_element_located((By.ID, "userpass")))
@@ -41,28 +42,31 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
         except NoSuchElementException as e:
             raise Exception(f"登录失败 [{self._adapter_type}] [找不到元素]: {e}")
 
-
     def collect(self, keyword: str, store: IDataStore):
         if store:
             self._store = store
         self._keyword = keyword
         items = self._search_by_type(keyword, 0)
-        self._process_list(items,0)
+        self._process_list(items, 0)
         sleep(2)
-        items = self._search_by_type(keyword,1)
-        self._process_list(items,1)
-        if self.config.get_bool(self.batch_save_key):
+        items = self._search_by_type(keyword, 1)
+        self._process_list(items, 1)
+        if utils.get_config_bool(self.batch_save_key):
             self.store.save_collect_data(True)
 
-    def _search_by_type(self, keyword: str,data_type):
+    def _search_by_type(self, keyword: str, data_type):
         try:
             self.driver.get(self._url)
             if data_type == 0:
-                self.logger.info(f"开始采集 招标公告")
-                el = self.driver.find_element(By.XPATH, "//div[@id='z-b-g-g']/h2/a[@class='more']")
+                utils.get_logger().info(f"开始采集 招标公告")
+                el = self.driver.find_element(
+                    By.XPATH, "//div[@id='z-b-g-g']/h2/a[@class='more']"
+                )
             else:
-                self.logger.info(f"开始采集 中标结果公告")
-                el = self.driver.find_element(By.XPATH, "//div[@id='z-b-jg-gg']/h2/a[@class='more']")
+                utils.get_logger().info(f"开始采集 中标结果公告")
+                el = self.driver.find_element(
+                    By.XPATH, "//div[@id='z-b-jg-gg']/h2/a[@class='more']"
+                )
             el.click()
             wait = WebDriverWait(self.driver, 10, 1)
             wait.until(ec.number_of_windows_to_be(2))
@@ -74,21 +78,22 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
         except NoSuchElementException as e:
             raise Exception(f"搜索失败 [{self._adapter_type}] [找不到元素]: {e}")
 
-
     def _search(self, keyword: str) -> list:
         wait = WebDriverWait(self.driver, 10, 1)
-        wait.until(
-            ec.presence_of_element_located((By.ID, "searchBidProjForm")))
-        search_el = self.driver.find_element(By.XPATH, "//form[@id='searchBidProjForm']/ul/li/input[@id='fullText']")
+        wait.until(ec.presence_of_element_located((By.ID, "searchBidProjForm")))
+        search_el = self.driver.find_element(
+            By.XPATH, "//form[@id='searchBidProjForm']/ul/li/input[@id='fullText']"
+        )
         search_el.clear()
         search_el.send_keys(keyword)
         search_btn = self.driver.find_element(
-            By.XPATH, "//form[@id='searchBidProjForm']/ul/li/button")
+            By.XPATH, "//form[@id='searchBidProjForm']/ul/li/button"
+        )
         search_btn.click()
         wait.until(ec.presence_of_element_located((By.ID, "site-content")))
         default_search_txt = "全部"
-        search_txt = self.config.get(self.search_day_key, default_search_txt)
-        self.logger.info(f"搜索关键字: {keyword},搜索条件: {search_txt}")
+        search_txt = utils.get_config_value(self.search_day_key, default_search_txt)
+        utils.get_logger().info(f"搜索关键字: {keyword},搜索条件: {search_txt}")
         if search_txt != default_search_txt:
             last_el = self.driver.find_element(By.LINK_TEXT, search_txt)
             sleep(1)
@@ -98,50 +103,53 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
             sleep(1)
         try:
             a_links = self.driver.find_elements(
-                By.XPATH, "//form[@id='pagerSubmitForm']/a")
+                By.XPATH, "//form[@id='pagerSubmitForm']/a"
+            )
             count = len(a_links)
             if count > 1:
                 count = count - 1
-            self.logger.info(f"共查询到 {count} 页,每页 10 条")
+            utils.get_logger().info(f"共查询到 {count} 页,每页 10 条")
         except Exception as e:
-            self.logger.error(f"搜索失败[尝试查询页数]: {e}")
-        items = self.driver.find_elements(By.XPATH,
-                                          "//ul[@class='as-pager-body']/li/a")
+            utils.get_logger().error(f"搜索失败[尝试查询页数]: {e}")
+        items = self.driver.find_elements(By.XPATH, "//ul[@class='as-pager-body']/li/a")
         return items
 
-    def _process_list(self, items: list,data_type) -> list:
+    def _process_list(self, items: list, data_type) -> list:
         if not items:
             return []
         for item in items:
-            self._process_item(item,data_type)
+            self._process_item(item, data_type)
         sleep(2)
         next_items = self._next_page()
-        return self._process_list(next_items,data_type)
+        return self._process_list(next_items, data_type)
 
     def _next_page(self) -> list:
         try:
             wait = WebDriverWait(self.driver, 10, 1)
             try:
-                btn = self.driver.find_element(By.XPATH, "//form[@id='pagerSubmitForm']/a[@class='next']")
+                btn = self.driver.find_element(
+                    By.XPATH, "//form[@id='pagerSubmitForm']/a[@class='next']"
+                )
             except NoSuchElementException:
-                self.logger.info(f"翻页结束 [{self._adapter_type}]")
+                utils.get_logger().info(f"翻页结束 [{self._adapter_type}]")
                 return []
             btn.click()
-            self.logger.info(f"跳转到下页: {self.driver.current_url}")
+            utils.get_logger().info(f"跳转到下页: {self.driver.current_url}")
             wait.until(ec.presence_of_element_located((By.ID, "site-content")))
-            items = self.driver.find_elements(By.XPATH,
-                                         "//ul[@class='as-pager-body']/li/a")
+            items = self.driver.find_elements(
+                By.XPATH, "//ul[@class='as-pager-body']/li/a"
+            )
             return items
         except NoSuchElementException as e:
             raise Exception(f"翻页失败 [{self._adapter_type}] [找不到元素]: {e}")
-        except TimeoutException:
+        except TimeoutException as e:
             raise Exception(f"翻页结束 [{self._adapter_type}] [超时]: {e}")
 
-    def _process_item(self, item,data_type):
+    def _process_item(self, item, data_type):
         main_handle = self.driver.current_window_handle
         close = True
         try:
-            url = item.get_attribute('href')
+            url = item.get_attribute("href")
             if self._check_is_collect_by_url(url):
                 close = False
                 return
@@ -154,8 +162,8 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
                     self.driver.switch_to.window(handle)
                     break
             url = self.driver.current_url
-            # self.logger.info(f"跳转详情")
-            print(".",end="")
+            # utils.get_logger().info(f"跳转详情")
+            print(".", end="")
             wait.until(ec.presence_of_element_located((By.CLASS_NAME, "content")))
             content = self.driver.find_element(By.CLASS_NAME, "content").text
             if self._check_content(content):
@@ -164,18 +172,17 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
                 self._save_db(url, content, data_type, is_invalid=True)
 
         except TimeoutException as e:
-            self.logger.error(
-                f"采集发生异常 [{self._adapter_type}] Timeout: {self.driver.current_url}。Exception: {e}")
+            utils.get_logger().error(
+                f"采集发生异常 [{self._adapter_type}] Timeout: {self.driver.current_url}。Exception: {e}"
+            )
             # raise Exception(f"采集失败 [超时]: {e}")
         except NoSuchElementException as e:
-            self.logger.error(
-                f"采集发生异常 [{self._adapter_type}] NoSuchElement: {self.driver.current_url}。Exception: {e}")
+            utils.get_logger().error(
+                f"采集发生异常 [{self._adapter_type}] NoSuchElement: {self.driver.current_url}。Exception: {e}"
+            )
             raise Exception(f"采集失败 [{self._adapter_type}] [找不到元素]: {e}")
         finally:
             if close:
                 sleep(2)
                 self.driver.close()
                 self.driver.switch_to.window(main_handle)
-
-
-

+ 23 - 26
SourceCode/TenderCrawler/app/adapters/data_collection_adapter_interface.py

@@ -1,35 +1,32 @@
-
-
 from abc import ABC, abstractmethod
+
 from selenium import webdriver
 
-from stores.data_store_interface import IDataStore
-from drivers.driver_creator import DriverCreator
-from utils.logger_helper import LoggerHelper
-from utils.config_helper import ConfigHelper
+import drivers
+import utils
 from models.collect_data import CollectData
+from stores.data_store_interface import IDataStore
 
 
 class IDataCollectionAdapter(ABC):
     """
     数据收集适配器抽象类
     """
+
     _url = ""
     _store = None
     _driver = None
     _keyword = None
     _adapter_type = ""
 
-    logger = LoggerHelper.get_logger()
-    config = ConfigHelper()
-
-
     @property
     def search_day_key(self) -> str:
         return f"adapter.{self._adapter_type}.search_day"
+
     @property
     def batch_save_key(self) -> str:
         return f"adapter.{self._adapter_type}.batch_save"
+
     @property
     def store(self) -> IDataStore:
         return self._store
@@ -50,8 +47,7 @@ class IDataCollectionAdapter(ABC):
 
     def _create_driver(self) -> webdriver:
         try:
-            return DriverCreator().gen_remote_driver(self.url)
-            # return DriverCreator().gen_chrome_driver(self.url)
+            return drivers.gen_driver(self.url)
         except Exception as e:
             raise Exception(f"创建驱动器失败: {e}")
 
@@ -124,32 +120,31 @@ class IDataCollectionAdapter(ABC):
     def _check_is_collect_by_url(self, url: str) -> bool:
         old = self.store.query_one_collect_url(url)
         if old:
-            self.logger.info(f"已采集过: {url}")
+            utils.get_logger().info(f"已采集过: {url}")
             return True
         return False
-    def _check_content(self,content) -> bool:
-        collect_data_key = self.config.get("save.collect_data_key")
+
+    def _check_content(self, content) -> bool:
+        collect_data_key = utils.get_config_value("save.collect_data_key")
         if not collect_data_key:
-            self.logger.info("未配置 save.collect_data_key,跳过内容检查")
+            utils.get_logger().info("未配置 save.collect_data_key,跳过内容检查")
             return True
-        # self.logger.info(f"检查数据有效性: {collect_data_key}")
+        # utils.get_logger().info(f"检查数据有效性: {collect_data_key}")
         collect_data_key = collect_data_key.replace(",", ",")
         keys = collect_data_key.split(",")
         keys = [key.strip() for key in keys]
         for key in keys:
             key = key.strip()
-            # self.logger.info(f"检查数据有效性: {key}")
+            # utils.get_logger().info(f"检查数据有效性: {key}")
             if key in content:
-                self.logger.info(f"有效数据: {self.driver.current_url}")
+                utils.get_logger().info(f"有效数据: {self.driver.current_url}")
                 return True
 
         return False
 
-
-
-    def _save_db(self, url, content, data_type=0, attach_str = None,is_invalid=False):
+    def _save_db(self, url, content, data_type=0, attach_str=None, is_invalid=False):
         if not self.store:
-            self.logger.info(f"DataStore 未指定: {url},关键字{self.keyword}")
+            utils.get_logger().info(f"DataStore 未指定: {url},关键字{self.keyword}")
             return False
         else:
             status = 2 if is_invalid else 0
@@ -159,7 +154,9 @@ class IDataCollectionAdapter(ABC):
                 content=content,
                 data_type=data_type,
                 attach_path=attach_str,
-                status=status)
-            self.store.insert_collect_data(data, self.config.get_bool(self.batch_save_key))
+                status=status,
+            )
+            self.store.insert_collect_data(
+                data, utils.get_config_bool(self.batch_save_key)
+            )
             return True
-

+ 1 - 0
SourceCode/TenderCrawler/app/config.yml

@@ -1,3 +1,4 @@
+#file: noinspection SpellCheckingInspection,SpellCheckingInspection,SpellCheckingInspection
 adapter:
   chinabidding:
     #search_day: '今天'

+ 7 - 0
SourceCode/TenderCrawler/app/drivers/__init__.py

@@ -0,0 +1,7 @@
+from selenium import webdriver
+
+from driver_creator import DriverCreator
+
+
+def gen_driver(url: str) -> webdriver:
+    return DriverCreator().gen_remote_driver(url)

+ 34 - 39
SourceCode/TenderCrawler/app/drivers/driver_creator.py

@@ -1,61 +1,55 @@
 from selenium import webdriver
 
-from utils.logger_helper import LoggerHelper
-from utils.config_helper import ConfigHelper
+import utils
 
 
 class DriverCreator:
 
-    logger = LoggerHelper.get_logger()
-
     default_remote_driver_url = "http://127.0.0.1:4444/wd/hub"
 
     def gen_remote_driver(self, url):
         # 设置Chrome选项
         options = webdriver.ChromeOptions()
 
-        options.add_argument('--headless')  # 无头模式运行
-        options.add_argument('--no-sandbox')
-        options.add_argument('--disable-dev-shm-usage')
-        options.add_experimental_option('excludeSwitches',
-                                        ['enable-automation'])
-        options.add_argument('--disable-blink-features=AutomationControlled')
-        options.add_argument('--disable-extensions')
+        options.add_argument("--headless")  # 无头模式运行
+        options.add_argument("--no-sandbox")
+        options.add_argument("--disable-dev-shm-usage")
+        options.add_experimental_option("excludeSwitches", ["enable-automation"])
+        options.add_argument("--disable-blink-features=AutomationControlled")
+        options.add_argument("--disable-extensions")
         # 最大化窗口
-        options.add_argument('--start-maximized')
+        options.add_argument("--start-maximized")
         # 无痕浏览模式
-        options.add_argument('--incognito')
-
+        options.add_argument("--incognito")
 
-        remote_driver_url = ConfigHelper().get('selenium.remote_driver_url')
+        remote_driver_url = utils.get_config_value("selenium.remote_driver_url")
         if not remote_driver_url:
             remote_driver_url = self.default_remote_driver_url
-            self.logger.error(
-                f"未配置远程驱动地址,使用默认地址{self.default_remote_driver_url}")
-        self.logger.info(f"远程驱动地址{remote_driver_url}")
+            utils.get_logger().error(
+                f"未配置远程驱动地址,使用默认地址{self.default_remote_driver_url}"
+            )
+        utils.get_logger().info(f"远程驱动地址{remote_driver_url}")
 
         # 创建远程浏览器驱动实例
-        driver = webdriver.Remote(command_executor=remote_driver_url,
-                                  options=options)
+        driver = webdriver.Remote(command_executor=remote_driver_url, options=options)
         return self._gen_driver(driver, url)
 
     def gen_chrome_driver(self, url):
         # 设置Chrome选项,包括隐藏Selenium特征、设置代理IP和排除或关闭一些Selenium相关开关
         options = webdriver.ChromeOptions()
-        options.add_experimental_option('excludeSwitches',
-                                        ['enable-automation'])
-        options.add_argument('--disable-blink-features=AutomationControlled')
-        options.add_argument('--disable-extensions')
+        options.add_experimental_option("excludeSwitches", ["enable-automation"])
+        options.add_argument("--disable-blink-features=AutomationControlled")
+        options.add_argument("--disable-extensions")
         # options.add_argument('--disable-gpu')
-        options.add_argument('--disable-notifications')
+        options.add_argument("--disable-notifications")
         # options.add_argument('--disable-popup-blocking')
         # options.add_argument('--disable-web-security')
         # options.add_argument('--ignore-certificate-errors')
         # options.add_argument('--no-sandbox')
         # 最大化窗口
-        options.add_argument('--start-maximized')
+        options.add_argument("--start-maximized")
         # 无痕浏览模式
-        options.add_argument('--incognito')
+        options.add_argument("--incognito")
         # options.add_argument('--user-data-dir=/dev/null')
         # options.add_argument('--proxy-server={}'.format(proxy_address + ':' + proxy_port))
         # options.add_argument('--proxy-auth={}:{}'.format(proxy_username, proxy_password))
@@ -65,16 +59,17 @@ class DriverCreator:
         driver = webdriver.Chrome(options=options)  # 创建Chrome浏览器驱动实例
         return self._gen_driver(driver, url)
 
-
-    def _gen_driver(self, driver, url):
+    @staticmethod
+    def _gen_driver(driver, url):
         # 设置user-agent,改变user-agent的值
-        if hasattr(driver, 'execute_cdp_cmd'):
+        if hasattr(driver, "execute_cdp_cmd"):
             # 隐藏navigator.webdriver标志,将其值修改为false或undefined
             driver.execute_cdp_cmd(
-                'Page.addScriptToEvaluateOnNewDocument', {
-                    'source':
-                    'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
-                })
+                "Page.addScriptToEvaluateOnNewDocument",
+                {
+                    "source": 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
+                },
+            )
 
             user_agents = [
                 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
@@ -87,19 +82,19 @@ class DriverCreator:
 
             user_agent = user_agents[len(url) % len(user_agents)]
             # 设置user-agent,改变user-agent的值
-            driver.execute_cdp_cmd("Network.setUserAgentOverride",
-                                   {"userAgent": user_agent})
+            driver.execute_cdp_cmd(
+                "Network.setUserAgentOverride", {"userAgent": user_agent}
+            )
         else:
-            self.logger.warning("当前驱动不支持 execute_cdp_cmd 方法")
+            utils.get_logger().warning("当前驱动不支持 execute_cdp_cmd 方法")
         # url 去除空字符串
         url = url.strip()
         driver.get(url)
 
         # 设置隐式等待 5s
         driver.implicitly_wait(5)
-        self.logger.info(f"创建浏览器驱动,URL: {url}")
+        utils.get_logger().info(f"创建浏览器驱动,URL: {url}")
         return driver
 
-
     # def shutdown_driver(self,driver):
     #     driver.quit()

+ 36 - 34
SourceCode/TenderCrawler/app/jobs/data_clean.py

@@ -1,82 +1,84 @@
-from utils.config_helper import ConfigHelper
-from utils.logger_helper import LoggerHelper
-from utils.file_helper import FileHelper
+import utils
 from stores.mysql_data_store import MysqlDataStore
 
+
 class DataClean:
     _store = None
-    config = ConfigHelper()
-    logger = LoggerHelper.get_logger()
-
 
     def __init__(self):
-        self._clean_day = self.config.get_int("clean.day", 30)
-        self._clean_attach_day = self.config.get_int("clean.attach", self._clean_day)
-        self._clean_log_day = self.config.get_int("clean.log", self._clean_day)
-        self._clean_collect_data_day = self.config.get_int("clean.collect_data", self._clean_day)
-        self._clean_process_data_day = self.config.get_int("clean.process_data", self._clean_day)
-        self._clean_process_result_data_day = self.config.get_int("clean.process_result_data", self._clean_day)
+        self._clean_day = utils.get_config_int("clean.day", 30)
+        self._clean_attach_day = utils.get_config_int("clean.attach", self._clean_day)
+        self._clean_log_day = utils.get_config_int("clean.log", self._clean_day)
+        self._clean_collect_data_day = utils.get_config_int(
+            "clean.collect_data", self._clean_day
+        )
+        self._clean_process_data_day = utils.get_config_int(
+            "clean.process_data", self._clean_day
+        )
+        self._clean_process_result_data_day = utils.get_config_int(
+            "clean.process_result_data", self._clean_day
+        )
         if self._clean_process_result_data_day < 45:
             self._clean_process_result_data_day = 45
         self._store = MysqlDataStore()
 
     def clean(self):
         try:
-            self.logger.info("开始 清除历史文件数据")
+            utils.get_logger().info("开始 清除历史文件数据")
             self._clean_attach()
             self._clean_log()
             self._clean_collect_data()
             self._clean_process_data()
             self._clean_process_result_data()
-            self.logger.info("清除历史文件数据 完成")
+            utils.get_logger().info("清除历史文件数据 完成")
         except Exception as e:
-            self.logger.error(e)
+            utils.get_logger().error(e)
 
     def _clean_attach(self):
         if self._clean_attach_day == 0:
-            self.logger.info("跳过 清除历史附件数据")
+            utils.get_logger().info("跳过 清除历史附件数据")
             return
-        self.logger.info("开始 清除历史附件数据")
-        FileHelper().clean_attach_file(self._clean_attach_day)
-        self.logger.info("清除历史附件数据 完成")
+        utils.get_logger().info("开始 清除历史附件数据")
+        utils.clean_attach_file(self._clean_attach_day)
+        utils.get_logger().info("清除历史附件数据 完成")
 
     def _clean_log(self):
         if self._clean_log_day == 0:
-            self.logger.info("跳过 清除历史日志数据")
+            utils.get_logger().info("跳过 清除历史日志数据")
             return
-        self.logger.info("开始 清除历史日志数据")
-        LoggerHelper.delete_log(self._clean_log_day)
-        self.logger.info("清除历史日志数据 完成")
+        utils.get_logger().info("开始 清除历史日志数据")
+        utils.clean_log_file(self._clean_log_day)
+        utils.get_logger().info("清除历史日志数据 完成")
 
     def _clean_collect_data(self):
         if self._clean_collect_data_day == 0:
-            self.logger.info("跳过 清除历史采集数据")
+            utils.get_logger().info("跳过 清除历史采集数据")
             return
-        self.logger.info("开始 清除历史采集数据")
+        utils.get_logger().info("开始 清除历史采集数据")
         date = self._get_before_date(self._clean_collect_data_day)
         self._store.delete_collect_data_before_date(date)
-        self.logger.info("清除历史采集数据 完成")
+        utils.get_logger().info("清除历史采集数据 完成")
 
     def _clean_process_data(self):
         if self._clean_process_data_day == 0:
-            self.logger.info("跳过 清除历史处理数据[招标]")
+            utils.get_logger().info("跳过 清除历史处理数据[招标]")
             return
-        self.logger.info("开始 清除历史处理数据[招标]")
+        utils.get_logger().info("开始 清除历史处理数据[招标]")
         date = self._get_before_date(self._clean_process_data_day)
         self._store.delete_process_data_before_date(date)
-        self.logger.info("清除历史处理数据[招标] 完成")
+        utils.get_logger().info("清除历史处理数据[招标] 完成")
 
     def _clean_process_result_data(self):
         if self._clean_process_result_data_day == 0:
-            self.logger.info("跳过 清除历史处理数据[中标]")
+            utils.get_logger().info("跳过 清除历史处理数据[中标]")
             return
-        self.logger.info("开始 清除历史处理数据[中标]")
+        utils.get_logger().info("开始 清除历史处理数据[中标]")
         date = self._get_before_date(self._clean_process_data_day)
         self._store.delete_process_result_data_before_date(date)
-        self.logger.info("清除历史处理数据[中标] 完成")
+        utils.get_logger().info("清除历史处理数据[中标] 完成")
 
     @staticmethod
-    def  _get_before_date(day:int) -> str:
+    def _get_before_date(day: int) -> str:
         from datetime import datetime, timedelta
-        return (datetime.now() - timedelta(days=day)).strftime("%Y-%m-%d")
 
+        return (datetime.now() - timedelta(days=day)).strftime("%Y-%m-%d")

+ 23 - 26
SourceCode/TenderCrawler/app/jobs/data_collector.py

@@ -1,32 +1,23 @@
 import importlib
+
 from selenium import webdriver
 
+import adapters
+import utils
+from adapters.data_collection_adapter_interface import IDataCollectionAdapter
 from stores.data_store_interface import IDataStore
 from stores.default_data_store import DefaultDataStore
-from adapters.data_collection_adapter_interface import IDataCollectionAdapter
-from utils.logger_helper import LoggerHelper
-from utils.config_helper import ConfigHelper
 
 
 class DataCollector:
 
-    logger = LoggerHelper.get_logger()
-    config = ConfigHelper()
     _adapter = None
     _driver = None
     _store = None
 
-    # 使用字典映射域名和适配器类
-    # _adapterModelMap = {"chinabidding": "chinabidding_data_collection_adapter"}
-
-    # _adapterClassMap = {"chinabidding": "ChinabiddingDataCollectionAdapter"}
-
-    def __init__(self,
-                 adapter_type: str,
-                 url: str,
-                 un: str,
-                 up: str,
-                 store: IDataStore = None):
+    def __init__(
+        self, adapter_type: str, url: str, un: str, up: str, store: IDataStore = None
+    ):
         self._adapter = self._gen_adapter(adapter_type, url)
         self._driver = self.adapter.driver
         # if type == "chinabidding":
@@ -53,23 +44,29 @@ class DataCollector:
         self._store = store
 
     def collect(self, keyword: str):
-        self.adapter.collect(keyword, self.store)
+        adapters.collect(self.adapter, keyword, self.store)
 
     def close(self):
-        self.logger.info(f"关闭浏览器驱动,URL: {self.adapter.url}")
-        self.adapter.teardown()
-
+        utils.get_logger().info(f"关闭浏览器驱动,URL: {self.adapter.url}")
+        adapters.teardown(self.adapter)
 
-    def _gen_adapter(self, adapter_type: str, url: str):
-        adapter_model_name = self.config.get(f"adapter.{adapter_type}.model_name")
-        adapter_class_name = self.config.get(f"adapter.{adapter_type}.class_name")
+    @staticmethod
+    def _gen_adapter(adapter_type: str, url: str):
+        adapter_model_name = utils.get_config_value(
+            f"adapter.{adapter_type}.model_name"
+        )
+        adapter_class_name = utils.get_config_value(
+            f"adapter.{adapter_type}.class_name"
+        )
         if adapter_class_name:
             try:
-                self.logger.info(
-                    f"生成适配器 TYPE:{adapter_type},适配器: {adapter_class_name},URL:{url}")
+                utils.get_logger().info(
+                    f"生成适配器 TYPE:{adapter_type},适配器: {adapter_class_name},URL:{url}"
+                )
                 # 使用 importlib 动态导入模块
                 adapter_module = importlib.import_module(
-                    f"adapters.{adapter_model_name}")
+                    f"adapters.{adapter_model_name}"
+                )
                 adapter_class = getattr(adapter_module, adapter_class_name)
                 adapter = adapter_class(url)
             except ImportError as e:

+ 50 - 45
SourceCode/TenderCrawler/app/jobs/data_process.py

@@ -1,15 +1,12 @@
-from models.process_result_data import ProcessResultData
-from utils.logger_helper import LoggerHelper
-from utils.config_helper import ConfigHelper
-from utils.ai_helper import AiHelper
-from stores.data_store_interface import IDataStore
+import utils
 from models.collect_data import CollectData
 from models.process_data import ProcessData
+from models.process_result_data import ProcessResultData
+from stores.data_store_interface import IDataStore
 
 
 class DataProcess:
-    logger = LoggerHelper.get_logger()
-    config = ConfigHelper()
+
     _store = None
 
     DEFAULT_AI_SYSTEM_PROMPT = "请帮我分析以下文字,提取出关键信息,并以json格式字符串返回,如果部分信息为空,则该字段返回为空。"
@@ -24,12 +21,15 @@ class DataProcess:
 
     def __init__(self, store: IDataStore):
         self._store = store
-        self._ai_system_prompt = self.config.get("ai.system_prompt",
-                                                 self.DEFAULT_AI_SYSTEM_PROMPT)
-        self._ai_prompt_template_1 = self.config.get(
-            "ai.prompt_template_1", self.DEFAULT_AI_PROMPT_TEMPLATE_1)
-        self._ai_prompt_template_2 = self.config.get(
-            "ai.prompt_template_2", self.DEFAULT_AI_PROMPT_TEMPLATE_2)
+        self._ai_system_prompt = utils.get_config_value(
+            "ai.system_prompt", self.DEFAULT_AI_SYSTEM_PROMPT
+        )
+        self._ai_prompt_template_1 = utils.get_config_value(
+            "ai.prompt_template_1", self.DEFAULT_AI_PROMPT_TEMPLATE_1
+        )
+        self._ai_prompt_template_2 = utils.get_config_value(
+            "ai.prompt_template_2", self.DEFAULT_AI_PROMPT_TEMPLATE_2
+        )
 
     @property
     def store(self) -> IDataStore:
@@ -43,35 +43,40 @@ class DataProcess:
             self.store.save_process_data(True)
             self.store.save_process_result_data(True)
         except Exception as e:
-            self.logger.error(f"数据处理发生异常: {e}")
+            utils.get_logger().error(f"数据处理发生异常: {e}")
             raise Exception(f"数据处理发生异常: {e}")
 
     def _process_item(self, url: str) -> None:
         try:
-            self.logger.info(f"START ==>URL:{url}")
+            utils.get_logger().info(f"START ==>URL:{url}")
             item = self.store.query_one_collect_by_url(url)
             if not item:
-                self.logger.info(f"END==> NOT FOUND URL:{url}")
+                utils.get_logger().info(f"END==> NOT FOUND URL:{url}")
                 return
             if item.status == 1:
-                self.logger.info(f"ALREADY1 URL:{url}")
+                utils.get_logger().info(f"ALREADY1 URL:{url}")
                 return
-            data = self.store.query_one_process_by_url(
-                url
-            ) if item.data_type == 0 else self.store.query_one_process_result_by_url(
-                url)
+            data = (
+                self.store.query_one_process_by_url(url)
+                if item.data_type == 0
+                else self.store.query_one_process_result_by_url(url)
+            )
             if data:
-                self.logger.info(f"ALREADY2 [{item.data_type}] URL==> {url}")
+                utils.get_logger().info(f"ALREADY2 [{item.data_type}] URL==> {url}")
                 return
-            data = self._ai_process_1(
-                item) if item.data_type == 0 else self._ai_process_2(item)
+            data = (
+                self._ai_process_1(item)
+                if item.data_type == 0
+                else self._ai_process_2(item)
+            )
             if data:
                 old = None
                 if data.no:
-                    old = self.store.query_one_process_result_by_no(
-                        data.no
-                    ) if item.data_type == 0 else self.store.query_one_process_by_no(
-                        data.no)
+                    old = (
+                        self.store.query_one_process_result_by_no(data.no)
+                        if item.data_type == 0
+                        else self.store.query_one_process_by_no(data.no)
+                    )
                 if not old:
                     data.url = url
                     data.keyword = item.keyword
@@ -87,30 +92,30 @@ class DataProcess:
                         else:
                             old.other_urls = url
                         if item.data_type == 0:
-                            self.store.set_process_other_urls(
-                                data.url, old.other_urls)
+                            self.store.set_process_other_urls(data.url, old.other_urls)
                         else:
                             self.store.set_process_result_other_urls(
-                                data.url, old.other_urls)
+                                data.url, old.other_urls
+                            )
                     self.store.set_collect_process(old.url)
-                    self.logger.info(
-                        f"ALREADY 编号: {data.no} URL:{old.other_urls}")
+                    utils.get_logger().info(
+                        f"ALREADY 编号: {data.no} URL:{old.other_urls}"
+                    )
 
-            self.logger.info("END   ==>" + url)
+            utils.get_logger().info("END   ==>" + url)
         except Exception as e:
-            self.logger.error(f"数据处理发生异常: {url} {e}")
+            utils.get_logger().error(f"数据处理发生异常: {url} {e}")
 
     def _ai_process_1(self, item: CollectData) -> ProcessData | None:
         try:
-            data = AiHelper().call_openai(
-                self._ai_system_prompt,
-                f"{item.content} {self._ai_prompt_template_1}")
+            data = utils.call_openai(
+                self._ai_system_prompt, f"{item.content} {self._ai_prompt_template_1}"
+            )
             area_str = data.get("area")
 
             if "省" in area_str:
                 area_str_arr = area_str.split("省")
-                area_str = area_str_arr[1] if len(
-                    area_str_arr) > 1 else area_str_arr[0]
+                area_str = area_str_arr[1] if len(area_str_arr) > 1 else area_str_arr[0]
             if "市" in area_str:
                 area_str_arr = area_str.split("市")
                 area_str = area_str_arr[0]
@@ -129,14 +134,14 @@ class DataProcess:
                 total_tokens=data.get("total_tokens"),
             )
         except Exception as e:
-            self.logger.error(f"AI 提取数据失败1: {item.url} {e}")
+            utils.get_logger().error(f"AI 提取数据失败1: {item.url} {e}")
             return None
 
     def _ai_process_2(self, item: CollectData) -> ProcessResultData | None:
         try:
-            data = AiHelper().call_openai(
-                self._ai_system_prompt,
-                f"{item.content} {self._ai_prompt_template_2}")
+            data = utils.call_openai(
+                self._ai_system_prompt, f"{item.content} {self._ai_prompt_template_2}"
+            )
             return ProcessResultData(
                 no=data.get("no"),
                 title=data.get("title"),
@@ -149,5 +154,5 @@ class DataProcess:
                 total_tokens=data.get("total_tokens"),
             )
         except Exception as e:
-            self.logger.error(f"AI 提取数据失败2: {item.url} {e}")
+            utils.get_logger().error(f"AI 提取数据失败2: {item.url} {e}")
             return None

+ 32 - 24
SourceCode/TenderCrawler/app/jobs/data_send.py

@@ -1,17 +1,13 @@
-from datetime import datetime
 import calendar
+from datetime import datetime
 
-from utils.logger_helper import LoggerHelper
-from utils.config_helper import ConfigHelper
-from utils.email_helper import EmailHelper
-from stores.data_store_interface import IDataStore
+import utils
 from models.process_data import ProcessData
 from models.process_result_data import ProcessResultData
+from stores.data_store_interface import IDataStore
 
 
 class DataSend:
-    logger = LoggerHelper.get_logger()
-    config = ConfigHelper()
     _error_arr = []
     _email_area_arr = []
     _email_area_virtual_arr = []
@@ -28,7 +24,7 @@ class DataSend:
     def send(self) -> None:
         self._error_arr = []
         list = self.store.query_to_send()
-        self.logger.info(f"开始发送邮件,数量为 {len(list)}")
+        utils.get_logger().info(f"开始发送邮件,数量为 {len(list)}")
         for item in list:
             self._send_item(item)
         if len(self._error_arr) > 0:
@@ -45,34 +41,40 @@ class DataSend:
         self._send_reports(start_date, end_date)
 
     def _send_reports(self, start_date, end_date):
-        self.logger.info(f"开始发送中标报告邮件,开始日期:{start_date.strftime("%Y-%m-%d")},结束日期:{end_date.strftime("%Y-%m-%d")}")
+        utils.get_logger().info(
+            f"开始发送中标报告邮件,开始日期:{start_date.strftime("%Y-%m-%d")},结束日期:{end_date.strftime("%Y-%m-%d")}"
+        )
         email = self.store.query_master_email()
         if not email:
-            self.logger.error("没有找到master email")
+            utils.get_logger().error("没有找到master email")
             return
         items = self.store.query_to_report_by_date(start_date, end_date)
-        title_prev = self.config.get("email.report_title_prev", "【中标报告】")
+        title_prev = utils.get_config_value("email.report_title_prev", "【中标报告】")
         title = f"{start_date.month}月中标结果报告"
         body = self._build_report_email_html(title, items)
-        flag = EmailHelper().send_email(email, f"{title_prev} {title}",body, True)
+        flag = utils.send_email(email, f"{title_prev} {title}", body, True)
         if flag:
-            self.logger.info("发送中标报告邮件成功")
+            utils.get_logger().info("发送中标报告邮件成功")
 
     def _send_item(self, item: ProcessData) -> None:
-        self.logger.info(f"开始发送邮件,地区为:{item.area} ,URL为 {item.url}")
+        utils.get_logger().info(f"开始发送邮件,地区为:{item.area} ,URL为 {item.url}")
         email = self._get_email_by_area(item.area)
         if not email:
-            self.logger.error(f"{item.area} 下没有找到email")
+            utils.get_logger().error(f"{item.area} 下没有找到email")
             if item.area not in self._error_arr:
                 self._error_arr.append(item.area)
             return
-        title_prev = self.config.get("email.title_prev", "【招标信息】")
+        title_prev = utils.get_config_value("email.title_prev", "【招标信息】")
         body = self._build_email_html(item)
-        flag = EmailHelper().send_email(email, f"{title_prev} {item.title}", body, True, item.attach_path)
+        flag = utils.send_email(
+            email, f"{title_prev} {item.title}", body, True, item.attach_path
+        )
         if flag:
             self.store.set_send(item.no)
 
-    def _get_email_by_area(self, area: str, count: int = 0, virtual_area: str = None) -> str:
+    def _get_email_by_area(
+        self, area: str, count: int = 0, virtual_area: str = None
+    ) -> str:
         email = None
         area_str = area
         # if "省" in area:
@@ -92,7 +94,9 @@ class DataSend:
         if not email and count < 3:
             area_name = self._get_email_by_area_virtual(area_str)
             if area_name:
-                virtual_area = f"{area_str},{virtual_area}" if virtual_area else area_str
+                virtual_area = (
+                    f"{area_str},{virtual_area}" if virtual_area else area_str
+                )
                 email = self._get_email_by_area(area_name, count + 1, virtual_area)
         return email
 
@@ -282,15 +286,15 @@ class DataSend:
         return body
 
     def _send_email_no_found(self) -> None:
-        email = EmailHelper().config.get("email.error_email")
-        self.logger.info(f"开始发送区域邮箱未匹配邮件: {email}")
+        email = utils.get_config_value("email.error_email")
+        utils.get_logger().info(f"开始发送区域邮箱未匹配邮件: {email}")
         if not email:
             return
         title = "Warning: 相关地区没有匹配到邮箱,请及时添加相关配置"
         content = "以下区域中没有配置邮箱:\n\n    "
         content += "、".join(self._error_arr)
         content += "\n\n请及时添加相关配置。"
-        EmailHelper().send_email(email, title, content, False, None)
+        utils.send_email(email, title, content, False, None)
 
     @staticmethod
     def _get_first_and_last_day_of_current_month():
@@ -300,7 +304,9 @@ class DataSend:
         first_day_of_current_month = datetime(today.year, today.month, 1, 0, 0, 0)
         # 获取这个月的最后一天
         _, last_day = calendar.monthrange(today.year, today.month)
-        last_day_of_current_month = datetime(today.year, today.month, last_day, 23, 59, 59)
+        last_day_of_current_month = datetime(
+            today.year, today.month, last_day, 23, 59, 59
+        )
         return first_day_of_current_month, last_day_of_current_month
 
     @staticmethod
@@ -318,5 +324,7 @@ class DataSend:
         first_day_prev_month = datetime(prev_month_year, prev_month, 1, 0, 0, 0)
         # 获取上个月的最后一天
         _, last_day = calendar.monthrange(prev_month_year, prev_month)
-        last_day_of_prev_month = datetime(prev_month_year, prev_month, last_day, 23, 59, 59)
+        last_day_of_prev_month = datetime(
+            prev_month_year, prev_month, last_day, 23, 59, 59
+        )
         return first_day_prev_month, last_day_of_prev_month

+ 99 - 91
SourceCode/TenderCrawler/app/jobs/job_runner.py

@@ -1,92 +1,98 @@
-import schedule,threading
-from dateutil import parser
+import threading
 from datetime import datetime
 
-from utils.logger_helper import LoggerHelper
-from utils.config_helper import ConfigHelper
-from utils.email_helper import EmailHelper
-from stores.mysql_data_store import MysqlDataStore
-from models.url_setting import UrlSetting
+import schedule
+from dateutil import parser
+
+import utils
+from jobs.data_clean import DataClean
 from jobs.data_collector import DataCollector
 from jobs.data_process import DataProcess
 from jobs.data_send import DataSend
-from jobs.data_clean import DataClean
+from models.url_setting import UrlSetting
+from stores.mysql_data_store import MysqlDataStore
 
 
 class JobRunner:
-    logger = LoggerHelper.get_logger()
-    config = ConfigHelper()
+
     store = MysqlDataStore()  # 复用 store 对象
 
     def run_job(self, is_run_now=True):
         try:
-            self.logger.info("加载任务")
-
+            utils.get_logger().info("加载任务")
 
-            collect_time = self.config.get("job.collect")
-            process_time = self.config.get("job.process")
-            send_email_time = self.config.get("job.send_email")
-            clean_data_time = self.config.get("job.clean_data")
+            collect_time = utils.get_config_value("job.collect")
+            process_time = utils.get_config_value("job.process")
+            send_email_time = utils.get_config_value("job.send_email")
+            clean_data_time = utils.get_config_value("job.clean_data")
 
-            collect_times = self._validate_and_format_time(
-                collect_time, ["06:00"])
+            collect_times = self._validate_and_format_time(collect_time, ["06:00"])
             for time in collect_times:
-                self.logger.info(f"{time} 执行 采集处理数据 任务")
+                utils.get_logger().info(f"{time} 执行 采集处理数据 任务")
                 schedule.every().day.at(time).do(self._collect_process_job)
 
             process_times = self._validate_and_format_time(
-                process_time, ["10:00", "15:00", "19:00"])
+                process_time, ["10:00", "15:00", "19:00"]
+            )
             for time in process_times:
-                self.logger.info(f"{time} 执行 AI处理数据  任务")
+                utils.get_logger().info(f"{time} 执行 AI处理数据  任务")
                 schedule.every().day.at(time).do(self._process_job)
 
             send_email_times = self._validate_and_format_time(
-                send_email_time, ["08:20", "14:00"])
+                send_email_time, ["08:20", "14:00"]
+            )
             for time in send_email_times:
-                self.logger.info(f"{time} 执行  发送邮件   任务")
+                utils.get_logger().info(f"{time} 执行  发送邮件   任务")
                 schedule.every().day.at(time).do(self._send_job)
 
-            if self.config.get_int("job.send_current_month_report_day")>0:
-                report_time = self.config.get("job.send_current_month_report_time")
-                times = self._validate_and_format_time(report_time,["08:20"])
+            if utils.get_config_int("job.send_current_month_report_day") > 0:
+                report_time = utils.get_config_value(
+                    "job.send_current_month_report_time"
+                )
+                times = self._validate_and_format_time(report_time, ["08:20"])
                 for time in times:
-                    self.logger.info(f"每月{str(self._get_current_month_report_day()).rjust(2,"0")}日 {time} 执行  发送当月报告   任务")
+                    utils.get_logger().info(
+                        f"每月{str(self._get_current_month_report_day()).rjust(2,"0")}日 {time} 执行  发送当月报告   任务"
+                    )
                     schedule.every().day.at(time).do(self._send_prev_month_report_job)
 
-            if self.config.get_int("job.send_prev_month_report_day")>0:
-                report_time = self.config.get("job.send_prev_month_report_time")
+            if utils.get_config_int("job.send_prev_month_report_day") > 0:
+                report_time = utils.get_config_value("job.send_prev_month_report_time")
                 times = self._validate_and_format_time(report_time, ["08:20"])
                 for time in times:
-                    self.logger.info(f"每月{str(self._get_prev_month_report_day()).rjust(2,"0")}日 {time} 执行  发送上月报告   任务")
+                    utils.get_logger().info(
+                        f"每月{str(self._get_prev_month_report_day()).rjust(2,"0")}日 {time} 执行  发送上月报告   任务"
+                    )
                     schedule.every().day.at(time).do(self._send_prev_month_report_job)
 
             clean_data_times = self._validate_and_format_time(
-                clean_data_time, ["00:05"])
-            self.logger.info(f"{clean_data_times[0]} 执行 清理数据 任务")
+                clean_data_time, ["00:05"]
+            )
+            utils.get_logger().info(f"{clean_data_times[0]} 执行 清理数据 任务")
             schedule.every().day.at(clean_data_times[0]).do(self._clean_job)
 
             urls = UrlSetting().fetch_all()
             if not urls or len(urls) == 0:
-                self.logger.error("未找到任何 URL 设置")
+                utils.get_logger().error("未找到任何 URL 设置")
                 return
-            self.logger.info(f"共找到 {len(urls)} 个 URL 设置")
+            utils.get_logger().info(f"共找到 {len(urls)} 个 URL 设置")
             for url in urls:
-                self.logger.info(f"{url}")
+                utils.get_logger().info(f"{url}")
 
-            if is_run_now and self.config.get_bool("job.run_now"):
-                self.logger.info("立即执行采集任务")
+            if is_run_now and utils.get_config_bool("job.run_now"):
+                utils.get_logger().info("立即执行采集任务")
                 self._collect_process_job()
                 # self._clean_job()
                 # self._process_job()
                 # self._send_job()
 
         except Exception as e:
-            self.logger.error(f"应用程序停止: {e}")
+            utils.get_logger().error(f"应用程序停止: {e}")
             raise e
 
     def restart_job(self):
         schedule.clear()
-        self.logger.info("定时配置更新,重启任务")
+        utils.get_logger().info("定时配置更新,重启任务")
         self.run_job(False)
 
     def _collect_process_job(self):
@@ -94,84 +100,87 @@ class JobRunner:
 
     def _collect_process(self):
         try:
-            self.logger.info("开始执行 数据采集处理 任务")
+            utils.get_logger().info("开始执行 数据采集处理 任务")
             url_setting = UrlSetting()
             for url_setting in url_setting.fetch_all():
                 data_collector = None
                 try:
-                    self.logger.info(f"开始采集: {url_setting.url}")
-                    data_collector = DataCollector(url_setting.adapter_type,
-                                                   url_setting.url,
-                                                   url_setting.username,
-                                                   url_setting.password,
-                                                   self.store)
+                    utils.get_logger().info(f"开始采集: {url_setting.url}")
+                    data_collector = DataCollector(
+                        url_setting.adapter_type,
+                        url_setting.url,
+                        url_setting.username,
+                        url_setting.password,
+                        self.store,
+                    )
                     keywords = url_setting.keywords
-                    keyword_array = keywords.split(',')
+                    keyword_array = keywords.split(",")
                     for keyword in keyword_array:
                         data_collector.collect(keyword)
-                    self.logger.info(f"采集完成: {url_setting.url}")
+                    utils.get_logger().info(f"采集完成: {url_setting.url}")
                 except Exception as e:
                     self._send_error_email(
                         "数据采集",
-                        f"\n    Type: {url_setting.adapter_type} \n    Url: {url_setting.url}\n    错误: {str(e)}"
+                        f"\n    Type: {url_setting.adapter_type} \n    Url: {url_setting.url}\n    错误: {str(e)}",
                     )
-                    self.logger.error(f"采集发生异常: {e}")
+                    utils.get_logger().error(f"采集发生异常: {e}")
                 finally:
                     if data_collector:
                         data_collector.close()
 
                 try:
-                    self.logger.info(f"开始AI处理: {url_setting.url}")
+                    utils.get_logger().info(f"开始AI处理: {url_setting.url}")
                     data_process = DataProcess(self.store)
                     data_process.process()
                 except Exception as e:
                     self._send_error_email(
                         "AI数据处理",
-                        f"\n    Type: {url_setting.adapter_type} \n    Url: {url_setting.url}\n    错误: {str(e)}"
+                        f"\n    Type: {url_setting.adapter_type} \n    Url: {url_setting.url}\n    错误: {str(e)}",
                     )
-                    self.logger.error(f"AI处理发生异常: {e}")
+                    utils.get_logger().error(f"AI处理发生异常: {e}")
                     break  # 中断当前 URL 设置的处理
-            self.logger.info("数据采集处理 任务执行完毕")
+            utils.get_logger().info("数据采集处理 任务执行完毕")
         except Exception as e:
-            self.logger.error(f"数据采集处理 任务执行失败: {e}")
+            utils.get_logger().error(f"数据采集处理 任务执行失败: {e}")
 
     def _process_job(self):
         threading.Thread(target=self._process).start()
 
     def _process(self):
         try:
-            self.logger.info("开始执行 AI处理数据 任务")
+            utils.get_logger().info("开始执行 AI处理数据 任务")
             data_process = DataProcess(self.store)
             data_process.process()
-            self.logger.info("AI处理数据 任务执行完毕")
+            utils.get_logger().info("AI处理数据 任务执行完毕")
         except Exception as e:
             self._send_error_email("AI数据处理", f"\n    错误: {str(e)}")
-            self.logger.error(f"AI任务 执行失败: {e}")
+            utils.get_logger().error(f"AI任务 执行失败: {e}")
 
     def _send_job(self):
         try:
-            self.logger.info("开始执行 邮件发送 任务")
+            utils.get_logger().info("开始执行 邮件发送 任务")
             DataSend(self.store).send()
-            self.logger.info("邮件发送 任务执行完毕")
+            utils.get_logger().info("邮件发送 任务执行完毕")
         except Exception as e:
             self._send_error_email("邮件发送", f"\n    错误: {str(e)}")
-            self.logger.error(f"邮件发送 任务执行失败: {e}")
+            utils.get_logger().error(f"邮件发送 任务执行失败: {e}")
 
     def _send_current_month_report_job(self):
         try:
             if datetime.today().day == self._get_current_month_report_day():
-                self.logger.info("开始执行 邮件发送当月报告 任务")
+                utils.get_logger().info("开始执行 邮件发送当月报告 任务")
                 DataSend(self.store).send_report_current_month()
-                self.logger.info("邮件发送当月报告 任务执行完毕")
+                utils.get_logger().info("邮件发送当月报告 任务执行完毕")
         except Exception as e:
             self._send_error_email("邮件发送", f"\n    错误: {str(e)}")
-            self.logger.error(f"邮件发送当月报告 任务执行失败: {e}")
+            utils.get_logger().error(f"邮件发送当月报告 任务执行失败: {e}")
 
-    def _get_current_month_report_day(self):
-        day = self.config.get_int("job.send_current_month_report_day",30)
-        if datetime.today().month==2 and day > 28 :
+    @staticmethod
+    def _get_current_month_report_day():
+        day = utils.get_config_int("job.send_current_month_report_day", 30)
+        if datetime.today().month == 2 and day > 28:
             day = 28
-        if datetime.today().month in [4,6,9,11] and day > 30:
+        if datetime.today().month in [4, 6, 9, 11] and day > 30:
             day = 30
         if day > 31:
             day = 31
@@ -180,15 +189,16 @@ class JobRunner:
     def _send_prev_month_report_job(self):
         try:
             if datetime.today().day == self._get_prev_month_report_day():
-                self.logger.info("开始执行 邮件发送上月报告 任务")
+                utils.get_logger().info("开始执行 邮件发送上月报告 任务")
                 DataSend(self.store).send_report_prev_month()
-                self.logger.info("邮件发送上月报告 任务执行完毕")
+                utils.get_logger().info("邮件发送上月报告 任务执行完毕")
         except Exception as e:
             self._send_error_email("邮件发送", f"\n    错误: {str(e)}")
-            self.logger.error(f"邮件发送上月报告 任务执行失败: {e}")
+            utils.get_logger().error(f"邮件发送上月报告 任务执行失败: {e}")
 
-    def _get_prev_month_report_day(self):
-        day = self.config.get_int("job.send_prev_month_report_day",1)
+    @staticmethod
+    def _get_prev_month_report_day():
+        day = utils.get_config_int("job.send_prev_month_report_day", 1)
         if datetime.today().month == 2 and day > 28:
             day = 28
         if datetime.today().month in [4, 6, 9, 11] and day > 30:
@@ -199,22 +209,21 @@ class JobRunner:
 
     def _clean_job(self):
         try:
-            self.logger.info("开始执行 清理数据 任务")
+            utils.get_logger().info("开始执行 清理数据 任务")
             DataClean().clean()
-            self.logger.info("清理数据 任务执行完毕")
+            utils.get_logger().info("清理数据 任务执行完毕")
         except Exception as e:
             self._send_error_email("清理数据", f"\n    错误: {str(e)}")
-            self.logger.error(f"清理数据 任务执行失败: {e}")
+            utils.get_logger().error(f"清理数据 任务执行失败: {e}")
 
-    def _validate_and_format_time(self, time_str, default_time: list):
+    @staticmethod
+    def _validate_and_format_time(time_str, default_time: list):
         """验证并格式化时间字符串"""
         if not time_str:
             return default_time
-        time_str = time_str.strip().replace(',', ',')
+        time_str = time_str.strip().replace(",", ",")
         # 分割字符串为列表
-        items = [
-            item.strip().strip("'").strip('"') for item in time_str.split(',')
-        ]
+        items = [item.strip().strip("'").strip('"') for item in time_str.split(",")]
 
         # 初始化结果列表
         formatted_times = []
@@ -223,24 +232,23 @@ class JobRunner:
             if not item:
                 continue  # 跳过空字符串
             try:
-                item = item.replace(':', ':')
+                item = item.replace(":", ":")
                 # 使用 dateutil.parser 解析时间字符串
-                parsed_time = parser.parse(item).time().strftime('%H:%M:%S')
+                parsed_time = parser.parse(item).time().strftime("%H:%M:%S")
                 formatted_times.append(parsed_time)
             except Exception as e:
-                self.logger.error(f"配置时间解析错误: {item},: {e} ")
+                utils.get_logger().error(f"配置时间解析错误: {item},: {e} ")
         if len(formatted_times) == 0:
-            self.logger.error(f"解析时间失败,使用默认时间 {default_time}")
+            utils.get_logger().error(f"解析时间失败,使用默认时间 {default_time}")
             return default_time
         return formatted_times
 
-    def _send_error_email(self, title: str, error: str) -> None:
-        email_helper = EmailHelper()
-        email = self.config.get("email.error_email")
-        self.logger.info(f"发送错误邮件: {email}")
+    @staticmethod
+    def _send_error_email(title: str, error: str) -> None:
+        email = utils.get_config_value("email.error_email")
+        utils.get_logger().info(f"发送错误邮件: {email}")
         if not email:
             return
         title = f"{title}异常"
         content = f"{title},请及时处理。\n\n异常信息:{error}"
-        email_helper.send_email(email, title, content, False, None)
-
+        utils.send_email(email, title, content, False, None)

+ 13 - 13
SourceCode/TenderCrawler/app/main.py

@@ -1,31 +1,31 @@
-import time
 import datetime
+import time
+
 import schedule
 
-from utils.config_helper import ConfigHelper
-from utils.logger_helper import LoggerHelper
+import utils
 from jobs.job_runner import JobRunner
 
-logger = LoggerHelper.get_logger()
-config =  ConfigHelper()
 DEFAULT_USER_SLEEP_INTERVAL = 10  # 配置默认时间间隔10秒
-logger.info("应用程序启动...")
+
+utils.get_logger().info("应用程序启动...")
 
 job = JobRunner()
 job.run_job()
 
-interval = config.get_int("job.sleep_interval",DEFAULT_USER_SLEEP_INTERVAL)
+interval = utils.get_config_int("job.sleep_interval", DEFAULT_USER_SLEEP_INTERVAL)
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     while True:
         schedule.run_pending()
         now = datetime.datetime.now()
         time.sleep(interval)
         # 重新加载配置及任务
         if now.minute == 0 and now.second <= interval:
-            job_id = config.get("job.event_id")
-            config.load_config()
-            interval = config.get_int("job.sleep_interval", DEFAULT_USER_SLEEP_INTERVAL)
-            if job_id != config.get("job.event_id"):
+            job_id = utils.get_config_int("job.event_id")
+            utils.reload_config()
+            interval = utils.get_config_int(
+                "job.sleep_interval", DEFAULT_USER_SLEEP_INTERVAL
+            )
+            if job_id != utils.get_config_int("job.event_id"):
                 job.restart_job()
-

+ 20 - 14
SourceCode/TenderCrawler/app/models/area_email.py

@@ -3,13 +3,15 @@ from utils.mysql_helper import MySQLHelper
 
 class AreaEmail:
 
-    def __init__(self,
-                 name=None,
-                 area=None,
-                 email=None,
-                 is_virtual=None,
-                 is_active=None,
-                 remark=None):
+    def __init__(
+        self,
+        name=None,
+        area=None,
+        email=None,
+        is_virtual=None,
+        is_active=None,
+        remark=None,
+    ):
         self.name = name
         self.area = area
         if email is None:
@@ -23,12 +25,13 @@ class AreaEmail:
     def __repr__(self):
         return (
             f"<AreaEmail(name={self.name},area={self.area}, email={self.email}, "
-            f"is_active={self.is_active}, remark={self.remark})>")
+            f"is_active={self.is_active}, remark={self.remark})>"
+        )
 
     def to_dict(self):
         return {
-            'area': self.area,
-            'email': self.email,
+            "area": self.area,
+            "email": self.email,
         }
 
     # # 插入 AreaEmail 数据
@@ -45,7 +48,9 @@ class AreaEmail:
 
     _query = "SELECT name,area,email FROM t_area_email WHERE is_virtual = 0 and is_active = 1"
     _query_virtual = "SELECT name,area,email FROM t_area_email WHERE is_virtual = 1 and is_active = 1"
-    _query_master = "SELECT email FROM t_area_email WHERE name='master' AND is_active = 1"
+    _query_master = (
+        "SELECT email FROM t_area_email WHERE name='master' AND is_active = 1"
+    )
     _query_by_area = "SELECT email FROM t_area_email WHERE CONCAT(area,',') like %s AND is_active = 1"
 
     # 查询 AreaEmail 数据
@@ -63,7 +68,7 @@ class AreaEmail:
 
     def fetch_one_by_area(self, area: str):
         with MySQLHelper() as db_helper:
-            params = ('%' + area + ',%', )
+            params = ("%" + area + ",%",)
             result = db_helper.fetch_one(self._query_by_area, params)
             if result is None:
                 return None
@@ -76,8 +81,9 @@ class AreaEmail:
                 return None
             return result["email"]
 
+    _update_area_query = "UPDATE t_area_email SET area = %s WHERE name = %s"
+
     def update_area_email_area_by_name(self, name: str, area: str):
         with MySQLHelper() as db_helper:
-            query = "UPDATE t_area_email SET area = %s WHERE name = %s"
             params = (area, name)
-            db_helper.execute_non_query(query, params)
+            db_helper.execute_non_query(self._update_area_query, params)

+ 90 - 90
SourceCode/TenderCrawler/app/models/collect_data.py

@@ -1,11 +1,11 @@
 from datetime import datetime
+
+import utils
 from utils.mysql_helper import MySQLHelper
-from utils.logger_helper import LoggerHelper
 
 
 class CollectData:
 
-    logger = LoggerHelper.get_logger()
     UNPROCESSED = 0
     PROCESSED = 1
     INVALID = 2
@@ -13,15 +13,17 @@ class CollectData:
     DATA_TYPE_0 = 0
     DATA_TYPE_RESULT = 1
 
-    def __init__(self,
-                 url=None,
-                 keyword=None,
-                 content=None,
-                 data_type=None,
-                 attach_path=None,
-                 status=UNPROCESSED,
-                 create_time=None,
-                 process_time=None):
+    def __init__(
+        self,
+        url=None,
+        keyword=None,
+        content=None,
+        data_type=None,
+        attach_path=None,
+        status=UNPROCESSED,
+        create_time=None,
+        process_time=None,
+    ):
         self.url = url
         self.keyword = keyword
         self.content = content
@@ -47,8 +49,7 @@ class CollectData:
          VALUES (%s, %s, %s, %s, %s, %s, %s);
          """
     _delete_query = """
-         DELETE FROM t_collect_data
-         WHERE url = %s;
+         DELETE FROM t_collect_data  WHERE url = %s;
          """
 
     def insert(self, collect_data):
@@ -56,10 +57,15 @@ class CollectData:
             raise TypeError("collect_data 不是 CollectData 的实例")
         with MySQLHelper() as db_helper:
 
-            params = (collect_data.url, collect_data.keyword,
-                      collect_data.content, collect_data.data_type,
-                      collect_data.attach_path, collect_data.status,
-                      datetime.now())
+            params = (
+                collect_data.url,
+                collect_data.keyword,
+                collect_data.content,
+                collect_data.data_type,
+                collect_data.attach_path,
+                collect_data.status,
+                datetime.now(),
+            )
             if collect_data.status == self.INVALID:
                 db_helper.execute_non_query(self._insert_query_history, params)
             else:
@@ -67,8 +73,9 @@ class CollectData:
 
     def insert_batch(self, collect_data_list):
         if not all(
-                isinstance(collect_data, self.__class__)
-                for collect_data in collect_data_list):
+            isinstance(collect_data, self.__class__)
+            for collect_data in collect_data_list
+        ):
             raise TypeError("collect_data_list 中的所有元素必须是 CollectData 的实例")
 
         params = [
@@ -79,8 +86,10 @@ class CollectData:
                 collect_data.data_type,
                 collect_data.attach_path,
                 collect_data.status,
-                datetime.now()  # 每次调用 datetime.now() 获取当前时间
-            ) for collect_data in collect_data_list if collect_data.status != 2
+                datetime.now(),  # 每次调用 datetime.now() 获取当前时间
+            )
+            for collect_data in collect_data_list
+            if collect_data.status != 2
         ]
         params2 = [
             (
@@ -90,17 +99,19 @@ class CollectData:
                 collect_data.data_type,
                 collect_data.attach_path,
                 collect_data.status,
-                datetime.now()  # 每次调用 datetime.now() 获取当前时间
-            ) for collect_data in collect_data_list if collect_data.status == 2
+                datetime.now(),  # 每次调用 datetime.now() 获取当前时间
+            )
+            for collect_data in collect_data_list
+            if collect_data.status == 2
         ]
 
         with MySQLHelper() as db_helper:
             db_helper.execute_non_query(self._insert_query, params)
             affected_rows1 = db_helper.connection.affected_rows()
-            self.logger.info(f"成功插入 {affected_rows1} 条有效数据")
+            utils.get_logger().info(f"成功插入 {affected_rows1} 条有效数据")
             db_helper.execute_non_query(self._insert_query_history, params2)
             affected_rows2 = db_helper.connection.affected_rows()
-            self.logger.info(f"成功插入 {affected_rows2} 条无效历史数据")
+            utils.get_logger().info(f"成功插入 {affected_rows2} 条无效历史数据")
             return affected_rows1 + affected_rows2
 
     # def insert_url(self, url: str, keyword: str, content: str):
@@ -119,51 +130,48 @@ class CollectData:
     #         data = [CollectData(**result) for result in results]
     #         return data
 
+    _query = "SELECT url FROM t_collect_data"
+
     def fetch_all_urls(self) -> list[str]:
         with MySQLHelper() as db_helper:
-            query = "SELECT url FROM t_collect_data"
-            results = db_helper.execute_query(query)
+            results = db_helper.execute_query(self._query)
             # 使用列表推导式一次性提取所有 'url' 值
-            data = [result['url'] for result in results]
+            data = [result["url"] for result in results]
             return data
 
+    _process_url_query = "SELECT url FROM t_collect_data WHERE status = 0"
+
     def fetch_urls_to_process(self) -> list[str]:
         with MySQLHelper() as db_helper:
-            query = """
-            SELECT url
-            FROM t_collect_data
-            WHERE status = 0
-            """
-
-            results = db_helper.execute_query(query)
-            data = [result['url'] for result in results]
+            results = db_helper.execute_query(self._process_url_query)
+            data = [result["url"] for result in results]
             return data
 
+    _one_url_query = "SELECT url FROM `t_collect_data_history` WHERE url= %s UNION SELECT url FROM `t_collect_data`  WHERE url= %s LIMIT 1"
+
     def fetch_one_url(self, url: str):
         with MySQLHelper() as db_helper:
-            query = """
-             SELECT url FROM `t_collect_data_history` WHERE url= %s UNION SELECT url FROM `t_collect_data`  WHERE url= %s LIMIT 1
-            """
-            result = db_helper.fetch_one(query, (url, url))
+            result = db_helper.fetch_one(self._one_url_query, (url, url))
             if not result:
                 return None
             data = result["url"]
             return data
 
+    _one_collect_by_url_query = "SELECT url,keyword,content,data_type,attach_path,status FROM t_collect_data WHERE url = %s  LIMIT 1"
+
     def fetch_one_collect_by_url(self, url: str):
         with MySQLHelper() as db_helper:
-            query = """
-                SELECT url,keyword,content,data_type,attach_path,status FROM t_collect_data WHERE url = %s  LIMIT 1
-            """
-            result = db_helper.fetch_one(query, (url, ))
+            result = db_helper.fetch_one(self._one_collect_by_url_query, (url,))
             if not result:
                 return None
-            data = CollectData(url=result["url"],
-                               keyword=result["keyword"],
-                               content=result["content"],
-                               data_type=result["data_type"],
-                               attach_path=result["attach_path"],
-                               status=result["status"])
+            data = CollectData(
+                url=result["url"],
+                keyword=result["keyword"],
+                content=result["content"],
+                data_type=result["data_type"],
+                attach_path=result["attach_path"],
+                status=result["status"],
+            )
             return data
 
     def set_process(self, url):
@@ -183,24 +191,32 @@ class CollectData:
     def move_to_history_and_delete(self, urls: list):
         with MySQLHelper() as db_helper:
             # 查询 t_collect_data 中的数据
-            placeholders = ', '.join(['%s'] * len(urls))
+            placeholders = ", ".join(["%s"] * len(urls))
             query = f"""
-             SELECT url, keyword, content, data_type, attach_path, status, create_time, process_time
-             FROM t_collect_data
-             WHERE url IN  ({placeholders})
-             """
+                       SELECT url, keyword, content, data_type, attach_path, status, create_time, process_time
+                       FROM t_collect_data
+                       WHERE url IN  ({placeholders})
+                       """
             results = db_helper.execute_query(query, urls)
             if not results:
-                self.logger.warning(
-                    f"URLs {urls} 未在 t_collect_data 中找到,无法移动到历史表并删除。")
+                utils.get_logger().warning(
+                    f"URLs {urls} 未在 t_collect_data 中找到,无法移动到历史表并删除。"
+                )
                 return False
 
             # 将数据插入到 t_collect_data_history
             insert_query = self._insert_query_history
             insert_params = [
-                (result["url"], result["keyword"], result["content"],
-                 result["data_type"], result["attach_path"], result["status"],
-                 result["create_time"]) for result in results
+                (
+                    result["url"],
+                    result["keyword"],
+                    result["content"],
+                    result["data_type"],
+                    result["attach_path"],
+                    result["status"],
+                    result["create_time"],
+                )
+                for result in results
             ]
             db_helper.execute_non_query(insert_query, insert_params)
 
@@ -208,47 +224,31 @@ class CollectData:
             delete_query = f"DELETE FROM t_collect_data WHERE url IN ({placeholders})"
             db_helper.execute_non_query(delete_query, urls)
 
-            self.logger.info(
+            utils.get_logger().info(
                 f"URLs {urls} 已从 t_collect_data 移动到 t_collect_data_history 并删除。"
             )
             return True
 
-    def fetch_by_status(self, status=0):
-        with MySQLHelper() as db_helper:
-            query = """
-            SELECT url, keyword, content, status, create_time, process_time
-            FROM t_collect_data
-            WHERE status = %s
-            """
-            results = db_helper.execute_query(query, (status, ))
-            data = [CollectData(**result) for result in results]
-            return data
+    _update_status_query = " UPDATE t_collect_data SET status = %s WHERE url = %s"
 
     def set_status(self, collect_data):
-        if not isinstance(collect_data, self):
+        if not isinstance(collect_data, CollectData):
             raise TypeError("collect_data 不是 CollectData 的实例")
         with MySQLHelper() as db_helper:
-            query = """
-            UPDATE t_collect_data
-            SET status = %s
-            WHERE url = %s
-            """
             params = (collect_data.status, collect_data.url)
-            db_helper.execute_non_query(query, params)
+            db_helper.execute_non_query(self._update_status_query, params)
+
+    _delete_before_date_history_query = (
+        "DELETE FROM t_collect_data_history WHERE create_time < %s"
+    )
+    _delete_before_date_query = "DELETE FROM t_collect_data WHERE create_time < %s "
 
     def delete_before_date(self, date: str):
         with MySQLHelper() as db_helper:
-            query1 = """
-                     DELETE FROM t_collect_data_history WHERE create_time < %s ;
-                     """
-            query2 = """
-                     DELETE FROM t_collect_data WHERE create_time < %s ;
-                     """
-            params = (date, )
-            db_helper.execute_non_query(query1, params)
+            params = (date,)
+            db_helper.execute_non_query(self._delete_before_date_history_query, params)
             affected_rows = db_helper.connection.affected_rows()
-            db_helper.execute_non_query(query2, params)
+            db_helper.execute_non_query(self._delete_before_date_query, params)
             affected_rows += db_helper.connection.affected_rows()
-            self.logger.info(
-                f"删除 {date} 之前共 {affected_rows} 条 采集记录。")
-            return affected_rows
+            utils.get_logger().info(f"删除 {date} 之前共 {affected_rows} 条 采集记录。")
+            return affected_rows

+ 106 - 107
SourceCode/TenderCrawler/app/models/process_data.py

@@ -1,39 +1,39 @@
+import utils
 from datetime import datetime
+
 from utils.mysql_helper import MySQLHelper
-from utils.config_helper import ConfigHelper
-from utils.logger_helper import LoggerHelper
 
 
 class ProcessData:
 
-    logger = LoggerHelper.get_logger()
-
-    def __init__(self,
-                 no=None,
-                 title=None,
-                 url=None,
-                 keyword=None,
-                 date=None,
-                 area=None,
-                 address=None,
-                 summary=None,
-                 release_date=None,
-                 devices=None,
-                 attach_path=None,
-                 status=None,
-                 create_time=None,
-                 send_time=None,
-                 other_urls=None,
-                 prompt_tokens=None,
-                 completion_tokens=None,
-                 total_tokens=None,
-                 remark=None):
+    def __init__(
+        self,
+        no=None,
+        title=None,
+        url=None,
+        keyword=None,
+        date=None,
+        area=None,
+        address=None,
+        summary=None,
+        release_date=None,
+        devices=None,
+        attach_path=None,
+        status=None,
+        create_time=None,
+        send_time=None,
+        other_urls=None,
+        prompt_tokens=None,
+        completion_tokens=None,
+        total_tokens=None,
+        remark=None,
+    ):
         self.no = no
         self.title = title
         self.url = url
         self.date = date
         if not area:
-            area = ConfigHelper().get("default_area", "全国")
+            area = utils.get_config_value("default_area", "全国")
         self.area = area.replace(" ", "")
         self.keyword = keyword
         self.address = address
@@ -55,12 +55,14 @@ class ProcessData:
             f"ProcessData(no={self.no}, title={self.title}, date={self.date}, "
             f"area={self.area}, address={self.address}, summary={self.summary}, "
             f"status={self.status}, create_time={self.create_time}, "
-            f"send_time={self.send_time}, remark={self.remark})")
+            f"send_time={self.send_time}, remark={self.remark})"
+        )
 
     _insert_query = """
               INSERT IGNORE INTO t_data (no, title, url, keyword, date, area, address, summary, release_date, devices, attach_path, status, create_time, prompt_tokens, completion_tokens, total_tokens)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
           """
+
     # _update_query = """
     #             UPDATE t_collect_data SET status = 1 WHERE url = %s;
     #         """
@@ -68,37 +70,7 @@ class ProcessData:
         if not isinstance(process_data, self.__class__):
             raise TypeError("process_data 不是 ProcessData 的实例")
 
-        insert_params = (process_data.no,
-                         process_data.title,
-                         process_data.url,
-                         process_data.keyword,
-                         process_data.date,
-                         process_data.area,
-                         process_data.address,
-                         process_data.summary,
-                         process_data.release_date,
-                         process_data.devices,
-                         process_data.attach_path,
-                         0,
-                         datetime.now(),
-                         process_data.prompt_tokens,
-                         process_data.completion_tokens,
-                         process_data.total_tokens)
-
-        # update_params = (process_data.url, )
-
-        with MySQLHelper() as db_helper:
-            db_helper.execute_non_query(self._insert_query, insert_params)
-            # db_helper.execute_non_query(self._update_query, update_params)
-
-    def insert_batch(self, process_data_list):
-        if not all(
-                isinstance(process_data, self.__class__)
-                for process_data in process_data_list):
-            raise TypeError("process_data_list 中的所有元素必须是 ProcessData 的实例")
-
-
-        insert_params = [(
+        insert_params = (
             process_data.no,
             process_data.title,
             process_data.url,
@@ -114,8 +86,43 @@ class ProcessData:
             datetime.now(),
             process_data.prompt_tokens,
             process_data.completion_tokens,
-            process_data.total_tokens
-        ) for process_data in process_data_list]
+            process_data.total_tokens,
+        )
+
+        # update_params = (process_data.url, )
+
+        with MySQLHelper() as db_helper:
+            db_helper.execute_non_query(self._insert_query, insert_params)
+            # db_helper.execute_non_query(self._update_query, update_params)
+
+    def insert_batch(self, process_data_list):
+        if not all(
+            isinstance(process_data, self.__class__)
+            for process_data in process_data_list
+        ):
+            raise TypeError("process_data_list 中的所有元素必须是 ProcessData 的实例")
+
+        insert_params = [
+            (
+                process_data.no,
+                process_data.title,
+                process_data.url,
+                process_data.keyword,
+                process_data.date,
+                process_data.area,
+                process_data.address,
+                process_data.summary,
+                process_data.release_date,
+                process_data.devices,
+                process_data.attach_path,
+                0,
+                datetime.now(),
+                process_data.prompt_tokens,
+                process_data.completion_tokens,
+                process_data.total_tokens,
+            )
+            for process_data in process_data_list
+        ]
 
         # update_params = [(process_data.url, )
         #                  for process_data in process_data_list]
@@ -123,78 +130,70 @@ class ProcessData:
         with MySQLHelper() as db_helper:
             db_helper.execute_non_query(self._insert_query, insert_params)
             affected_rows = db_helper.connection.affected_rows()
-            self.logger.info(f"成功插入 {affected_rows} 条数据")
+            utils.get_logger().info(f"成功插入 {affected_rows} 条数据")
             # for param in update_params:
             #     db_helper.execute_non_query(self._update_query, param)
             return affected_rows
 
+    _one_url_query = (
+        "SELECT url,no,other_urls,attach_path FROM t_data WHERE url = %s  LIMIT 1"
+    )
 
-    _one_url_query = """
-                    SELECT url,no,other_urls,attach_path FROM t_data WHERE url = %s  LIMIT 1
-                """
     def fetch_one_process_by_url(self, url: str):
         with MySQLHelper() as db_helper:
-            result = db_helper.fetch_one(self._one_url_query, (url, ))
+            result = db_helper.fetch_one(self._one_url_query, (url,))
             if not result:
                 return None
-            data = ProcessData(url=result["url"],
-                               no=result["no"],
-                               other_urls=result["other_urls"],
-                               attach_path=result["attach_path"])
+            data = ProcessData(
+                url=result["url"],
+                no=result["no"],
+                other_urls=result["other_urls"],
+                attach_path=result["attach_path"],
+            )
             return data
 
-    _one_no_query = """
-                      SELECT url,no,other_urls,attach_path FROM t_data WHERE no = %s  LIMIT 1
-                  """
+    _one_no_query = (
+        "SELECT url,no,other_urls,attach_path FROM t_data WHERE no = %s  LIMIT 1"
+    )
+
     def fetch_one_process_by_no(self, no: str):
         with MySQLHelper() as db_helper:
 
-            result = db_helper.fetch_one(self._one_no_query, (no, ))
+            result = db_helper.fetch_one(self._one_no_query, (no,))
             if not result:
                 return None
-            data = ProcessData(url=result["url"],
-                               no=result["no"],
-                               other_urls=result["other_urls"],
-                               attach_path=result["attach_path"])
+            data = ProcessData(
+                url=result["url"],
+                no=result["no"],
+                other_urls=result["other_urls"],
+                attach_path=result["attach_path"],
+            )
             return data
 
-    def fetch_no_send(self):
+    _not_send_query = "SELECT no, title, url, keyword, date, area, address, summary, attach_path, release_date FROM t_data WHERE status = 0"
+
+    def fetch_not_send(self):
         with MySQLHelper() as db_helper:
-            query = "SELECT no, title, url, keyword, date, area, address, summary, attach_path, release_date FROM t_data WHERE status = 0"
-            results = db_helper.execute_query(query)
+            results = db_helper.execute_query(self._not_send_query)
             data = [ProcessData(**result) for result in results]
             return data
 
+    _set_send_query = "UPDATE t_data SET status = 1, send_time = %s WHERE no = %s"
+
     def set_send(self, no):
         with MySQLHelper() as db_helper:
-            query = """
-            UPDATE t_data
-            SET status = 1, send_time = %s
-            WHERE no = %s
-            """
+
             params = (datetime.now(), no)
-            db_helper.execute_non_query(query, params)
+            db_helper.execute_non_query(self._set_send_query, params)
+
+    _update_other_urls_query = "UPDATE t_data SET other_urls = %s WHERE url = %s"
 
     def set_other_urls(self, url, other_urls):
         with MySQLHelper() as db_helper:
-            query = """
-            UPDATE t_data
-            SET other_urls = %s
-            WHERE url = %s
-            """
-            update_query = """
-            UPDATE t_collect_data SET status = 1 WHERE url = %s;
-            """
             params = (other_urls, url)
-            db_helper.execute_non_query(query, params)
-            db_helper.execute_non_query(update_query, (url, ))
+            db_helper.execute_non_query(self._update_other_urls_query, params)
 
-    def check_is_process_by_url(self, url):
-        with MySQLHelper() as db_helper:
-            query = "SELECT * FROM t_data WHERE url = %s"
-            params = (url, )
-            results = db_helper.execute_query(query, params)
-            return True if results else False
+    _delete_before_date_query = "DELETE FROM t_data WHERE date < %s"
 
     def delete_before_date(self, date: str):
         """
@@ -203,10 +202,10 @@ class ProcessData:
         :return: 删除的行数
         """
         with MySQLHelper() as db_helper:
-            query = "DELETE FROM t_data WHERE date < %s"
-            params = (date, )
-            db_helper.execute_non_query(query, params)
+            params = (date,)
+            db_helper.execute_non_query(self._delete_before_date_query, params)
             affected_rows = db_helper.connection.affected_rows()
-            self.logger.info(
-                f"删除 {date} 之前共 {affected_rows} 条 招标处理记录。")
-            return affected_rows
+            utils.get_logger().info(
+                f"删除 {date} 之前共 {affected_rows} 条 招标处理记录。"
+            )
+            return affected_rows

+ 99 - 97
SourceCode/TenderCrawler/app/models/process_result_data.py

@@ -1,30 +1,31 @@
+import utils
 from datetime import datetime
+
 from utils.mysql_helper import MySQLHelper
-from utils.logger_helper import LoggerHelper
 
 
 class ProcessResultData:
 
-    logger = LoggerHelper.get_logger()
-
-    def __init__(self,
-                 no=None,
-                 title=None,
-                 url=None,
-                 keyword=None,
-                 date=None,
-                 price=None,
-                 bidder=None,
-                 summary=None,
-                 attach_path=None,
-                 status=None,
-                 create_time=None,
-                 send_time=None,
-                 other_urls=None,
-                 prompt_tokens=None,
-                 completion_tokens=None,
-                 total_tokens=None,
-                 remark=None):
+    def __init__(
+        self,
+        no=None,
+        title=None,
+        url=None,
+        keyword=None,
+        date=None,
+        price=None,
+        bidder=None,
+        summary=None,
+        attach_path=None,
+        status=None,
+        create_time=None,
+        send_time=None,
+        other_urls=None,
+        prompt_tokens=None,
+        completion_tokens=None,
+        total_tokens=None,
+        remark=None,
+    ):
         self.no = no
         self.title = title
         self.url = url
@@ -48,7 +49,8 @@ class ProcessResultData:
             f"ProcessResultData(no={self.no}, title={self.title}, date={self.date}, "
             f"keyword={self.keyword}, price={self.price}, bidder={self.bidder}, summary={self.summary}, attach_path={self.attach_path}, "
             f"status={self.status}, create_time={self.create_time}, "
-            f"send_time={self.send_time}, remark={self.remark})")
+            f"send_time={self.send_time}, remark={self.remark})"
+        )
 
     _insert_query = """
               INSERT IGNORE INTO t_data_result (no, title, url, keyword, date, price,  bidder, summary, attach_path, status, create_time, prompt_tokens, completion_tokens, total_tokens)
@@ -62,34 +64,7 @@ class ProcessResultData:
         if not isinstance(process_result_data, self.__class__):
             raise TypeError("process_result_data 不是 ProcessResultData 的实例")
 
-        insert_params = (process_result_data.no,
-                         process_result_data.title,
-                         process_result_data.url,
-                         process_result_data.keyword,
-                         process_result_data.date,
-                         process_result_data.price,
-                         process_result_data.bidder,
-                         process_result_data.summary,
-                         process_result_data.attach_path,
-                         0,
-                         datetime.now(),
-                         process_result_data.prompt_tokens,
-                         process_result_data.completion_tokens,
-                         process_result_data.total_tokens)
-
-        # update_params = (process_result_data.url, )
-
-        with MySQLHelper() as db_helper:
-            db_helper.execute_non_query(self._insert_query, insert_params)
-            # db_helper.execute_non_query(self._update_query, update_params)
-
-    def insert_batch(self, process_result_data_list):
-        if not all(
-                isinstance(process_result_data, self.__class__)
-                for process_result_data in process_result_data_list):
-            raise TypeError("process_result_data_list 中的所有元素必须是 ProcessResultData 的实例")
-
-        insert_params = [(
+        insert_params = (
             process_result_data.no,
             process_result_data.title,
             process_result_data.url,
@@ -103,8 +78,43 @@ class ProcessResultData:
             datetime.now(),
             process_result_data.prompt_tokens,
             process_result_data.completion_tokens,
-            process_result_data.total_tokens
-        ) for process_result_data in process_result_data_list]
+            process_result_data.total_tokens,
+        )
+
+        # update_params = (process_result_data.url, )
+
+        with MySQLHelper() as db_helper:
+            db_helper.execute_non_query(self._insert_query, insert_params)
+            # db_helper.execute_non_query(self._update_query, update_params)
+
+    def insert_batch(self, process_result_data_list):
+        if not all(
+            isinstance(process_result_data, self.__class__)
+            for process_result_data in process_result_data_list
+        ):
+            raise TypeError(
+                "process_result_data_list 中的所有元素必须是 ProcessResultData 的实例"
+            )
+
+        insert_params = [
+            (
+                process_result_data.no,
+                process_result_data.title,
+                process_result_data.url,
+                process_result_data.keyword,
+                process_result_data.date,
+                process_result_data.price,
+                process_result_data.bidder,
+                process_result_data.summary,
+                process_result_data.attach_path,
+                0,
+                datetime.now(),
+                process_result_data.prompt_tokens,
+                process_result_data.completion_tokens,
+                process_result_data.total_tokens,
+            )
+            for process_result_data in process_result_data_list
+        ]
 
         # update_params = [(process_result_data.url, )
         #                  for process_result_data in process_result_data_list]
@@ -112,7 +122,7 @@ class ProcessResultData:
         with MySQLHelper() as db_helper:
             db_helper.execute_non_query(self._insert_query, insert_params)
             affected_rows = db_helper.connection.affected_rows()
-            self.logger.info(f"成功插入 {affected_rows} 条数据")
+            utils.get_logger().info(f"成功插入 {affected_rows} 条数据")
             # for param in update_params:
             #     db_helper.execute_non_query(self._update_query, param)
             return affected_rows
@@ -126,10 +136,12 @@ class ProcessResultData:
             result = db_helper.fetch_one(self._one_url_query, (url,))
             if not result:
                 return None
-            data = ProcessResultData(url=result["url"],
-                               no=result["no"],
-                               other_urls=result["other_urls"],
-                               attach_path=result["attach_path"])
+            data = ProcessResultData(
+                url=result["url"],
+                no=result["no"],
+                other_urls=result["other_urls"],
+                attach_path=result["attach_path"],
+            )
             return data
 
     _one_no_query = """
@@ -138,56 +150,44 @@ class ProcessResultData:
 
     def fetch_one_process_by_no(self, no: str):
         with MySQLHelper() as db_helper:
-            result = db_helper.fetch_one(self._one_no_query, (no, ))
+            result = db_helper.fetch_one(self._one_no_query, (no,))
             if not result:
                 return None
-            data = ProcessResultData(url=result["url"],
-                                     no=result["no"],
-                                     other_urls=result["other_urls"],
-                               attach_path=result["attach_path"])
+            data = ProcessResultData(
+                url=result["url"],
+                no=result["no"],
+                other_urls=result["other_urls"],
+                attach_path=result["attach_path"],
+            )
             return data
 
-    def fetch_no_send(self):
+    _not_send_query = "SELECT no, title, url, keyword, date, price, bidder, summary, attach_path, status, create_time, send_time FROM t_data_result WHERE status = 0"
+
+    def fetch_not_send(self):
         with MySQLHelper() as db_helper:
-            query = "SELECT no, title, url, keyword, date, price, bidder, summary, attach_path, status, create_time, send_time FROM t_data_result WHERE status = 0"
-            results = db_helper.execute_query(query)
+            results = db_helper.execute_query(self._not_send_query)
             data = [ProcessResultData(**result) for result in results]
             return data
 
+    _update_send_status_query = """
+           UPDATE t_data_result  SET status = 1, send_time = %s  WHERE no = %s
+           """
+
     def set_send(self, no):
         with MySQLHelper() as db_helper:
-            query = """
-            UPDATE t_data_result
-            SET status = 1, send_time = %s
-            WHERE no = %s
-            """
             params = (datetime.now(), no)
-            db_helper.execute_non_query(query, params)
+            db_helper.execute_non_query(self._update_send_status_query, params)
+
+    _update_other_urls_query = "UPDATE t_data_result SET other_urls = %s WHERE url = %s"
 
     def set_other_urls(self, url, other_urls):
         with MySQLHelper() as db_helper:
-            query = """
-            UPDATE t_data_result
-            SET other_urls = %s
-            WHERE url = %s
-            """
-            update_query = """
-            UPDATE t_collect_data SET status = 1 WHERE url = %s;
-            """
             params = (other_urls, url)
-            db_helper.execute_non_query(query, params)
-            db_helper.execute_non_query(update_query, (url, ))
+            db_helper.execute_non_query(self._update_other_urls_query, params)
 
-    def check_is_process_by_url(self, url):
-        with MySQLHelper() as db_helper:
-            query = "SELECT * FROM t_data_result WHERE url = %s"
-            params = (url, )
-            results = db_helper.execute_query(query, params)
-            return True if results else False
-    _query_report= """
-        select * from t_data_result where create_time between %s and %s
-    """
-    def fetch_to_report_by_date(self,start_date, end_date):
+    _query_report = "select * from t_data_result where create_time between %s and %s"
+
+    def fetch_to_report_by_date(self, start_date, end_date):
         """
         获取需要生成报表的数据
         :param start_date:
@@ -200,6 +200,8 @@ class ProcessResultData:
             data = [ProcessResultData(**result) for result in results]
             return data
 
+    _delete_before_date_query = "DELETE FROM t_data_result WHERE create_time < %s"
+
     def delete_before_date(self, date: str):
         """
         删除指定日期之前的数据
@@ -207,10 +209,10 @@ class ProcessResultData:
         :return:
         """
         with MySQLHelper() as db_helper:
-            query = "DELETE FROM t_data_result WHERE create_time < %s"
-            params = (date, )
-            db_helper.execute_non_query(query, params)
+            params = (date,)
+            db_helper.execute_non_query(self._delete_before_date_query, params)
             affected_rows = db_helper.connection.affected_rows()
-            self.logger.info(
-                f"删除 {date} 之前共 {affected_rows} 条 中标处理记录。")
-            return affected_rows
+            utils.get_logger().info(
+                f"删除 {date} 之前共 {affected_rows} 条 中标处理记录。"
+            )
+            return affected_rows

+ 17 - 18
SourceCode/TenderCrawler/app/models/url_setting.py

@@ -3,14 +3,16 @@ from utils.mysql_helper import MySQLHelper
 
 class UrlSetting:
 
-    def __init__(self,
-                 url=None,
-                 adapter_type=None,
-                 username=None,
-                 password=None,
-                 keywords=None,
-                 sort=None,
-                 is_active=None):
+    def __init__(
+        self,
+        url=None,
+        adapter_type=None,
+        username=None,
+        password=None,
+        keywords=None,
+        sort=None,
+        is_active=None,
+    ):
         self.url = url
         self.adapter_type = adapter_type
         self.username = username
@@ -21,20 +23,17 @@ class UrlSetting:
         self.sort = sort or 0
         self.is_active = is_active
 
-
     def __repr__(self):
-        return (
-            f"URL配置[ url: {self.url}  type: {self.adapter_type} keywords: {self.keywords}]"
-        )
+        return f"URL配置[ url: {self.url}  type: {self.adapter_type} keywords: {self.keywords}]"
 
     def to_dict(self):
         return {
-            'url': self.url,
-            'type': self.adapter_type,
-            'username': self.username,
-            'password': self.password,
-            'keywords': self.keywords,
-            'is_active': self.is_active
+            "url": self.url,
+            "type": self.adapter_type,
+            "username": self.username,
+            "password": self.password,
+            "keywords": self.keywords,
+            "is_active": self.is_active,
         }
 
     # # 插入 URL 设置数据

+ 26 - 30
SourceCode/TenderCrawler/app/stores/default_data_store.py

@@ -1,85 +1,81 @@
+import utils
 from models.process_result_data import ProcessResultData
-from utils.logger_helper import LoggerHelper
 from stores.data_store_interface import IDataStore
 
 
 class DefaultDataStore(IDataStore):
 
-    logger = LoggerHelper.get_logger()
-
     def __init__(self):
         pass
 
     def query_one_collect_url(self, url: str):
-        self.logger.info("Default: FETCH_ONE_URL")
+        utils.get_logger().info("Default: FETCH_ONE_URL")
 
     def insert_collect_data(self, data, is_batch=True):
-        self.logger.info("Default: INSERT_COLLECT_DATA")
+        utils.get_logger().info("Default: INSERT_COLLECT_DATA")
 
     def save_collect_data(self, is_force=False):
-        self.logger.info("Default: SAVE_COLLECT_DATA")
+        utils.get_logger().info("Default: SAVE_COLLECT_DATA")
 
     def set_collect_process(self, url):
-        self.logger.info("Default: SET_COLLECT_PROCESS")
+        utils.get_logger().info("Default: SET_COLLECT_PROCESS")
 
     def query_urls_to_process(self):
-        self.logger.info("Default: QUERY_TO_PROCESS")
+        utils.get_logger().info("Default: QUERY_TO_PROCESS")
 
     def query_one_collect_by_url(self, url):
-        self.logger.info("Default: QUERY_ONE_PROCESS")
+        utils.get_logger().info("Default: QUERY_ONE_PROCESS")
 
     def query_one_process_by_url(self, no):
-        self.logger.info("Default: query_one_process_by_url")
+        utils.get_logger().info("Default: query_one_process_by_url")
 
     def query_one_process_by_no(self, no):
-        self.logger.info("Default: query_one_process_by_no")
+        utils.get_logger().info("Default: query_one_process_by_no")
 
     def insert_process_data(self, data):
-        self.logger.info("Default: INSERT_PROCESS_DATA")
+        utils.get_logger().info("Default: INSERT_PROCESS_DATA")
 
     def save_process_data(self, is_force=False):
-        self.logger.info("Default: SAVE_PROCESS_DATA")
+        utils.get_logger().info("Default: SAVE_PROCESS_DATA")
 
     def set_process_other_urls(self, url, other_urls: str):
-        self.logger.info("Default: SET_PROCESS_OTHER_URLS")
+        utils.get_logger().info("Default: SET_PROCESS_OTHER_URLS")
 
     def query_one_process_result_by_url(self, url):
-        self.logger.info("Default: QUERY_ONE_PROCESS_RESULT_BY_URL")
+        utils.get_logger().info("Default: QUERY_ONE_PROCESS_RESULT_BY_URL")
 
     def query_one_process_result_by_no(self, no):
-        self.logger.info("Default: QUERY_ONE_PROCESS_RESULT_BY_NO")
+        utils.get_logger().info("Default: QUERY_ONE_PROCESS_RESULT_BY_NO")
 
-    def insert_process_result_data(self,
-                                   data: ProcessResultData,
-                                   is_batch=True):
-        self.logger.info("Default: INSERT_PROCESS_RESULT_DATA")
+    def insert_process_result_data(self, data: ProcessResultData, is_batch=True):
+        utils.get_logger().info("Default: INSERT_PROCESS_RESULT_DATA")
 
     def save_process_result_data(self, is_force=False):
-        self.logger.info("Default: SAVE_PROCESS_RESULT_DATA")
+        utils.get_logger().info("Default: SAVE_PROCESS_RESULT_DATA")
 
     def set_process_result_other_urls(self, url, other_urls: str):
-        self.logger.info("Default: SET_PROCESS_RESULT_OTHER_URLS")
+        utils.get_logger().info("Default: SET_PROCESS_RESULT_OTHER_URLS")
 
     def query_to_send(self):
-        self.logger.info("Default: QUERY_TO_SEND")
+        utils.get_logger().info("Default: QUERY_TO_SEND")
 
     def query_to_report_by_date(self, start_date, end_date):
-        self.logger.info("Default: QUERY_TO_REPORT_BY_DATE")
+        utils.get_logger().info("Default: QUERY_TO_REPORT_BY_DATE")
 
     def set_send(self, no: str):
-        self.logger.info("Default: SET_SEND")
+        utils.get_logger().info("Default: SET_SEND")
 
     def query_all_emails(self):
-        self.logger.info("Default: QUERY_ALL_EMAILS")
+        utils.get_logger().info("Default: QUERY_ALL_EMAILS")
 
     def query_all_virtual_emails(self):
-        self.logger.info("Default: QUERY_ALL_VIRTUAL_EMAILS")
+        utils.get_logger().info("Default: QUERY_ALL_VIRTUAL_EMAILS")
 
     def query_master_email(self):
-        self.logger.info("Default: GET_MASTER_EMAIL")
+        utils.get_logger().info("Default: GET_MASTER_EMAIL")
 
     def get_email_by_area(self, area: str):
-        self.logger.info("Default: GET_EMAIL_BY_AREA")
+        utils.get_logger().info("Default: GET_EMAIL_BY_AREA")
 
     def update_area_email_area_by_name(self, name: str, area: str):
-        self.logger.info("Default: UPDATE_AREA_EMAIL_AREA_BY_NAME")
+        utils.get_logger().info("Default: UPDATE_AREA_EMAIL_AREA_BY_NAME")

+ 28 - 30
SourceCode/TenderCrawler/app/stores/mysql_data_store.py

@@ -1,25 +1,22 @@
-from utils.logger_helper import LoggerHelper
-from utils.config_helper import ConfigHelper
-from stores.data_store_interface import IDataStore
+import utils
+from models.area_email import AreaEmail
 from models.collect_data import CollectData
 from models.process_data import ProcessData
 from models.process_result_data import ProcessResultData
-from models.area_email import AreaEmail
+from stores.data_store_interface import IDataStore
 
 
 class MysqlDataStore(IDataStore):
 
-    logger = LoggerHelper.get_logger()
-    config = ConfigHelper()
     _collectData = CollectData()
     _processData = ProcessData()
     _processResultData = ProcessResultData()
     _areaEmail = AreaEmail()
 
     def __init__(self):
-        self._collect_size = self.config.get_int('save.collect_batch_size', 1)
+        self._collect_size = utils.get_config_int("save.collect_batch_size", 1)
         self._collect_list = []
-        self._process_size = self.config.get_int('save.process_batch_size', 1)
+        self._process_size = utils.get_config_int("save.process_batch_size", 1)
         self._process_list = []
         self._process_result_list = []
 
@@ -29,16 +26,18 @@ class MysqlDataStore(IDataStore):
     def insert_collect_data(self, data: CollectData, is_batch=True):
         if not is_batch:
             self._collectData.insert(data)
-            self.logger.info(f"保存 采集数据 到数据库: {data.url}")
+            utils.get_logger().info(f"保存 采集数据 到数据库: {data.url}")
         else:
             self._collect_list.append(data)
             self.save_collect_data()
 
     def save_collect_data(self, is_force=False):
         if (is_force and len(self._collect_list) > 0) or len(
-                self._collect_list) >= self._collect_size:
-            self.logger.info("批量保存 采集数据 到数据库,数量: " +
-                             str(len(self._collect_list)))
+            self._collect_list
+        ) >= self._collect_size:
+            utils.get_logger().info(
+                "批量保存 采集数据 到数据库,数量: " + str(len(self._collect_list))
+            )
             self._collectData.insert_batch(self._collect_list)
             self._collect_list = []
 
@@ -58,7 +57,7 @@ class MysqlDataStore(IDataStore):
         if not is_batch:
             self._processData.insert(data)
             self._collectData.set_process(data.url)
-            self.logger.info(f"保存 处理数据 到数据库: {data.url}")
+            utils.get_logger().info(f"保存 处理数据 到数据库: {data.url}")
         else:
             self._process_list.append(data)
             self.save_process_data()
@@ -66,9 +65,11 @@ class MysqlDataStore(IDataStore):
     # 插入到数据库时会把CollectData设为已处理
     def save_process_data(self, is_force=False):
         if (is_force and len(self._process_list) > 0) or len(
-                self._process_list) >= self._process_size:
-            self.logger.info(
-                f"批量保存 处理数据 到数据库,数量: {str(len(self._process_list))}")
+            self._process_list
+        ) >= self._process_size:
+            utils.get_logger().info(
+                f"批量保存 处理数据 到数据库,数量: {str(len(self._process_list))}"
+            )
             self._processData.insert_batch(self._process_list)
             urls = [item.url for item in self._process_list]
             self._collectData.set_process_list(urls)
@@ -86,22 +87,22 @@ class MysqlDataStore(IDataStore):
     def query_one_process_result_by_no(self, no):
         return self._processResultData.fetch_one_process_by_no(no)
 
-    def insert_process_result_data(self,
-                                   data: ProcessResultData,
-                                   is_batch=True):
+    def insert_process_result_data(self, data: ProcessResultData, is_batch=True):
         if not is_batch:
             self._processResultData.insert(data)
             self._collectData.set_process(data.url)
-            self.logger.info(f"保存 处理数据结果 到数据库: {data.url}")
+            utils.get_logger().info(f"保存 处理数据结果 到数据库: {data.url}")
         else:
             self._process_result_list.append(data)
             self.save_process_result_data()
 
     def save_process_result_data(self, is_force=False):
         if (is_force and len(self._process_result_list) > 0) or len(
-                self._process_result_list) >= self._process_size:
-            self.logger.info(
-                f"批量保存 处理数据结果 到数据库,数量: {str(len(self._process_result_list))}")
+            self._process_result_list
+        ) >= self._process_size:
+            utils.get_logger().info(
+                f"批量保存 处理数据结果 到数据库,数量: {str(len(self._process_result_list))}"
+            )
             self._processResultData.insert_batch(self._process_result_list)
             urls = [item.url for item in self._process_result_list]
             self._collectData.set_process_list(urls)
@@ -110,15 +111,11 @@ class MysqlDataStore(IDataStore):
     def set_process_result_other_urls(self, url, other_urls: str):
         return self._processResultData.set_other_urls(url, other_urls)
 
-    def check_url_is_process(self, url: str) -> bool:
-        return self._processData.check_is_process_by_url(url)
-
     def query_to_send(self):
-        return self._processData.fetch_no_send()
+        return self._processData.fetch_not_send()
 
     def query_to_report_by_date(self, start_date, end_date):
-        return self._processResultData.fetch_to_report_by_date(
-            start_date, end_date)
+        return self._processResultData.fetch_to_report_by_date(start_date, end_date)
 
     def set_send(self, no: str):
         self._processData.set_send(no)
@@ -143,5 +140,6 @@ class MysqlDataStore(IDataStore):
 
     def delete_process_data_before_date(self, date: str):
         return self._processData.delete_before_date(date)
+
     def delete_process_result_data_before_date(self, date: str):
-        return self._processResultData.delete_before_date(date)
+        return self._processResultData.delete_before_date(date)

+ 147 - 1
SourceCode/TenderCrawler/app/utils/__init__.py

@@ -1,3 +1,149 @@
+"""
+utils/__init__.py
+
+该模块初始化文件,导入了多个辅助工具类,并定义了一系列便捷函数,用于日志记录、配置管理、文件操作、字符串处理和邮件发送等功能。
+"""
+
+import json
+
+from utils.ai_helper import AiHelper
 from utils.config_helper import ConfigHelper
+from utils.email_helper import EmailHelper
+from utils.file_helper import FileHelper
+from utils.logger_helper import LoggerHelper
+from utils.string_helper import StringHelper
+
+
+def get_logger():
+    """
+    获取日志记录器实例。
+
+    该函数通过调用LoggerHelper类的静态方法get_logger()来获取一个日志记录器实例。
+    主要用于需要记录日志的位置,通过该函数获取日志记录器实例,然后进行日志记录。
+    这样做可以保持日志记录的一致性和集中管理。
+
+    :return: Logger实例,用于记录日志。
+    """
+    return LoggerHelper.get_logger()
+
+
+def clean_log_file(day: int):
+    """
+    清理指定天数之前的日志文件。
+
+    :param day: 整数,表示清理多少天前的日志文件。
+    """
+    LoggerHelper.clean_log_file(day)
+
+
+def get_config():
+    """
+    获取配置管理器实例。
+
+    该函数返回一个ConfigHelper实例,用于读取和管理应用程序的配置信息。
+
+    :return: ConfigHelper实例,用于配置管理。
+    """
+    return ConfigHelper()
+
+
+def reload_config():
+    """
+    重新加载配置文件。
+
+    该函数会重新加载配置文件中的内容,适用于配置文件发生更改后需要重新加载的情况。
+    """
+    get_config().load_config()
+
+
+def get_config_value(key: str, default: str = None):
+    """
+    获取配置项的值。
+
+    :param key: 字符串,配置项的键。
+    :param default: 字符串,默认值(可选)。
+    :return: 配置项的值,如果不存在则返回默认值。
+    """
+    return get_config().get(key, default)
+
+
+def get_config_int(key: str, default: int = None):
+    """
+    获取配置项的整数值。
+
+    :param key: 字符串,配置项的键。
+    :param default: 整数,默认值(可选)。
+    :return: 配置项的整数值,如果不存在则返回默认值。
+    """
+    return get_config().get_int(key, default)
+
+
+def get_config_bool(key: str):
+    """
+    获取配置项的布尔值。
+
+    :param key: 字符串,配置项的键。
+    :return: 配置项的布尔值。
+    """
+    return get_config().get_bool(key)
+
+
+def download_remote_file(file_url: str, file_name: str) -> str:
+    """
+    下载远程文件并保存到本地。
+
+    :param file_url: 字符串,远程文件的URL。
+    :param file_name: 字符串,保存到本地的文件名。
+    :return: 字符串,下载后的文件路径。
+    """
+    return FileHelper().download_remote_file(file_url, file_name)
+
+
+def clean_attach_file(day: int):
+    """
+    清理指定天数之前的附件文件。
+
+    :param day: 整数,表示清理多少天前的附件文件。
+    """
+    FileHelper().clean_attach_file(day)
+
+
+def to_array(s: str, split: str = ",") -> list[str]:
+    """
+    将字符串按指定分隔符拆分为数组。
+
+    :param s: 字符串,待拆分的字符串。
+    :param split: 字符串,分隔符。
+    :return: 列表,拆分后的数组。
+    """
+    return StringHelper.to_array(s, split)
+
+
+def call_openai(system_prompt: str, user_prompt: str) -> json:
+    """
+    调用OpenAI API进行对话。
+
+    :param system_prompt: 字符串,系统提示信息。
+    :param user_prompt: 字符串,用户输入的提示信息。
+    :return: JSON对象,API返回的结果。
+    """
+    return AiHelper().call_openai(system_prompt, user_prompt)
+
+
+def send_email(
+    to_addr: str,
+    subject: str,
+    body: str,
+    body_is_html: bool = True,
+    attachment_paths: str = None,
+):
+    """
+    发送电子邮件。
 
-ConfigHelper().load_config()
+    :param to_addr: 字符串,收件人地址。
+    :param subject: 字符串,邮件主题。
+    :param body: 字符串,邮件正文。
+    :param body_is_html: 布尔值,是否为HTML格式,默认为True。
+    :param attachment_paths: 字符串,附件路径(可选)。
+    """
+    EmailHelper().send_email(to_addr, subject, body, body_is_html, attachment_paths)

+ 34 - 31
SourceCode/TenderCrawler/app/utils/ai_helper.py

@@ -1,30 +1,27 @@
+import json
 import re
+
 from openai import OpenAI
-import json
 
-from utils.logger_helper import LoggerHelper
-from utils.config_helper import ConfigHelper
+import utils
 
 
 class AiHelper:
 
-    logger = LoggerHelper.get_logger()
-    config = ConfigHelper()
-
     _ai_api_key = None
     _ai_api_url = None
     _ai_max_tokens = 150
 
     def __init__(self):
-        self._ai_api_key = self.config.get("ai.key")
-        self._ai_api_url = self.config.get("ai.url")
-        self._api_model = self.config.get("ai.model")
-        max_tokens = self.config.get("ai.max_tokens")
+        self._ai_api_key = utils.get_config_value("ai.key")
+        self._ai_api_url = utils.get_config_value("ai.url")
+        self._api_model = utils.get_config_value("ai.model")
+        max_tokens = utils.get_config_value("ai.max_tokens")
         if max_tokens:
             self._ai_max_tokens = int(max_tokens)
 
     def call_openai(self, system_prompt: str, user_prompt: str) -> json:
-        self.logger.info("调用AI API")
+        utils.get_logger().info("调用AI API")
         if self._ai_api_key is None:
             raise Exception("AI API key 没有配置")
         if self._ai_api_url is None:
@@ -34,13 +31,16 @@ class AiHelper:
         client = OpenAI(api_key=self._ai_api_key, base_url=self._ai_api_url)
         completion = client.chat.completions.create(
             model=self._api_model,
-            messages=[{
-                "role": "system",
-                "content": system_prompt,
-            }, {
-                "role": "user",
-                "content": user_prompt,
-            }],
+            messages=[
+                {
+                    "role": "system",
+                    "content": system_prompt,
+                },
+                {
+                    "role": "user",
+                    "content": user_prompt,
+                },
+            ],
             stream=False,
             temperature=0.7,
         )
@@ -55,9 +55,9 @@ class AiHelper:
                 result["completion_tokens"] = usage.get("completion_tokens", 0)
                 result["prompt_tokens"] = usage.get("prompt_tokens", 0)
                 result["total_tokens"] = usage.get("total_tokens", 0)
-                # self.logger.info(f"AI Process JSON: {result}")
+                # utils.get_logger().info(f"AI Process JSON: {result}")
             else:
-                self.logger.info(f"AI Response: {response}")
+                utils.get_logger().info(f"AI Response: {response}")
             return result
         except Exception as e:
             raise Exception(f"解析 AI 响应错误: {e}")
@@ -73,18 +73,19 @@ class AiHelper:
             raise Exception("AI 响应中未找到有效的 choices 或 message 数据")
 
         # 移除多余的 ```json 和 ```
-        if message_content.startswith("```json") and message_content.endswith(
-                "```"):
+        if message_content.startswith("```json") and message_content.endswith("```"):
             message_content = message_content[6:-3]
 
         # 去除开头的 'n' 字符
-        if message_content.startswith('n'):
+        if message_content.startswith("n"):
             message_content = message_content[1:]
         # 移除无效的转义字符和时间戳前缀
-        message_content = re.sub(r'\\[0-9]{2}', '',
-                                 message_content)  # 移除 \32 等无效转义字符
-        message_content = re.sub(r'\d{4}-\d{2}-\dT\d{2}:\d{2}:\d{2}\.\d+Z', '',
-                                 message_content)  # 移除时间戳
+        message_content = re.sub(
+            r"\\[0-9]{2}", "", message_content
+        )  # 移除 \32 等无效转义字符
+        message_content = re.sub(
+            r"\d{4}-\d{2}-\dT\d{2}:\d{2}:\d{2}\.\d+Z", "", message_content
+        )  # 移除时间戳
         message_content = message_content.strip()  # 去除首尾空白字符
 
         # 替换所有的反斜杠
@@ -93,17 +94,19 @@ class AiHelper:
         return message_content
 
     def _parse_response(self, response: str, first=True) -> json:
-        # self.logger.info(f"AI Response JSON STR: {response}")
+        # utils.get_logger().info(f"AI Response JSON STR: {response}")
         try:
             data = json.loads(response)
             return data
 
         except json.JSONDecodeError as e:
             if first:
-                self.logger.error(f"JSON 解析错误,去除部分特殊字符重新解析一次: {e}")
+                utils.get_logger().error(
+                    f"JSON 解析错误,去除部分特殊字符重新解析一次: {e}"
+                )
                 # 替换中文引号为空
-                message_content = re.sub(r'[“”]', "", response)  # 替换双引号
-                message_content = re.sub(r'[‘’]', "", message_content)  # 替换单引号
+                message_content = re.sub(r"[“”]", "", response)  # 替换双引号
+                message_content = re.sub(r"[‘’]", "", message_content)  # 替换单引号
                 return self._parse_response(message_content, False)
             else:
                 raise Exception(f"解析 AI 响应错误: {response} {e}")

+ 11 - 13
SourceCode/TenderCrawler/app/utils/config_helper.py

@@ -1,14 +1,13 @@
 import os
-import yaml
 
+import yaml
 
 
 class ConfigHelper:
     _instance = None
 
     # 默认配置文件路径
-    default_config_path = os.path.join(os.path.dirname(__file__), '..',
-                                       'config.yml')
+    default_config_path = os.path.join(os.path.dirname(__file__), "..", "config.yml")
 
     # 类变量存储加载的配置
     _config = None
@@ -28,7 +27,7 @@ class ConfigHelper:
                 self._path = path
             if not os.path.exists(self._path):
                 raise FileNotFoundError(f"没有找到文件或目录:'{self._path}'")
-        with open(self._path, 'r', encoding='utf-8') as file:
+        with open(self._path, "r", encoding="utf-8") as file:
             self._config = yaml.safe_load(file)
         # 合并环境变量配置
         self._merge_env_vars()
@@ -38,9 +37,8 @@ class ConfigHelper:
     def _merge_env_vars(self, env_prefix="APP_"):  # 环境变量前缀为 APP_
         for key, value in os.environ.items():
             if key.startswith(env_prefix):
-                config_key = key[len(env_prefix):].lower()
-                self._set_nested_key(self._config, config_key.split('__'),
-                                     value)
+                config_key = key[len(env_prefix) :].lower()
+                self._set_nested_key(self._config, config_key.split("__"), value)
 
     def _set_nested_key(self, config, keys, value):
         if len(keys) > 1:
@@ -50,10 +48,10 @@ class ConfigHelper:
         else:
             config[keys[0]] = value
 
-    def get(self, key:str, default:str=None):
+    def get(self, key: str, default: str = None):
         if self._config is None:
             self.load_config(self._path)
-        keys = key.split('.')
+        keys = key.split(".")
         config = self._config
         for k in keys:
             if isinstance(config, dict) and k in config:
@@ -62,15 +60,15 @@ class ConfigHelper:
                 return default
         return config
 
-    def get_bool(self, key:str)->bool:
-        val = str(self.get(key,"0"))
+    def get_bool(self, key: str) -> bool:
+        val = str(self.get(key, "0"))
         return True if val.lower() == "true" or val == "1" else False
 
-    def get_int(self, key:str, default:int=0)->int:
+    def get_int(self, key: str, default: int = 0) -> int:
         val = self.get(key)
         if not val:
             return default
-        try :
+        try:
             return int(val)
         except ValueError:
             return default

+ 45 - 55
SourceCode/TenderCrawler/app/utils/email_helper.py

@@ -1,94 +1,86 @@
-import smtplib
-import os
-import mimetypes
+import os, mimetypes, smtplib, utils
+from email import encoders
+from email.mime.base import MIMEBase
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
-from email.mime.base import MIMEBase
-from email import encoders
-
-
-from utils.config_helper import ConfigHelper
-from utils.logger_helper import LoggerHelper
-from utils.string_helper import StringHelper
-
-
-
 
 
 class EmailHelper:
 
-    logger = LoggerHelper.get_logger()
-    config = ConfigHelper()
-
     def __init__(self):
-        self.smtp_server = self.config.get("email.smtp_server")
-        self.port = self.config.get("email.smtp_port")
-        self.username = self.config.get("email.smtp_user")
-        self.password = self.config.get("email.smtp_password")
-        self.from_email = self.config.get("email.from_email")
+        self.smtp_server = utils.get_config_value("email.smtp_server")
+        self.port = utils.get_config_value("email.smtp_port")
+        self.username = utils.get_config_value("email.smtp_user")
+        self.password = utils.get_config_value("email.smtp_password")
+        self.from_email = utils.get_config_value("email.from_email")
         # print(
         #     f"server:{self.smtp_server},port:{self.port},username:{self.username},password:{self.password},from_email:{self.from_email}"
         # )
 
-    def send_email(self,
-                   to_addr: str,
-                   subject: str,
-                   body: str,
-                   body_is_html: bool = True,
-                   attachment_paths: str = None):
+    def send_email(
+        self,
+        to_addr: str,
+        subject: str,
+        body: str,
+        body_is_html: bool = True,
+        attachment_paths: str = None,
+    ):
         msg = MIMEMultipart()
-        msg['From'] = self.from_email
-        msg['To'] = ', '.join(to_addr.split(','))
-        msg['Subject'] = subject
+        msg["From"] = self.from_email
+        msg["To"] = ", ".join(to_addr.split(","))
+        msg["Subject"] = subject
 
         # 根据 body_is_html 参数设置 MIMEText 类型
         if body_is_html:
-            msg.attach(MIMEText(body, 'html', 'utf-8'))
+            msg.attach(MIMEText(body, "html", "utf-8"))
         else:
-            msg.attach(MIMEText(body, 'plain', 'utf-8'))
+            msg.attach(MIMEText(body, "plain", "utf-8"))
 
         if attachment_paths:
-            attachment_arr = StringHelper.to_array(attachment_paths)
+            attachment_arr = utils.to_array(attachment_paths)
             for attachment_path in attachment_arr:
                 self._attach_file(msg, attachment_path)
 
         try:
-            with smtplib.SMTP_SSL(self.smtp_server,port=self.port, timeout=10) as server:
+            with smtplib.SMTP_SSL(
+                self.smtp_server, port=self.port, timeout=10
+            ) as server:
                 # server.starttls()
                 server.login(self.username, self.password)
                 # 将 to_addr 字符串通过 split(',') 分割成列表,传递给 sendmail
-                server.sendmail(self.from_email, to_addr.split(','),
-                                msg.as_string())
-            self.logger.info(f"邮件发送成功:{to_addr}")
+                server.sendmail(self.from_email, to_addr.split(","), msg.as_string())
+            utils.get_logger().info(f"邮件发送成功:{to_addr}")
             return True
         except smtplib.SMTPAuthenticationError:
-            self.logger.error("SMTP 认证失败")
+            utils.get_logger().error("SMTP 认证失败")
         except smtplib.SMTPServerDisconnected:
-            self.logger.error("SMTP 服务器断开连接")
+            utils.get_logger().error("SMTP 服务器断开连接")
         except smtplib.SMTPException as e:
-            self.logger.error(f"SMTP 异常: {e}")
+            utils.get_logger().error(f"SMTP 异常: {e}")
         except Exception as e:
-            self.logger.error(f"邮件发送失败:{to_addr} {e}")
+            utils.get_logger().error(f"邮件发送失败:{to_addr} {e}")
             return False
 
-
-    def _attach_file(self, msg: MIMEMultipart, attachment_path: str):
+    @staticmethod
+    def _attach_file(msg: MIMEMultipart, attachment_path: str):
         if not os.path.isfile(attachment_path):
-            self.logger.error(f"文件 {attachment_path} 不存在。")
+            utils.get_logger().error(f"文件 {attachment_path} 不存在。")
             return
 
         file_size = os.path.getsize(attachment_path)
         max_size = 1024 * 8192  # 8MB
 
         if file_size > max_size:
-            self.logger.error(f"文件 {attachment_path} 大小超过限制 ({file_size} bytes > {max_size} bytes),不添加附件。")
+            utils.get_logger().error(
+                f"文件 {attachment_path} 大小超过限制 ({file_size} bytes > {max_size} bytes),不添加附件。"
+            )
             return
 
         # 根据文件名后缀获取 MIME 类型
         content_type, _ = mimetypes.guess_type(attachment_path)
         if content_type is None:
-            content_type = 'application/octet-stream'  # 默认类型
-        main_type, sub_type = content_type.split('/', 1)
+            content_type = "application/octet-stream"  # 默认类型
+        main_type, sub_type = content_type.split("/", 1)
 
         with open(attachment_path, "rb") as attachment:
             # part = MIMEBase('application', 'octet-stream')
@@ -96,14 +88,12 @@ class EmailHelper:
             part.set_payload(attachment.read(max_size))
             # 获取文件名并去除第一个 @ 字符前面的部分
             name = os.path.basename(attachment_path)
-            at_index = name.find('@')
+            at_index = name.find("@")
             if at_index != -1:
-                name = name[at_index + 1:]
-            part.add_header(
-                'Content-Disposition',
-                f"attachment; filename= {name}")
-            part.add_header('Content-ID', '<0>')
-            part.add_header('X-Attachment-Id', '0')
+                name = name[at_index + 1 :]
+            part.add_header("Content-Disposition", f"attachment; filename= {name}")
+            part.add_header("Content-ID", "<0>")
+            part.add_header("X-Attachment-Id", "0")
             encoders.encode_base64(part)
             msg.attach(part)
-            self.logger.info(f"添加附件 {name} {attachment_path} 到邮件中。")
+            utils.get_logger().info(f"添加附件 {name} {attachment_path} 到邮件中。")

+ 40 - 29
SourceCode/TenderCrawler/app/utils/file_helper.py

@@ -1,27 +1,29 @@
-import os
-import shutil
-import requests
+import os, shutil,utils
 from datetime import datetime, timedelta
 from urllib.parse import urlparse
 
-from utils.logger_helper import LoggerHelper
-from utils.config_helper import ConfigHelper
+import requests
+
+
+
 
 class FileHelper:
-    logger = LoggerHelper.get_logger()
-    config = ConfigHelper()
+
     DEFAULT_ATTACH_PATH = "./attaches/"
+
     def __init__(self):
-        path = self.config.get("save.attach_file_path", self.DEFAULT_ATTACH_PATH)
+        path = utils.get_config_value("save.attach_file_path", self.DEFAULT_ATTACH_PATH)
         path = path.replace("\\", "/")
         path = path.replace("//", "/")
         self._attach_file_path = path
 
-    def download_remote_file(self, file_url, file_name) -> str | None:
-        self.logger.info(f"下载远程文件: {file_url}  文件名:{file_name}")
+    def download_remote_file(self, file_url: str, file_name: str) -> str | None:
+        utils.get_logger().info(f"下载远程文件: {file_url}  文件名:{file_name}")
         current_timestamp = datetime.now().strftime("%H%M%S%f")[:-3]  # 取前三位毫秒
         file_name = f"{current_timestamp}@{file_name}"
-        file_path = os.path.join(self._attach_file_path, f'{datetime.now().strftime("%Y-%m-%d")}')
+        file_path = os.path.join(
+            self._attach_file_path, f'{datetime.now().strftime("%Y-%m-%d")}'
+        )
         if not os.path.exists(file_path):
             os.makedirs(file_path)
         path = os.path.join(file_path, file_name)
@@ -38,44 +40,47 @@ class FileHelper:
             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59 Safari/537.36",
             "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1",
             "Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1",
-            "Mozilla/5.0 (Linux; Android 11; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Mobile Safari/537.36"
+            "Mozilla/5.0 (Linux; Android 11; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Mobile Safari/537.36",
         ]
 
         # 根据文件名长度选择一个 User-Agent
         ua_index = len(file_name) % len(user_agents)
         # 解析 file_url 获取 Referer
         parsed_url = urlparse(file_url)
-        referer = f"{parsed_url.scheme}://{parsed_url.netloc}/".replace("//download.", "//www.")
+        referer = f"{parsed_url.scheme}://{parsed_url.netloc}/".replace(
+            "//download.", "//www."
+        )
         headers = {
-            'User-Agent': user_agents[ua_index],
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
-            'Accept-Encoding': 'gzip, deflate, br',
-            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7',
-            'Referer': referer
+            "User-Agent": user_agents[ua_index],
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+            "Accept-Encoding": "gzip, deflate, br",
+            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7",
+            "Referer": referer,
         }
 
         try:
             response = requests.get(file_url, headers=headers, allow_redirects=True)
             response.raise_for_status()
-            with open(path, 'wb') as f:
+            with open(path, "wb") as f:
                 f.write(response.content)
-            self.logger.info(f"文件下载成功: {file_name}")
+            utils.get_logger().info(f"文件下载成功: {file_name}")
             return path
         except requests.exceptions.HTTPError as http_err:
-            self.logger.error(f"HTTP 错误: {http_err}")
+            utils.get_logger().error(f"HTTP 错误: {http_err}")
         except Exception as e:
-            self.logger.error(f"文件下载失败: {file_name}。Exception: {e}")
+            utils.get_logger().error(f"文件下载失败: {file_name}。Exception: {e}")
             return None
 
-
-    def clean_attach_file(self, day:int) -> None:
+    def clean_attach_file(self, day: int) -> None:
         try:
             current_time = datetime.now()
             cutoff_time = current_time - timedelta(days=day)
             for root, dirs, _ in os.walk(self._attach_file_path):
                 for dir_name in dirs:
                     path = os.path.join(root, dir_name)
-                    dir_path = str(path).replace(self._attach_file_path,"").replace("\\","/")
+                    dir_path = (
+                        str(path).replace(self._attach_file_path, "").replace("\\", "/")
+                    )
                     if dir_path.count("/") > 0:
                         continue
                     try:
@@ -83,13 +88,19 @@ class FileHelper:
                         if dir_date < cutoff_time:
                             try:
                                 shutil.rmtree(path)
-                                self.logger.info(f"  删除目录及其内容: {dir_path}")
+                                utils.get_logger().info(
+                                    f"  删除目录及其内容: {dir_path}"
+                                )
                             except PermissionError:
-                                self.logger.error(f"  权限错误,无法删除目录: {dir_path}")
+                                utils.get_logger().error(
+                                    f"  权限错误,无法删除目录: {dir_path}"
+                                )
                             except Exception as e:
-                                self.logger.error(f"  删除目录失败: {dir_path}。Exception: {e}")
+                                utils.get_logger().error(
+                                    f"  删除目录失败: {dir_path}。Exception: {e}"
+                                )
                     except ValueError:
                         # 如果目录名称不符合 %Y-%m/%d 格式,跳过
                         continue
         except Exception as e:
-            self.logger.error(f"文件清理失败。Exception: {e}")
+            utils.get_logger().error(f"文件清理失败。Exception: {e}")

+ 29 - 23
SourceCode/TenderCrawler/app/utils/logger_helper.py

@@ -1,36 +1,37 @@
-import os
 import logging
-from logging.handlers import TimedRotatingFileHandler
+import os
 from datetime import datetime
+from logging.handlers import TimedRotatingFileHandler
 
 from utils.config_helper import ConfigHelper
 
+
 class LoggerHelper:
     """
     日志辅助类,用于创建和提供日志记录器实例
     该类实现了单例模式,确保在整个应用程序中只有一个日志记录器实例被创建和使用
     """
+
     _instance = None
     config = ConfigHelper()
     _log_file_name = f"{config.get("logger.file_name", "crawler")}.log"
     _log_file_path = config.get("logger.file_path", "./logs")
     _log_level_string = config.get("logger.level", "INFO")
     _log_level = logging.getLevelName(_log_level_string)
+
     def __new__(cls, *args, **kwargs):
         """
         实现单例模式,确保日志记录器仅被创建一次
         如果尚未创建实例,则创建并初始化日志记录器
         """
         if not cls._instance:
-            cls._instance = super(LoggerHelper,
-                                  cls).__new__(cls, *args, **kwargs)
+            cls._instance = super(LoggerHelper, cls).__new__(cls, *args, **kwargs)
             try:
                 cls._instance._initialize_logger()
             except Exception as e:
                 raise Exception(f"配置logger出错: {e}")
         return cls._instance
 
-
     @property
     def logger(self):
         return self._logger
@@ -39,18 +40,20 @@ class LoggerHelper:
         """
         初始化日志记录器,包括设置日志级别、创建处理器和格式化器,并将它们组合起来
         """
-        self._logger = logging.getLogger('app_logger')
+        self._logger = logging.getLogger("app_logger")
         self._logger.setLevel(self._log_level)
 
         if not os.path.exists(self._log_file_path):
             os.makedirs(self._log_file_path)
 
         # 创建按日期分割的文件处理器
-        file_handler = TimedRotatingFileHandler(os.path.join(self._log_file_path, self._log_file_name),
-                                                when='midnight',
-                                                interval=1,
-                                                backupCount=7,
-                                                encoding='utf-8')
+        file_handler = TimedRotatingFileHandler(
+            os.path.join(self._log_file_path, self._log_file_name),
+            when="midnight",
+            interval=1,
+            backupCount=7,
+            encoding="utf-8",
+        )
         file_handler.setLevel(logging.INFO)
 
         # 创建控制台处理器
@@ -58,8 +61,7 @@ class LoggerHelper:
         console_handler.setLevel(logging.INFO)
 
         # 创建格式化器
-        formatter = logging.Formatter(
-            '%(asctime)s - %(levelname)s - %(message)s')
+        formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
 
         # 将格式化器添加到处理器
         file_handler.setFormatter(formatter)
@@ -80,16 +82,20 @@ class LoggerHelper:
         return cls._instance._logger
 
     @classmethod
-    def delete_log(cls, day:int) :
+    def clean_log_file(cls, day: int):
         if not os.path.exists(cls._log_file_path):
             return
         for filename in os.listdir(cls._log_file_path):
-            if  filename!=cls._log_file_name and filename.startswith(cls._log_file_name):
-               try:
-                   file_path = os.path.join(cls._log_file_path, filename)
-                   file_time = datetime.strptime(filename.replace(f"{cls._log_file_name}.",""), "%Y-%m-%d")
-                   if (datetime.now() - file_time).days > day:
-                       os.remove(file_path)
-                       cls.get_logger().info(f"  删除日志文件: {file_path}")
-               except Exception as e:
-                   cls.get_logger().error(f"删除日志文件出错: {filename} {e}")
+            if filename != cls._log_file_name and filename.startswith(
+                cls._log_file_name
+            ):
+                try:
+                    file_path = os.path.join(cls._log_file_path, filename)
+                    file_time = datetime.strptime(
+                        filename.replace(f"{cls._log_file_name}.", ""), "%Y-%m-%d"
+                    )
+                    if (datetime.now() - file_time).days > day:
+                        os.remove(file_path)
+                        cls.get_logger().info(f"  删除日志文件: {file_path}")
+                except Exception as e:
+                    cls.get_logger().error(f"删除日志文件出错: {filename} {e}")

+ 30 - 33
SourceCode/TenderCrawler/app/utils/mysql_helper.py

@@ -1,45 +1,44 @@
-import pymysql
+import pymysql, utils
 from pymysql.cursors import DictCursor
-from utils.config_helper import ConfigHelper
-from utils.logger_helper import LoggerHelper
 
 
 class MySQLHelper:
 
-    logger = LoggerHelper.get_logger()
-    config = ConfigHelper()
-
     def __init__(self):
         try:
-            self.host = self.config.get('mysql.host')
-            self.user = self.config.get('mysql.user')
-            self.password = self.config.get('mysql.password')
-            self.db = self.config.get('mysql.db')
-            self.port = int(self.config.get('mysql.port'))
-            self.charset = self.config.get('mysql.charset')
+            self.host = utils.get_config_value("mysql.host")
+            self.user = utils.get_config_value("mysql.user")
+            self.password = utils.get_config_value("mysql.password")
+            self.db = utils.get_config_value("mysql.db")
+            self.port = int(utils.get_config_value("mysql.port"))
+            self.charset = utils.get_config_value("mysql.charset")
             self.connection = None
         except Exception as e:
-            self.logger.error(f"加载数据库配置文件失败: {e}")
+            utils.get_logger().error(f"加载数据库配置文件失败: {e}")
 
     def connect(self):
         try:
-            self.connection = pymysql.connect(host=self.host,
-                                              user=self.user,
-                                              password=self.password,
-                                              db=self.db,
-                                              port=self.port,
-                                              charset=self.charset,
-                                              cursorclass=DictCursor)
-            # self.logger.info(f"成功连接到数据库:{self.db}。")
+            self.connection = pymysql.connect(
+                host=self.host,
+                user=self.user,
+                password=self.password,
+                db=self.db,
+                port=self.port,
+                charset=self.charset,
+                cursorclass=DictCursor,
+            )
+            # utils.get_logger().info(f"成功连接到数据库:{self.db}。")
         except pymysql.MySQLError as e:
-            self.logger.error(f"数据库连接失败: {self.host}:{self.port} {self.db}")
+            utils.get_logger().error(
+                f"数据库连接失败: {self.host}:{self.port} {self.db}"
+            )
             self.connection = None  # 确保连接失败时设置为 None
             raise Exception(f"连接数据库失败: {e}")
 
     def disconnect(self):
         if self.connection and self.connection.open:
             self.connection.close()
-            # self.logger.info("数据库连接已关闭。")
+            # utils.get_logger().info("数据库连接已关闭。")
 
     def execute_query(self, query, params=None):
         try:
@@ -48,17 +47,16 @@ class MySQLHelper:
                 result = cursor.fetchall()
                 return result
         except pymysql.MySQLError as e:
-            self.logger.error(f"执行查询时出错:{e}")
+            utils.get_logger().error(f"执行查询时出错:{e}")
             return None
 
     def execute_non_query(self, query, params=None):
-        if isinstance(params, list) and all(
-                isinstance(p, tuple) for p in params):
+        if isinstance(params, list) and all(isinstance(p, tuple) for p in params):
             self.execute_many(query, params)
         elif isinstance(params, tuple):
             self.execute(query, params)
         else:
-            self.execute(query, (params, ))
+            self.execute(query, (params,))
 
     def execute(self, query, params=None):
         try:
@@ -66,18 +64,17 @@ class MySQLHelper:
                 cursor.execute(query, params)
                 self.connection.commit()
         except pymysql.MySQLError as e:
-            self.logger.error(f"执行非查询时出错:{e}")
+            utils.get_logger().error(f"执行非查询时出错:{e}")
             self.connection.rollback()
 
     def execute_many(self, query, params: list):
-        if isinstance(params, list) and all(
-                isinstance(p, tuple) for p in params):
+        if isinstance(params, list) and all(isinstance(p, tuple) for p in params):
             try:
                 with self.connection.cursor() as cursor:
                     cursor.executemany(query, params)
                     self.connection.commit()
             except pymysql.MySQLError as e:
-                self.logger.error(f"执行非查询时出错:{e}")
+                utils.get_logger().error(f"执行非查询时出错:{e}")
                 self.connection.rollback()
         else:
             raise ValueError("参数必须是元组列表")
@@ -89,7 +86,7 @@ class MySQLHelper:
                 result = cursor.fetchone()
                 return result
         except pymysql.MySQLError as e:
-            self.logger.error(f"获取一条记录时出错:{e}")
+            utils.get_logger().error(f"获取一条记录时出错:{e}")
             return None
 
     def __enter__(self):
@@ -113,7 +110,7 @@ class MySQLHelper:
         :param traceback: 异常的traceback对象, 如果没有异常则为None。
         """
         if exc_type:
-            self.logger.error(
+            utils.get_logger().error(
                 f"数据库发生异常,断开连接。异常类型:{exc_type}, 异常值:{exc_value} traceback: {traceback}"
             )
         self.disconnect()  # 断开连接

+ 7 - 9
SourceCode/TenderCrawler/app/utils/string_helper.py

@@ -1,7 +1,7 @@
 class StringHelper:
 
     @staticmethod
-    def check_empty(s: str,default:str) -> str:
+    def check_empty(s: str, default: str) -> str:
         """
         检查字符串是否为空
         """
@@ -9,10 +9,8 @@ class StringHelper:
             return s
         return default
 
-
-
     @staticmethod
-    def to_array(s: str, sep: str=",") -> list[str]:
+    def to_array(s: str, sep: str = ",") -> list[str]:
         """
         将字符串按指定分隔符分割成数组。
 
@@ -27,7 +25,7 @@ class StringHelper:
         return s.split(sep)
 
     @staticmethod
-    def startswith(s: str, prefix: str) -> str:
+    def e_startswith(s: str, prefix: str) -> str:
         """
         检查字符串是否以特定前缀开头,如果没有则补全。
 
@@ -40,7 +38,7 @@ class StringHelper:
         return s
 
     @staticmethod
-    def endswith(s: str, suffix: str) -> str:
+    def e_endswith(s: str, suffix: str) -> str:
         """
         检查字符串是否以特定后缀结尾,如果没有则补全。
 
@@ -53,7 +51,7 @@ class StringHelper:
         return s
 
     @staticmethod
-    def split_and_clean(s: str, sep: str=",") -> list[str]:
+    def split_and_clean(s: str, sep: str = ",") -> list[str]:
         """
         将字符串按指定分隔符分割并去除空字符串。
 
@@ -63,7 +61,7 @@ class StringHelper:
         """
         if not s:
             return []
-        parts = StringHelper.to_array(s,sep)
+        parts = StringHelper.to_array(s, sep)
         return [part.strip() for part in parts if part.strip()]
 
     @staticmethod
@@ -74,4 +72,4 @@ class StringHelper:
         :param s: 要处理的字符串。
         :return: 替换后的字符串。
         """
-        return ' '.join(s.split())
+        return " ".join(s.split())

+ 3 - 2
SourceCode/TenderCrawler/init.sql

@@ -1,3 +1,4 @@
+# noinspection SpellCheckingInspectionForFile
 
 
 SET NAMES utf8mb4;
@@ -19,8 +20,8 @@ CREATE TABLE `t_urls`  (
   PRIMARY KEY (`url`) USING BTREE
 ) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin ROW_FORMAT = Dynamic;
 
-INSERT INTO `t_urls` (`url`, `adapter_type`, `username`, `password`, `keywords`, `is_active`, `sort`, `remark`) VALUES ('https://www.ccgp.gov.cn/index.shtml', 'ccgp', '', '', '红外,红外显微镜,傅里叶红外,红外光谱,显微红外,拉曼,激光共聚焦拉曼,拉曼显微镜,拉曼光谱,显微拉曼,气体分析\'', 1, 100, '中国政府采购网 https://www.ccgp.gov.cn/index.shtml');
-INSERT INTO `t_urls` (`url`, `adapter_type`, `username`, `password`, `keywords`, `is_active`, `sort`, `remark`) VALUES ('https://www.chinabidding.com/', 'chinabidding', 'brukernano2011', '695765FqX', '红外光谱仪', 1,0, '中国国际招标网 (www.chinabidding.com 必联网)');
+INSERT INTO `t_urls` (`url`, `adapter_type`, `username`, `password`, `keywords`, `is_active`, `sort`, `remark`) VALUES ('https://www.ccgp.gov.cn/index.shtml', 'ccgp', '', '', '红外光谱仪,红外显微镜,傅里叶红外,红外光谱,显微红外,拉曼,激光共聚焦拉曼,拉曼显微镜,拉曼光谱,显微拉曼,红外,气体分析', 1, 100, '中国政府采购网 https://www.ccgp.gov.cn/index.shtml');
+INSERT INTO `t_urls` (`url`, `adapter_type`, `username`, `password`, `keywords`, `is_active`, `sort`, `remark`) VALUES ('https://www.chinabidding.com/', 'chinabidding', 'brukernano2011', '695765FqX', '红外光谱仪,红外显微镜,傅里叶红外,红外光谱,显微红外,拉曼,激光共聚焦拉曼,拉曼显微镜,拉曼光谱,显微拉曼,红外,气体分析', 1,0, '中国国际招标网 (www.chinabidding.com 必联网)');
 
 
 -- ----------------------------