Kaynağa Gözat

Fix 拼写错误 (spelling errors: provice → province)

YueYunyun 3 hafta önce
ebeveyn
işleme
5b2834a750

+ 2 - 2
SourceCode/TenderCrawler/app/config.yml

@@ -43,7 +43,7 @@ ai:
             export interface Tender { //招标信息
                 no: string; // 招标项目编号
                 title: string; // 招标公告标题
-                provice: string; // 招标单位省份
+                province: string; // 招标单位省份
                 city: string; // 招标单位城市
                 date: string; // 项目开标的时间
                 address: string; // 项目开标的地点
@@ -66,7 +66,7 @@ ai:
               no: string; // 项目编号
               title: string; // 中标公告标题
               date: string; // 中标公告时间
-              provice: string; // 招标单位省份
+              province: string; // 招标单位省份
               city: string; // 招标单位城市
               summary: string; // 公告摘要信息,100字左右
               instruments: Instrument[]; // 中标设备的信息

+ 24 - 35
SourceCode/TenderCrawler/app/jobs/data_process.py

@@ -15,7 +15,7 @@ class DataProcess:
             export interface Tender { //招标信息
                 no: string; // 招标项目编号
                 title: string; // 招标公告标题
-                provice: string; // 招标单位省份
+                province: string; // 招标单位省份
                 city: string; // 招标单位城市
                 date: string; // 项目开标的时间
                 address: string; // 项目开标的地点
@@ -39,7 +39,7 @@ class DataProcess:
               no: string; // 项目编号
               title: string; // 中标公告标题
               date: string; // 中标公告时间,格式为yyyy-MM-dd 例如:2025-01-01
-              provice: string; // 招标单位省份
+              province: string; // 招标单位省份
               city: string; // 招标单位城市
               summary: string; // 公告摘要信息,100字左右
               instruments: Instrument[]; // 中标设备的信息,关于光谱仪的设备,其他设备不要
@@ -50,14 +50,11 @@ class DataProcess:
     def __init__(self, store: IDataStore):
         self._store = store
         self._ai_system_prompt = utils.get_config_value(
-            "ai.system_prompt", self.DEFAULT_AI_SYSTEM_PROMPT
-        )
+            "ai.system_prompt", self.DEFAULT_AI_SYSTEM_PROMPT)
         self._ai_prompt_template_1 = utils.get_config_value(
-            "ai.prompt_template_1", self.DEFAULT_AI_PROMPT_TEMPLATE_1
-        )
+            "ai.prompt_template_1", self.DEFAULT_AI_PROMPT_TEMPLATE_1)
         self._ai_prompt_template_2 = utils.get_config_value(
-            "ai.prompt_template_2", self.DEFAULT_AI_PROMPT_TEMPLATE_2
-        )
+            "ai.prompt_template_2", self.DEFAULT_AI_PROMPT_TEMPLATE_2)
 
     @property
     def store(self) -> IDataStore:
@@ -84,27 +81,20 @@ class DataProcess:
             if item.status == 1:
                 utils.get_logger().info(f"ALREADY1 URL:{url}")
                 return
-            data = (
-                self.store.query_one_process_by_url(url)
-                if item.data_type == 0
-                else self.store.query_one_process_result_by_url(url)
-            )
+            data = (self.store.query_one_process_by_url(url) if item.data_type
+                    == 0 else self.store.query_one_process_result_by_url(url))
             if data:
-                utils.get_logger().info(f"ALREADY2 [{item.data_type}] URL==> {url}")
+                utils.get_logger().info(
+                    f"ALREADY2 [{item.data_type}] URL==> {url}")
                 return
-            data = (
-                self._ai_process_1(item)
-                if item.data_type == 0
-                else self._ai_process_2(item)
-            )
+            data = (self._ai_process_1(item)
+                    if item.data_type == 0 else self._ai_process_2(item))
             if data:
                 old = None
                 if data.no:
-                    old = (
-                        self.store.query_one_process_result_by_no(data.no)
-                        if item.data_type == 0
-                        else self.store.query_one_process_by_no(data.no)
-                    )
+                    old = (self.store.query_one_process_result_by_no(data.no)
+                           if item.data_type == 0 else
+                           self.store.query_one_process_by_no(data.no))
                 if not old:
                     data.url = url
                     data.keyword = item.keyword
@@ -120,15 +110,14 @@ class DataProcess:
                         else:
                             old.other_urls = url
                         if item.data_type == 0:
-                            self.store.set_process_other_urls(data.url, old.other_urls)
+                            self.store.set_process_other_urls(
+                                data.url, old.other_urls)
                         else:
                             self.store.set_process_result_other_urls(
-                                data.url, old.other_urls
-                            )
+                                data.url, old.other_urls)
                     self.store.set_collect_process(old.url)
                     utils.get_logger().info(
-                        f"ALREADY 编号: {data.no} URL:{old.other_urls}"
-                    )
+                        f"ALREADY 编号: {data.no} URL:{old.other_urls}")
 
             utils.get_logger().info("END   ==>" + url)
         except Exception as e:
@@ -137,8 +126,8 @@ class DataProcess:
     def _ai_process_1(self, item: CollectData) -> ProcessData | None:
         try:
             data = utils.call_openai(
-                self._ai_system_prompt, f"{item.content} {self._ai_prompt_template_1}"
-            )
+                self._ai_system_prompt,
+                f"{item.content} {self._ai_prompt_template_1}")
             # area_str = data.get("area")
             #
             # if "省" in area_str:
@@ -152,7 +141,7 @@ class DataProcess:
                 no=data.get("no"),
                 title=data.get("title"),
                 date=data.get("date"),
-                provice=data.get("provice"),
+                province=data.get("province"),
                 city=data.get("city"),
                 address=data.get("address"),
                 devices=data.get("devices"),
@@ -169,12 +158,12 @@ class DataProcess:
     def _ai_process_2(self, item: CollectData) -> ProcessResultData | None:
         try:
             data = utils.call_openai(
-                self._ai_system_prompt, f"{item.content} {self._ai_prompt_template_2}"
-            )
+                self._ai_system_prompt,
+                f"{item.content} {self._ai_prompt_template_2}")
             result = ProcessResultData(
                 no=data.get("no"),
                 title=data.get("title"),
-                provice=data.get("provice"),
+                province=data.get("province"),
                 city=data.get("city"),
                 date=data.get("date"),
                 instruments_o=data.get("instruments"),

+ 3 - 3
SourceCode/TenderCrawler/app/jobs/data_send.py

@@ -156,7 +156,7 @@ class DataSend:
             <div class="container">
                 <h1>{item.title}</h1>
                 <p><strong>招标编号:</strong> {item.no if item.no else ""}</p>
-                <p><strong>项目区域:</strong> {item.provice if item.provice else ""}{item.city if item.city else ""}</p>
+                <p><strong>项目区域:</strong> {item.province if item.province else ""}{item.city if item.city else ""}</p>
                 <p><strong>相关设备:</strong> {item.devices if item.devices else ""}</p>
                 <p><strong>开标时间:</strong> {item.date if item.date else ""}</p>
                 <p><strong>开标地点:</strong> {item.address if item.address else ""}</p>
@@ -288,7 +288,7 @@ class DataSend:
         html = f"""
                   <tr>
                       <td rowspan="{row_count}"><a title="点击查看详情" href="{item.url}">{item.title}</a></td>
-                      <td rowspan="{row_count}">{item.provice if item.provice else ''}{item.city if item.city else ''}</td>
+                      <td rowspan="{row_count}">{item.province if item.province else ''}{item.city if item.city else ''}</td>
                         {self._gen_report_body_item_instrument(item.instruments[0] if item.instruments else None)}
                       <td rowspan="{row_count}">{item.date if item.date else ''}</td>
                   </tr>
@@ -341,7 +341,7 @@ class DataSend:
             "项目编号": data.no if data and data.no else "",
             "项目名称": data.title if data and data.title else "",
             "公告日期": data.date if data and data.date else "",
-            "招标省份": data.provice if data and data.provice else "",
+            "招标省份": data.province if data and data.province else "",
             "招标城市": data.city if data and data.city else "",
             "中标单位名称": instrument.company if instrument.company else "",
             "仪器名称": instrument.name if instrument and instrument.name else "",

+ 27 - 8
SourceCode/TenderCrawler/app/main.py

@@ -11,16 +11,31 @@ from jobs.job_runner import JobRunner
 
 
 class Application:
+    """应用程序主类"""
 
     def __init__(self):
+        """初始化应用程序"""
+        self._init_logger()
+        self._init_config()
+        self._init_signal_handlers()
+
+    def _init_logger(self):
+        """初始化日志系统"""
         self.logger = utils.get_logger()
+        self.logger.info("日志系统初始化完成")
+
+    def _init_config(self):
+        """初始化配置"""
         self.running = True
         self.job: Optional[JobRunner] = None
-        self.interval = utils.get_config_int("job.sleep_interval", 10)  # 默认10秒
+        self.interval = utils.get_config_int("job.sleep_interval", 10)
+        self.logger.info(f"配置加载完成, 任务检查间隔: {self.interval}秒")
 
-        # 注册信号处理
+    def _init_signal_handlers(self):
+        """初始化信号处理器"""
         signal.signal(signal.SIGINT, self._handle_shutdown)
         signal.signal(signal.SIGTERM, self._handle_shutdown)
+        self.logger.info("信号处理器注册完成")
 
     def _handle_shutdown(self, signum, frame):
         """处理退出信号"""
@@ -65,15 +80,15 @@ class Application:
             self.job = JobRunner()
             self.job.run_job()
 
-            self.logger.info(f"应用程序启动成功! 任务执行检测间隔: {self.interval}秒")
+            self.logger.info(f"应用程序启动成功! 任务执行间隔: {self.interval}秒")
 
             # 主循环
             while self.running:
                 try:
+                    schedule.run_pending()
                     now = datetime.datetime.now()
                     self._check_reload(now)
                     time.sleep(self.interval)
-                    schedule.run_pending()
                 except Exception as e:
                     self.logger.error(f"主循环执行异常: {e}")
                     time.sleep(self.interval)
@@ -82,10 +97,14 @@ class Application:
             self.logger.error(f"应用程序运行异常: {e}")
             sys.exit(1)
         finally:
-            self.logger.info("应用程序正在关闭...")
-            if self.job:
-                self.job.stop_job()
-            self.logger.info("应用程序已关闭")
+            self._cleanup()
+
+    def _cleanup(self):
+        """清理资源"""
+        self.logger.info("应用程序正在关闭...")
+        if self.job:
+            self.job.stop_job()
+        self.logger.info("应用程序已关闭")
 
 
 if __name__ == "__main__":

+ 36 - 43
SourceCode/TenderCrawler/app/models/process_data.py

@@ -13,7 +13,7 @@ class ProcessData:
         url=None,
         keyword=None,
         date=None,
-        provice=None,
+        province=None,
         city=None,
         address=None,
         summary=None,
@@ -33,12 +33,12 @@ class ProcessData:
         self.title = title
         self.url = url
         self.date = date
-        self.provice = provice.replace("省", "").replace("市", "") if provice else ""
-        self.city = (
-            city.replace("市", "").replace("区", "").replace("县", "") if city else ""
-        )
-        if self.provice == self.city:
-            self.provice = ""
+        self.province = province.replace("省", "").replace(
+            "市", "") if province else ""
+        self.city = (city.replace("市", "").replace("区", "").replace("县", "")
+                     if city else "")
+        if self.province == self.city:
+            self.province = ""
         self.keyword = keyword
         self.address = address
         self.summary = summary
@@ -57,13 +57,12 @@ class ProcessData:
     def __repr__(self):
         return (
             f"ProcessData(no={self.no}, title={self.title}, date={self.date}, "
-            f"provice={self.provice},city={self.city}, address={self.address}, summary={self.summary}, "
+            f"province={self.province},city={self.city}, address={self.address}, summary={self.summary}, "
             f"status={self.status}, create_time={self.create_time}, "
-            f"send_time={self.send_time}, remark={self.remark})"
-        )
+            f"send_time={self.send_time}, remark={self.remark})")
 
     _insert_query = """
-              INSERT IGNORE INTO t_data (no, title, url, keyword, date, provice, city, address, summary, release_date, devices, attach_path, status, create_time, prompt_tokens, completion_tokens, total_tokens)
+              INSERT IGNORE INTO t_data (no, title, url, keyword, date, province, city, address, summary, release_date, devices, attach_path, status, create_time, prompt_tokens, completion_tokens, total_tokens)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
           """
 
@@ -80,7 +79,7 @@ class ProcessData:
             process_data.url,
             process_data.keyword,
             process_data.date,
-            process_data.provice,
+            process_data.province,
             process_data.city,
             process_data.address,
             process_data.summary,
@@ -103,33 +102,29 @@ class ProcessData:
 
     def insert_batch(self, process_data_list):
         if not all(
-            isinstance(process_data, self.__class__)
-            for process_data in process_data_list
-        ):
+                isinstance(process_data, self.__class__)
+                for process_data in process_data_list):
             raise TypeError("process_data_list 中的所有元素必须是 ProcessData 的实例")
 
-        insert_params = [
-            (
-                process_data.no,
-                process_data.title,
-                process_data.url,
-                process_data.keyword,
-                process_data.date,
-                process_data.provice,
-                process_data.city,
-                process_data.address,
-                process_data.summary,
-                process_data.release_date,
-                process_data.devices,
-                process_data.attach_path,
-                0,
-                datetime.now(),
-                process_data.prompt_tokens,
-                process_data.completion_tokens,
-                process_data.total_tokens,
-            )
-            for process_data in process_data_list
-        ]
+        insert_params = [(
+            process_data.no,
+            process_data.title,
+            process_data.url,
+            process_data.keyword,
+            process_data.date,
+            process_data.province,
+            process_data.city,
+            process_data.address,
+            process_data.summary,
+            process_data.release_date,
+            process_data.devices,
+            process_data.attach_path,
+            0,
+            datetime.now(),
+            process_data.prompt_tokens,
+            process_data.completion_tokens,
+            process_data.total_tokens,
+        ) for process_data in process_data_list]
 
         # update_params = [(process_data.url, )
         #                  for process_data in process_data_list]
@@ -148,7 +143,7 @@ class ProcessData:
 
     def fetch_one_process_by_url(self, url: str):
         with MySQLHelper() as db_helper:
-            result = db_helper.fetch_one(self._one_url_query, (url,))
+            result = db_helper.fetch_one(self._one_url_query, (url, ))
             if not result:
                 return None
             data = ProcessData(
@@ -166,7 +161,7 @@ class ProcessData:
     def fetch_one_process_by_no(self, no: str):
         with MySQLHelper() as db_helper:
 
-            result = db_helper.fetch_one(self._one_no_query, (no,))
+            result = db_helper.fetch_one(self._one_no_query, (no, ))
             if not result:
                 return None
             data = ProcessData(
@@ -209,10 +204,8 @@ class ProcessData:
         :return: 删除的行数
         """
         with MySQLHelper() as db_helper:
-            params = (date,)
+            params = (date, )
             db_helper.execute_non_query(self._delete_before_date_query, params)
             affected_rows = db_helper.connection.affected_rows()
-            utils.get_logger().info(
-                f"删除 {date} 之前共 {affected_rows} 条 招标处理记录。"
-            )
+            utils.get_logger().info(f"删除 {date} 之前共 {affected_rows} 条 招标处理记录。")
             return affected_rows

+ 42 - 52
SourceCode/TenderCrawler/app/models/process_result_data.py

@@ -6,6 +6,7 @@ from utils.mysql_helper import MySQLHelper
 
 
 class InstrumentData:
+
     def __init__(
         self,
         company: str,
@@ -34,7 +35,7 @@ class ProcessResultData:
         title=None,
         url=None,
         keyword=None,
-        provice=None,
+        province=None,
         city=None,
         date=None,
         instruments=None,
@@ -58,12 +59,12 @@ class ProcessResultData:
         self.instruments_str = ""
         self.instruments = []
         self.set_instruments(instruments, instruments_o)
-        self.provice = provice.replace("省", "").replace("市", "") if provice else ""
-        self.city = (
-            city.replace("市", "").replace("区", "").replace("县", "") if city else ""
-        )
-        if self.provice == self.city:
-            self.provice = ""
+        self.province = province.replace("省", "").replace(
+            "市", "") if province else ""
+        self.city = (city.replace("市", "").replace("区", "").replace("县", "")
+                     if city else "")
+        if self.province == self.city:
+            self.province = ""
         self.summary = summary
         self.attach_path = attach_path
         self.status = status
@@ -78,10 +79,9 @@ class ProcessResultData:
     def __repr__(self):
         return (
             f"ProcessResultData(no={self.no}, title={self.title}, date={self.date}, "
-            f"keyword={self.keyword}, provice={self.provice},city={self.city},instruments={self.instruments_str} summary={self.summary}, attach_path={self.attach_path}, "
+            f"keyword={self.keyword}, province={self.province},city={self.city},instruments={self.instruments_str} summary={self.summary}, attach_path={self.attach_path}, "
             f"status={self.status}, create_time={self.create_time}, "
-            f"send_time={self.send_time}, remark={self.remark})"
-        )
+            f"send_time={self.send_time}, remark={self.remark})")
 
     def set_instruments(self, instruments_str: str, instruments):
         if instruments is None:
@@ -94,19 +94,16 @@ class ProcessResultData:
             ]
         else:
             self.instruments = instruments or []
-            self.instruments_str = (
-                json.dumps(
-                    instruments,
-                    ensure_ascii=False,
-                )
-                if len(instruments) > 0
-                else ""
-            )
+            self.instruments_str = (json.dumps(
+                instruments,
+                ensure_ascii=False,
+            ) if len(instruments) > 0 else "")
 
     _insert_query = """
-              INSERT IGNORE INTO t_data_result (no, title, url, keyword, date, provice, city, instruments, summary, attach_path, status, create_time, prompt_tokens, completion_tokens, total_tokens)
+              INSERT IGNORE INTO t_data_result (no, title, url, keyword, date, province, city, instruments, summary, attach_path, status, create_time, prompt_tokens, completion_tokens, total_tokens)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
           """
+
     # _update_query = """
     #             UPDATE t_collect_data SET status = 1 WHERE url = %s;
     #         """
@@ -121,7 +118,7 @@ class ProcessResultData:
             process_result_data.url,
             process_result_data.keyword,
             process_result_data.date,
-            process_result_data.provice,
+            process_result_data.province,
             process_result_data.city,
             process_result_data.instruments_str,
             process_result_data.summary,
@@ -142,33 +139,28 @@ class ProcessResultData:
 
     def insert_batch(self, process_result_data_list):
         if not all(
-            isinstance(process_result_data, self.__class__)
-            for process_result_data in process_result_data_list
-        ):
+                isinstance(process_result_data, self.__class__)
+                for process_result_data in process_result_data_list):
             raise TypeError(
-                "process_result_data_list 中的所有元素必须是 ProcessResultData 的实例"
-            )
+                "process_result_data_list 中的所有元素必须是 ProcessResultData 的实例")
 
-        insert_params = [
-            (
-                process_result_data.no,
-                process_result_data.title,
-                process_result_data.url,
-                process_result_data.keyword,
-                process_result_data.date,
-                process_result_data.provice,
-                process_result_data.city,
-                process_result_data.instruments_str,
-                process_result_data.summary,
-                process_result_data.attach_path,
-                0,
-                datetime.now(),
-                process_result_data.prompt_tokens,
-                process_result_data.completion_tokens,
-                process_result_data.total_tokens,
-            )
-            for process_result_data in process_result_data_list
-        ]
+        insert_params = [(
+            process_result_data.no,
+            process_result_data.title,
+            process_result_data.url,
+            process_result_data.keyword,
+            process_result_data.date,
+            process_result_data.province,
+            process_result_data.city,
+            process_result_data.instruments_str,
+            process_result_data.summary,
+            process_result_data.attach_path,
+            0,
+            datetime.now(),
+            process_result_data.prompt_tokens,
+            process_result_data.completion_tokens,
+            process_result_data.total_tokens,
+        ) for process_result_data in process_result_data_list]
 
         # update_params = [(process_result_data.url, )
         #                  for process_result_data in process_result_data_list]
@@ -187,7 +179,7 @@ class ProcessResultData:
 
     def fetch_one_process_by_url(self, url: str):
         with MySQLHelper() as db_helper:
-            result = db_helper.fetch_one(self._one_url_query, (url,))
+            result = db_helper.fetch_one(self._one_url_query, (url, ))
             if not result:
                 return None
             data = ProcessResultData(
@@ -204,7 +196,7 @@ class ProcessResultData:
 
     def fetch_one_process_by_no(self, no: str):
         with MySQLHelper() as db_helper:
-            result = db_helper.fetch_one(self._one_no_query, (no,))
+            result = db_helper.fetch_one(self._one_no_query, (no, ))
             if not result:
                 return None
             data = ProcessResultData(
@@ -215,7 +207,7 @@ class ProcessResultData:
             )
             return data
 
-    _not_send_query = "SELECT no, title, url, keyword, date, provice, city, instruments, summary, attach_path, status, create_time, send_time FROM t_data_result WHERE status = 0"
+    _not_send_query = "SELECT no, title, url, keyword, date, province, city, instruments, summary, attach_path, status, create_time, send_time FROM t_data_result WHERE status = 0"
 
     def fetch_not_send(self):
         with MySQLHelper() as db_helper:
@@ -265,10 +257,8 @@ class ProcessResultData:
         :return:
         """
         with MySQLHelper() as db_helper:
-            params = (date,)
+            params = (date, )
             db_helper.execute_non_query(self._delete_before_date_query, params)
             affected_rows = db_helper.connection.affected_rows()
-            utils.get_logger().info(
-                f"删除 {date} 之前共 {affected_rows} 条 中标处理记录。"
-            )
+            utils.get_logger().info(f"删除 {date} 之前共 {affected_rows} 条 中标处理记录。")
             return affected_rows

+ 2 - 2
SourceCode/TenderCrawler/init.sql

@@ -133,7 +133,7 @@ CREATE TABLE `t_data`  (
   `no` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '招标编号',
   `title` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '招标标题',
   `date` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '招标时间',
-  `provice` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '招标单位省份',
+  `province` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '招标单位省份',
   `city` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '招标单位城市',
   `address` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '详细地点',
   `summary` text CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL COMMENT '招标摘要',
@@ -161,7 +161,7 @@ CREATE TABLE `t_data_result`  (
   `no` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '招标编号',
   `title` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '招标标题',
   `date` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '公告时间',
-  `provice` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '招标单位省份',
+  `province` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '招标单位省份',
   `city` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '招标单位城市',
   `instruments` text CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '相关设备仪器',
   `summary` text CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL COMMENT '公告摘要',