瀏覽代碼

Update 月度中标报告报表添加excel文件

YueYunyun 6 月之前
父節點
當前提交
cdb6cffd03

+ 1 - 0
.gitignore

@@ -162,3 +162,4 @@ cython_debug/
 .dev/
 logs/
 attaches/
+temp_files/

+ 2 - 3
SourceCode/TenderCrawler/app/adapters/ccgp_data_collection_adapter.py

@@ -38,7 +38,7 @@ class CcgpDataCollectionAdapter(IDataCollectionAdapter):
         try:
             if not keyword:
                 raise Exception("搜索关键字不能为空")
-            wait = WebDriverWait(self.driver, 10, 1)
+            wait = WebDriverWait(self.driver, 30, 1)
             wait.until(ec.presence_of_element_located((By.ID, "searchForm")))
             search_el = self.driver.find_element(By.ID, "kw")
             sleep(2)
@@ -134,8 +134,7 @@ class CcgpDataCollectionAdapter(IDataCollectionAdapter):
             if self._check_is_collect_by_url(url):
                 close = False
                 return
-            # utils.get_logger().info(f"跳转详情")
-            print(".", end="")
+            utils.get_logger().debug(f"跳转详情")
             sleep(1)
             item.click()
             wait.until(ec.number_of_windows_to_be(2))

+ 3 - 1
SourceCode/TenderCrawler/app/config.yml

@@ -19,7 +19,8 @@ save:
   collect_data_key: '红外光谱仪,拉曼光谱仪'
   collect_batch_size: 100
   process_batch_size: 1 #AI处理一条插入一条
-  attach_file_path: './attaches/'
+  attach_file_path: './temp_files/attaches/'
+  report_file_path: './temp_files/report/'
 mysql:
   host: 192.168.0.81
   port: 3307
@@ -72,3 +73,4 @@ clean:
   collect_data: 30 # 清理多少天前的采集数据 0不清理
   process_data: 30 # 清理多少天前的处理数据[招标] 0不清理
   process_result_data: 60 # 清理多少天前的处理数据[中标] 0不清理 小于45会强制设为45
+  report: 90 # 清理多少天前的报表 0不清理  小于60会强制设为60

+ 12 - 0
SourceCode/TenderCrawler/app/jobs/data_clean.py

@@ -8,6 +8,9 @@ class DataClean:
     def __init__(self):
         self._clean_day = utils.get_config_int("clean.day", 30)
         self._clean_attach_day = utils.get_config_int("clean.attach", self._clean_day)
+        self._clean_report_day = utils.get_config_int("clean.report", self._clean_day)
+        if self._clean_report_day < 60:
+            self._clean_report_day = 60
         self._clean_log_day = utils.get_config_int("clean.log", self._clean_day)
         self._clean_collect_data_day = utils.get_config_int(
             "clean.collect_data", self._clean_day
@@ -30,6 +33,7 @@ class DataClean:
             self._clean_collect_data()
             self._clean_process_data()
             self._clean_process_result_data()
+            self._clean_report()
             utils.get_logger().info("清除历史文件数据 完成")
         except Exception as e:
             utils.get_logger().error(e)
@@ -42,6 +46,14 @@ class DataClean:
         utils.clean_attach_file(self._clean_attach_day)
         utils.get_logger().info("清除历史附件数据 完成")
 
+    def _clean_report(self):
+        # Remove generated winning-bid report files older than the configured
+        # retention (in days), logging the start and end of the run.
+        # NOTE(review): the visible __init__ hunk raises any retention below 60
+        # up to 60, so the "0 means skip" branch looks unreachable - confirm.
+        retention_days = self._clean_report_day
+        if retention_days != 0:
+            utils.get_logger().info("开始 清除中标报告报表")
+            utils.clean_report_file(retention_days)
+            utils.get_logger().info("清除中标报告报表 完成")
+        else:
+            # A retention of 0 disables report cleanup.
+            utils.get_logger().info("跳过 清除中标报告报表")
+
     def _clean_log(self):
         if self._clean_log_day == 0:
             utils.get_logger().info("跳过 清除历史日志数据")

+ 70 - 57
SourceCode/TenderCrawler/app/jobs/data_send.py

@@ -52,7 +52,8 @@ class DataSend:
         title_prev = utils.get_config_value("email.report_title_prev", "【中标报告】")
         title = f"{start_date.month}月中标结果报告"
         body = self._build_report_email_html(title, items)
-        flag = utils.send_email(email, f"{title_prev} {title}", body, True)
+        attach_path = self._gen_report_exlecl(title, items)
+        flag = utils.send_email(email, f"{title_prev} {title}", body, True, attach_path)
         if flag:
             utils.get_logger().info("发送中标报告邮件成功")
 
@@ -177,10 +178,8 @@ class DataSend:
         """
         return html_body
 
-    def _build_report_email_html(self, title, items) -> str:
-        body = ""
-        for item in items:
-            body += self._build_report_email_body(item)
+    def _build_report_email_html(self, title, items: list[ProcessResultData]) -> str:
+        body = self._build_report_email_body(items)
         html = f"""
         <html>
         <head>
@@ -196,63 +195,52 @@ class DataSend:
                     color: #333;
                 }}
                 .container {{
-                    max-width: 600px;
+                    max-width: 1000px;
                     margin: 0 auto;
                     background-color: #fff;
                     padding: 20px;
                     border-radius: 8px;
                     box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
                 }}
-                .button-container {{
-                    text-align: center;
-                    margin-top: 20px;
-                }}
-                .button {{
-                    display: inline-block;
-                    padding: 10px 20px;
-                    font-size: 16px;
-                    color: #fff!important;
-                    background-color: #007bff;
-                    text-decoration: none;
-                    border-radius: 5px;
-                    transition: background-color 0.3s;
-                }}
-                .button:hover {{
-                    background-color: #0056b3;
-                }}
                 .system {{
                     color: #aaa;
+                    font-size: 80%;
                 }}
-                .card {{
+                .table-container {{
+                    overflow-x: auto;
+                    width: 100%;
+                }}
+                .table {{
+                    width: 1000px;
                     background-color: #ffffff;
                     border: 1px solid #dddddd;
                     border-radius: 8px;
                     margin-bottom: 20px;
                     padding: 20px;
                     box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+                    border-collapse: collapse;
                 }}
-                .card h2 {{
-                    margin-top: 0;
+                .table th, .table td {{
+                    padding: 5px;
+                    border-bottom: 1px solid #dddddd;
+                    word-wrap: break-word;
+                    text-align: center;
+                    font-size:12px;
                 }}
-                .card p {{
-                    margin: 0;
+                .table th:not(:first-child), .table td:not(:first-child) {{
+                    border-left: 1px solid #dddddd;
                 }}
-                .button-container {{
-                    text-align: center;
-                    margin-top: 15px;
+                .table th {{
+                    padding: 10px;
+                    background-color: #f8f9fa;
+                    font-weight: bold;
+                    font-size:14px;
                 }}
-                .button {{
-                    display: inline-block;
-                    padding: 6px 15px;
-                    font-size: 14px;
-                    color: #fff!important;
-                    background-color: #007bff;
-                    text-decoration: none;
-                    border-radius: 3px;
-                    transition: background-color 0.3s;
+                .table tr:last-child td {{
+                    border-bottom: none;
                 }}
-                .button:hover {{
-                    background-color: #0056b3;
+                .table td a{{
+                    color: #007bff;
                 }}
             </style>
         </head>
@@ -268,23 +256,48 @@ class DataSend:
         return html
 
     @staticmethod
-    def _build_report_email_body(item: ProcessResultData) -> str:
-        body = f"""
-           <div class="card">
-               <h2>{item.title}</h2>
-               <p><strong>项目编号:</strong> {item.no}</p>
-               <p><strong>公告日期:</strong> {item.date}</p>
-               <p><strong>关键词:</strong> {item.keyword}</p>
-               <p><strong>价格:</strong> {item.price}</p>
-               <p><strong>中标人:</strong> {item.bidder}</p>
-               <p><strong>摘要:</strong> {item.summary}</p>
-               <div class="button-container">
-                <a href="{item.url}" class="button">查看详情</a>
-               </div>
-           </div>
-           """
+    def _build_report_email_body(items: list[ProcessResultData]) -> str:
+        """Render the winning-bid results as a single HTML table.
+
+        Each item contributes one row: the title (linked to ``item.url``),
+        the announcement date, the price and the winning bidder.
+
+        Every interpolated value is passed through ``html.escape`` so that
+        characters such as ``<``, ``&`` or ``"`` in a title or URL cannot break
+        or inject markup in the e-mail.
+
+        :param items: result records exposing ``url``, ``title``, ``date``,
+            ``price`` and ``bidder`` attributes.
+        :return: the HTML fragment, or an empty string when ``items`` is empty.
+        """
+        import html
+
+        if not items:
+            return ""
+
+        def esc(value) -> str:
+            # str() keeps the previous f-string rendering of non-string values
+            # (e.g. None -> "None") while still escaping the result.
+            return html.escape(str(value), quote=True)
+
+        header = """
+        <div class="table-container">
+            <table class="table">
+                <tr>
+                    <th style="width:200px">项目名称</th>
+                    <th style="width:150px">公告日期</th>
+                    <th style="width:120px">价格</th>
+                    <th>中标人</th>
+                </tr>
+        """
+        rows = [
+            f"""
+            <tr>
+                <td><a  title="点击查看详情" href="{esc(item.url)}">{esc(item.title)}</a></td>
+                <td>{esc(item.date)}</td>
+                <td>{esc(item.price)}</td>
+                <td>{esc(item.bidder)}</td>
+            </tr>
+            """
+            for item in items
+        ]
+        # Join once instead of growing the string inside the loop.
+        return header + "".join(rows) + "</table></div>"
 
+    @staticmethod
+    def _gen_report_exlecl(title, items: list[ProcessResultData]) -> str:
+        """Build the column data for the report and save it as an Excel file.
+
+        :param title: report title, forwarded as the file name to
+            ``utils.save_reort_excel``.
+        :param items: result records to export.
+        :return: whatever ``utils.save_reort_excel`` returns, or an empty
+            string when there are no items.
+        """
+        if not items:
+            return ""
+        # (column header, attribute name) pairs, in output column order.
+        columns = (
+            ("项目编号", "no"),
+            ("项目名称", "title"),
+            ("公告日期", "date"),
+            ("价格", "price"),
+            ("中标人", "bidder"),
+            ("公告摘要", "summary"),
+            ("URL", "url"),
+        )
+        sheet = {
+            header: [getattr(row, attr) for row in items]
+            for header, attr in columns
+        }
+        return utils.save_reort_excel(sheet, title)
+
     def _send_email_no_found(self) -> None:
         email = utils.get_config_value("email.error_email")
         utils.get_logger().info(f"开始发送区域邮箱未匹配邮件: {email}")

+ 20 - 0
SourceCode/TenderCrawler/app/utils/__init__.py

@@ -108,6 +108,26 @@ def clean_attach_file(day: int):
     FileHelper().clean_attach_file(day)
 
 
+def save_reort_excel(data, file_name: str = None) -> str:
+    """
+    Save report data to an Excel file via ``FileHelper``.
+
+    :param data: report data handed to ``FileHelper.save_report_excel``
+        (the report sender passes a dict mapping column header to values).
+    :param file_name: base name of the file to save (optional).
+    :return: the value returned by ``FileHelper.save_report_excel``.
+    """
+    helper = FileHelper()
+    saved_path = helper.save_report_excel(data, file_name)
+    return saved_path
+
+
+def clean_report_file(day: int):
+    """
+    Clean up report files older than the given number of days.
+
+    :param day: retention in days, forwarded to ``FileHelper.clean_report_file``.
+    """
+    helper = FileHelper()
+    helper.clean_report_file(day)
+
+
 def to_array(s: str, split: str = ",") -> list[str]:
     """
     将字符串按指定分隔符拆分为数组。

+ 70 - 9
SourceCode/TenderCrawler/app/utils/file_helper.py

@@ -1,21 +1,29 @@
-import os, shutil,utils
+import os, shutil, utils
+import pandas as pd
 from datetime import datetime, timedelta
 from urllib.parse import urlparse
 
 import requests
 
 
-
-
 class FileHelper:
 
-    DEFAULT_ATTACH_PATH = "./attaches/"
+    DEFAULT_ATTACH_PATH = "./temp_files/attaches/"
+    # Fallback report directory used when save.report_file_path is not
+    # configured; spelled to match the path shipped in config.yml.
+    DEFAULT_REPORT_PATH = "./temp_files/report/"
 
     def __init__(self):
-        path = utils.get_config_value("save.attach_file_path", self.DEFAULT_ATTACH_PATH)
-        path = path.replace("\\", "/")
-        path = path.replace("//", "/")
-        self._attach_file_path = path
+        attach_path = utils.get_config_value(
+            "save.attach_file_path", self.DEFAULT_ATTACH_PATH
+        )
+        attach_path = attach_path.replace("\\", "/")
+        attach_path = attach_path.replace("//", "/")
+        self._attach_file_path = attach_path
+        report_path = utils.get_config_value(
+            "save.report_file_path", self.DEFAULT_REPORT_PATH
+        )
+        report_path = report_path.replace("\\", "/")
+        report_path = report_path.replace("//", "/")
+        self._report_file_path = report_path
 
     def download_remote_file(self, file_url: str, file_name: str) -> str | None:
         utils.get_logger().info(f"下载远程文件: {file_url}  文件名:{file_name}")
@@ -103,4 +111,57 @@ class FileHelper:
                         # 如果目录名称不符合 %Y-%m/%d 格式,跳过
                         continue
         except Exception as e:
-            utils.get_logger().error(f"文件清理失败。Exception: {e}")
+            utils.get_logger().error(f"attach 文件清理失败。Exception: {e}")
+
+    def save_report_excel(self, data, file_name: str = None) -> str:
+        """Write report data to an .xlsx file under a per-day report directory.
+
+        The file is stored as
+        ``<report_file_path>/<YYYY-MM-DD>/<file_name>_<HHMMSS>.xlsx``.
+
+        :param data: anything accepted by ``pandas.DataFrame``.
+        :param file_name: base name of the file; falls back to ``"report"``
+            when omitted or empty so the file is never named ``None_...``.
+        :return: the path of the saved file (forward slashes), or an empty
+            string if saving failed; the failure is logged, not raised.
+        """
+        try:
+            df = pd.DataFrame(data)
+            # Take the timestamp once so the directory date and the file time
+            # cannot disagree when the call straddles midnight.
+            now = datetime.now()
+            file_path = os.path.join(
+                self._report_file_path, now.strftime("%Y-%m-%d")
+            )
+            # exist_ok avoids the check-then-create race of exists()+makedirs().
+            os.makedirs(file_path, exist_ok=True)
+            base_name = file_name or "report"
+            file_name = f"{base_name}_{now.strftime('%H%M%S')}.xlsx"
+            path = os.path.join(file_path, file_name)
+            path = path.replace("\\", "/")
+            path = path.replace("//", "/")
+            df.to_excel(path, index=False)
+            utils.get_logger().debug(f"Report报存成功: {file_name}")
+            return path
+        except Exception as e:
+            # Best effort: the caller treats "" as "no attachment".
+            utils.get_logger().error(f"保存 Report Excel 文件失败。Exception: {e}")
+            return ""
+
+    def clean_report_file(self, day: int) -> None:
+        """Delete per-day report directories older than ``day`` days.
+
+        Only the immediate sub-directories of the report path are considered,
+        and only those whose name parses as ``%Y-%m-%d``. Entries are listed
+        directly instead of deriving a relative name by string replacement,
+        so the cleanup also works when the configured report path has no
+        trailing slash.
+
+        :param day: retention in days; directories dated before
+            ``now - day`` are removed together with their contents.
+        :return: None. Failures are logged, never raised.
+        """
+        try:
+            base_path = self._report_file_path
+            if not os.path.isdir(base_path):
+                # Nothing has been generated yet; nothing to clean.
+                return
+            cutoff_time = datetime.now() - timedelta(days=day)
+            for dir_name in os.listdir(base_path):
+                path = os.path.join(base_path, dir_name)
+                if not os.path.isdir(path):
+                    continue
+                try:
+                    dir_date = datetime.strptime(dir_name, "%Y-%m-%d")
+                except ValueError:
+                    # Directory name is not in %Y-%m-%d format: skip it.
+                    continue
+                if dir_date >= cutoff_time:
+                    continue
+                try:
+                    shutil.rmtree(path)
+                    utils.get_logger().info(
+                        f"  Report 删除目录及其内容: {dir_name}"
+                    )
+                except PermissionError:
+                    utils.get_logger().error(
+                        f"  Report 权限错误,无法删除目录: {dir_name}"
+                    )
+                except Exception as e:
+                    utils.get_logger().error(
+                        f"  Report 删除目录失败: {dir_name}。Exception: {e}"
+                    )
+        except Exception as e:
+            utils.get_logger().error(f"Report 文件清理失败。Exception: {e}")

+ 2 - 2
SourceCode/TenderCrawler/docker-compose.yml

@@ -59,12 +59,12 @@ services:
       - APP_JOB__COLLECT=20:00,12:00
       - APP_JOB__PROCESS=23:00,4:00,13:00
       - APP_JOB__SEND_EMAIL=08:20,14:00
-      - APP_JOB__RUN_NOW=1
+      - APP_JOB__RUN_NOW=0
       - APP_SELENIUM__REMOTE_DRIVER_URL=http://y_selenium:4444/wd/hub
     volumes:
       - /home/docker/tender-crawler_v2/app/config.yml:/app/config.yml
       - /home/docker/tender-crawler_v2/app/logs:/app/logs
-      - /home/docker/tender-crawler_v2/app/attaches:/app/attaches
+      - /home/docker/tender-crawler_v2/app/temp_files:/app/temp_files
     #      - ./.dev/app/config.yml:/app/config.yml
     #      - ./.dev/app/logs:/app/logs
     #      - ./.dev/app/attaches:/app/attaches

+ 2 - 1
SourceCode/TenderCrawler/requirements.txt

@@ -1,9 +1,10 @@
 PyMySQL==1.1.1
 python_dateutil==2.9.0.post0
 PyYAML==6.0.2
-PyYAML==6.0.2
 Requests==2.32.3
 schedule==1.2.2
 selenium==4.27.1
 cryptography==41.0.4
 openai==1.58.1
+pandas~=2.2.3
+openpyxl==3.1.5