Browse Source

tools-dev-t1

klzhangweiya 4 months ago
parent
commit
59bf61cc66

+ 24 - 0
SourceCode/DataMiddleware/tools/config.yml

@@ -0,0 +1,24 @@
+mysql:  # MySQL connection settings
+  host: 192.168.0.81
+  port: 3307
+  db: iwb_data_tielu_pdf_dev  # same database name that init.sql creates
+  user: root  # NOTE(review): connects as root; consider a least-privilege account
+  password: Iwb-2024  # NOTE(review): plaintext credential committed to the repo; rotate it and load from an untracked source
+  charset: utf8mb4
+logger:
+  file_path: './logs/'  # presumably the directory log files are written to; confirm against the logger setup
+  level: 'debug'
+app:
+  port: 5223
+
+ai:
+  #  url: http://192.168.0.109:7580/api/chat
+  #  model: qwen2.5:7b
+  key: sk-febca8fea4a247f096cedeea9f185520  # NOTE(review): API key committed in plaintext; rotate it and inject at deploy time
+  url: https://dashscope.aliyuncs.com/compatible-mode/v1
+  model: qwen2.5-vl-72b-instruct
+  max_tokens: 10240
+
+fastgpt:
+  url: http://192.168.0.104:8020/api/v1/chat/completions
+  key: fastgpt-pzXtKVjkBU8NW8MUqZ7WnEfqK3m8qP6wmDdfcBgOaK2PZDekoHM1  # NOTE(review): API key committed in plaintext; rotate it and inject at deploy time

+ 30 - 0
SourceCode/DataMiddleware/tools/init.sql

@@ -0,0 +1,30 @@
+-- Create the database
+CREATE DATABASE IF NOT EXISTS iwb_data_tielu_pdf_dev CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;
+USE iwb_data_tielu_pdf_dev;
+-- Create the standards table (one row per standard: code + name)
+CREATE TABLE IF NOT EXISTS pdf_standard (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    standard_code VARCHAR(100) COMMENT '标准编号',
+    standard_name VARCHAR(255) COMMENT '标准名称',
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
+    INDEX idx_standard_code (standard_code),
+    INDEX idx_standard_name (standard_name)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='PDF标准表';
+
+-- Create the PDF processing-record table (one row per split PDF section)
+CREATE TABLE IF NOT EXISTS pdf_records (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    standard_name VARCHAR(255) COMMENT '标准名称',
+    chapter_name VARCHAR(255) COMMENT '章节名称',
+    section_name VARCHAR(255) COMMENT '节名称',
+    subsection_name VARCHAR(255) COMMENT '小节名称',
+    pdf_path VARCHAR(500) COMMENT 'PDF文件路径',
+    image_path TEXT COMMENT '生成的图片路径',
+    markdown_text TEXT COMMENT 'AI分析生成的Markdown文本',
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
+    -- (removed INDEX idx_standard_code: this table has no standard_code column, so MySQL rejected the whole statement)
+    INDEX idx_standard_name (standard_name),
+    INDEX idx_chapter_name (chapter_name)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='PDF文件处理记录表';

+ 23 - 0
SourceCode/DataMiddleware/tools/main.py

@@ -0,0 +1,23 @@
+from pdf_split.processor import PDFProcessor
+import tools.utils as utils
+from tools.pdf_split.mysql_store import MysqlStore
+
+@app.route('/standards')  # NOTE(review): `app` is never defined or imported in this file as shown, so loading the module raises NameError before main() can run
+def show_standards():  # Renders standards.html with the results of get_all_standards() and get_all_pdf_records()
+    store = MysqlStore()
+    standards = store.get_all_standards()
+    records = store.get_all_pdf_records()
+    return render_template('standards.html', standards=standards, records=records)  # NOTE(review): `render_template` is not imported here either (presumably Flask's); confirm
+def main():
+    log = utils.get_logger()
+    #"""PDF拆分工具的主入口函数"""
+    #log.info("PDF拆分工具已启动")
+    #PDFProcessor.split("01")
+    """图片解析成文本入口函数"""
+    log.info("图片解析成文本工具已启动")
+    PDFProcessor.process_image_to_txt("01")
+
+
+
+if __name__ == '__main__':
+    main()

+ 0 - 0
SourceCode/DataMiddleware/tools/models/__init__.py


+ 38 - 0
SourceCode/DataMiddleware/tools/models/pdf_records.py

@@ -0,0 +1,38 @@
+from datetime import datetime
+
+class Record:  # In-memory model of one pdf_records row; fields match the columns declared in init.sql
+    def __init__(self,
+                 standard_name: str,
+                 chapter_name: str,
+                 section_name: str,
+                 subsection_name: str,
+                 pdf_path: str,
+                 image_path: str,
+                 markdown_text: str,
+                 id: int = None,
+                 created_at: datetime = None,
+                 updated_at: datetime = None):
+        self.id = id  # auto-increment ID (None until supplied by the caller)
+        self.standard_name = standard_name  # standard name
+        self.chapter_name = chapter_name  # chapter name
+        self.section_name = section_name  # section name
+        self.subsection_name = subsection_name  # subsection name
+        self.pdf_path = pdf_path  # PDF path
+        self.image_path = image_path  # image path
+        self.markdown_text = markdown_text  # Markdown text
+        self.created_at = created_at or datetime.now()  # creation time; falls back to the current local time
+        self.updated_at = updated_at or datetime.now()  # update time; falls back to the current local time
+
+    def to_dict(self):  # Return all fields as a plain dict; timestamps become 'YYYY-MM-DD HH:MM:SS' strings
+        return {
+            'id': self.id,
+            'standard_name': self.standard_name,
+            'chapter_name': self.chapter_name,
+            'section_name': self.section_name,
+            'subsection_name': self.subsection_name,
+            'pdf_path': self.pdf_path,
+            'image_path': self.image_path,
+            'markdown_text': self.markdown_text,
+            'created_at': self.created_at.strftime('%Y-%m-%d %H:%M:%S'),
+            'updated_at': self.updated_at.strftime('%Y-%m-%d %H:%M:%S')
+        }

+ 23 - 0
SourceCode/DataMiddleware/tools/models/pdf_standard.py

@@ -0,0 +1,23 @@
+from datetime import datetime
+
+class Standard:  # In-memory model of one pdf_standard row; fields match the columns declared in init.sql
+    def __init__(self,
+                 standard_code: str,
+                 standard_name: str,
+                 id: int = None,
+                 created_at: datetime = None,
+                 updated_at: datetime = None):
+        self.id = id  # auto-increment ID (None until supplied by the caller)
+        self.standard_code = standard_code  # standard code
+        self.standard_name = standard_name  # standard name
+        self.created_at = created_at or datetime.now()  # creation time; falls back to the current local time
+        self.updated_at = updated_at or datetime.now()  # update time; falls back to the current local time
+
+    def to_dict(self):  # Return all fields as a plain dict; timestamps become 'YYYY-MM-DD HH:MM:SS' strings
+        return {
+            'id': self.id,
+            'standard_code': self.standard_code,
+            'standard_name': self.standard_name,
+            'created_at': self.created_at.strftime('%Y-%m-%d %H:%M:%S'),
+            'updated_at': self.updated_at.strftime('%Y-%m-%d %H:%M:%S')
+        }

+ 67 - 67
SourceCode/DataMiddleware/tools/pdf_json/v1/01.json

@@ -5,12 +5,12 @@
 			{
 				"start_page": 3,
 				"end_page": 8,
-				"output_name": "01沟槽及管道@01光(电)缆沟@01光(电)缆沟.pdf"
+				"output_name": "01沟槽及管道@01光(电)缆沟@01光(电)缆沟"
 			},
 			{
 				"start_page": 9,
 				"end_page": 13,
-				"output_name": "01沟槽及管道@02铺设光(电)缆管道@01铺设光(电)缆管道.pdf"
+				"output_name": "01沟槽及管道@02铺设光(电)缆管道@01铺设光(电)缆管道"
 			}
 		]
 	},
@@ -20,42 +20,42 @@
 			{
 				"start_page": 14,
 				"end_page": 23,
-				"output_name": "02光(电)缆线路@01敷设长途光缆@01敷设埋式光缆.pdf"
+				"output_name": "02光(电)缆线路@01敷设长途光缆@01敷设埋式光缆"
 			},
 			{
 				"start_page": 24,
 				"end_page": 26,
-				"output_name": "02光(电)缆线路@01敷设长途光缆@02敷设管道光缆.pdf"
+				"output_name": "02光(电)缆线路@01敷设长途光缆@02敷设管道光缆"
 			},
 			{
 				"start_page": 27,
 				"end_page": 32,
-				"output_name": "02光(电)缆线路@01敷设长途光缆@03敷设槽道光缆.pdf"
+				"output_name": "02光(电)缆线路@01敷设长途光缆@03敷设槽道光缆"
 			},
 			{
 				"start_page": 33,
 				"end_page": 33,
-				"output_name": "02光(电)缆线路@02安装光纤监测设备@01安装光纤监测设备.pdf"
+				"output_name": "02光(电)缆线路@02安装光纤监测设备@01安装光纤监测设备"
 			},
 			{
 				"start_page": 34,
 				"end_page": 42,
-				"output_name": "02光(电)缆线路@03敷设长途电缆@01敷设埋式长途电缆.pdf"
+				"output_name": "02光(电)缆线路@03敷设长途电缆@01敷设埋式长途电缆"
 			},
 			{
 				"start_page": 43,
 				"end_page": 45,
-				"output_name": "02光(电)缆线路@03敷设长途电缆@02敷设管道长途电缆.pdf"
+				"output_name": "02光(电)缆线路@03敷设长途电缆@02敷设管道长途电缆"
 			},
 			{
 				"start_page": 46,
 				"end_page": 50,
-				"output_name": "02光(电)缆线路@03敷设长途电缆@02敷设长途电缆.pdf"
+				"output_name": "02光(电)缆线路@03敷设长途电缆@02敷设长途电缆"
 			},
 			{
 				"start_page": 51,
 				"end_page": 64,
-				"output_name": "02光(电)缆线路@04敷设地区通信光(电)缆@01敷设地区通信光(电)缆.pdf"
+				"output_name": "02光(电)缆线路@04敷设地区通信光(电)缆@01敷设地区通信光(电)缆"
 			}
 		]
 	},
@@ -65,7 +65,7 @@
 			{
 				"start_page": 65,
 				"end_page": 69,
-				"output_name": "03配线架(柜)及走线架(槽)安装@01配线架(柜)及走线架(槽)安装@01配线架(柜)及走线架(槽)安装.pdf"
+				"output_name": "03配线架(柜)及走线架(槽)安装@01配线架(柜)及走线架(槽)安装@01配线架(柜)及走线架(槽)安装"
 			}
 		]
 	},
@@ -75,27 +75,27 @@
 			{
 				"start_page": 70,
 				"end_page": 73,
-				"output_name": "04传输及接入网设备@01安装传输及接入网设备@01安装传输及接入网设备.pdf"
+				"output_name": "04传输及接入网设备@01安装传输及接入网设备@01安装传输及接入网设备"
 			},
 			{
 				"start_page": 74,
 				"end_page": 77,
-				"output_name": "04传输及接入网设备@01安装传输及接入网设备@02安装传输及接入网设备.pdf"
+				"output_name": "04传输及接入网设备@01安装传输及接入网设备@02安装传输及接入网设备"
 			},
 			{
 				"start_page": 78,
 				"end_page": 79,
-				"output_name": "04传输及接入网设备@01安装传输及接入网设备@03安装传输及接入网设备.pdf"
+				"output_name": "04传输及接入网设备@01安装传输及接入网设备@03安装传输及接入网设备"
 			},
 			{
 				"start_page": 80,
 				"end_page": 82,
-				"output_name": "04传输及接入网设备@01安装传输及接入网设备@04安装传输及接入网设备.pdf"
+				"output_name": "04传输及接入网设备@01安装传输及接入网设备@04安装传输及接入网设备"
 			},
 			{
 				"start_page": 83,
 				"end_page": 84,
-				"output_name": "04传输及接入网设备@02安装传输及接入网网管设备@01安装传输及接入网网管设备.pdf"
+				"output_name": "04传输及接入网设备@02安装传输及接入网网管设备@01安装传输及接入网网管设备"
 			}
 		]
 	},
@@ -105,12 +105,12 @@
 			{
 				"start_page": 85,
 				"end_page": 86,
-				"output_name": "05数据通信网设备@01安装数据网设备@01安装数据网设备.pdf"
+				"output_name": "05数据通信网设备@01安装数据网设备@01安装数据网设备"
 			},
 			{
 				"start_page": 87,
 				"end_page": 88,
-				"output_name": "05数据通信网设备@02安装数据网网管设备@01安装数据网网管设备.pdf"
+				"output_name": "05数据通信网设备@02安装数据网网管设备@01安装数据网网管设备"
 			}
 		]
 	},
@@ -120,7 +120,7 @@
 			{
 				"start_page": 89,
 				"end_page": 91,
-				"output_name": "06电话交换设备@01电话交换设备@01电话交换设备.pdf"
+				"output_name": "06电话交换设备@01电话交换设备@01电话交换设备"
 			}
 		]
 	},
@@ -130,12 +130,12 @@
 			{
 				"start_page": 92,
 				"end_page": 94,
-				"output_name": "07有线调度通信系统设备@01安装与调试调度交换机@01安装与调试调度交换机.pdf"
+				"output_name": "07有线调度通信系统设备@01安装与调试调度交换机@01安装与调试调度交换机"
 			},
 			{
 				"start_page": 95,
 				"end_page": 99,
-				"output_name": "07有线调度通信系统设备@02安装与调试叫班系统@01安装与调试叫班系统.pdf"
+				"output_name": "07有线调度通信系统设备@02安装与调试叫班系统@01安装与调试叫班系统"
 			}
 		]
 	},
@@ -145,37 +145,37 @@
 			{
 				"start_page": 100,
 				"end_page": 106,
-				"output_name": "08数字移动通信系统(GSM-R)@01架设漏泄同轴电缆@01架设漏泄同轴电缆.pdf"
+				"output_name": "08数字移动通信系统(GSM-R)@01架设漏泄同轴电缆@01架设漏泄同轴电缆"
 			},
 			{
 				"start_page": 107,
 				"end_page": 110,
-				"output_name": "08数字移动通信系统(GSM-R)@02架设通信铁塔@01架设通信铁塔.pdf"
+				"output_name": "08数字移动通信系统(GSM-R)@02架设通信铁塔@01架设通信铁塔"
 			},
 			{
 				"start_page": 111,
 				"end_page": 115,
-				"output_name": "08数字移动通信系统(GSM-R)@03安装无线网及附属设备@01安装基站及无线中继设备.pdf"
+				"output_name": "08数字移动通信系统(GSM-R)@03安装无线网及附属设备@01安装基站及无线中继设备"
 			},
 			{
 				"start_page": 116,
 				"end_page": 117,
-				"output_name": "08数字移动通信系统(GSM-R)@03安装无线网及附属设备@02安装无线终端设备.pdf"
+				"output_name": "08数字移动通信系统(GSM-R)@03安装无线网及附属设备@02安装无线终端设备"
 			},
 			{
 				"start_page": 118,
 				"end_page": 119,
-				"output_name": "08数字移动通信系统(GSM-R)@03安装无线网及附属设备@03安装漏缆监测设备.pdf"
+				"output_name": "08数字移动通信系统(GSM-R)@03安装无线网及附属设备@03安装漏缆监测设备"
 			},
 			{
 				"start_page": 120,
 				"end_page": 122,
-				"output_name": "08数字移动通信系统(GSM-R)@04安装核心网设备@01安装核心网设备.pdf"
+				"output_name": "08数字移动通信系统(GSM-R)@04安装核心网设备@01安装核心网设备"
 			},
 			{
 				"start_page": 123,
 				"end_page": 124,
-				"output_name": "08数字移动通信系统(GSM-R)@05移动通信系统调试@01移动通信系统调试.pdf"
+				"output_name": "08数字移动通信系统(GSM-R)@05移动通信系统调试@01移动通信系统调试"
 			}
 		]
 	},
@@ -185,7 +185,7 @@
 			{
 				"start_page": 125,
 				"end_page": 128,
-				"output_name": "09会议电视系统设备@01会议电视系统设备@01会议电视系统设备.pdf"
+				"output_name": "09会议电视系统设备@01会议电视系统设备@01会议电视系统设备"
 			}
 		]
 	},
@@ -195,12 +195,12 @@
 			{
 				"start_page": 129,
 				"end_page": 139,
-				"output_name": "10综合视频监控系统设备@01综合视频监控系统设备@01综合视频监控系统设备.pdf"
+				"output_name": "10综合视频监控系统设备@01综合视频监控系统设备@01综合视频监控系统设备"
 			},
 			{
 				"start_page": 140,
 				"end_page": 143,
-				"output_name": "10综合视频监控系统设备@02安装视频采集点及汇集点设备@01安装视频采集点及汇集点设备.pdf"
+				"output_name": "10综合视频监控系统设备@02安装视频采集点及汇集点设备@01安装视频采集点及汇集点设备"
 			}
 		]
 	},
@@ -210,12 +210,12 @@
 			{
 				"start_page": 144,
 				"end_page": 146,
-				"output_name": "11应急通信系统设备@01安装应急通信系统设备@01安装应急通信系统设备.pdf"
+				"output_name": "11应急通信系统设备@01安装应急通信系统设备@01安装应急通信系统设备"
 			},
 			{
 				"start_page": 147,
 				"end_page": 149,
-				"output_name": "11应急通信系统设备@02安装隧道应急电话设备@01安装隧道应急电话设备.pdf"
+				"output_name": "11应急通信系统设备@02安装隧道应急电话设备@01安装隧道应急电话设备"
 			}
 		]
 	},
@@ -225,7 +225,7 @@
 			{
 				"start_page": 150,
 				"end_page": 152,
-				"output_name": "12时钟及时间同步系统设备@01时钟及时间同步系统设备@01时钟及时间同步系统设备.pdf"
+				"output_name": "12时钟及时间同步系统设备@01时钟及时间同步系统设备@01时钟及时间同步系统设备"
 			}
 		]
 	},
@@ -235,12 +235,12 @@
 			{
 				"start_page": 153,
 				"end_page": 157,
-				"output_name": "13通信电源设备及防雷接地装置@01安装通信电源设备@01安装通信电源设备.pdf"
+				"output_name": "13通信电源设备及防雷接地装置@01安装通信电源设备@01安装通信电源设备"
 			},
 			{
 				"start_page": 158,
 				"end_page": 160,
-				"output_name": "13通信电源设备及防雷接地装置@02安装防雷及地线装置@01安装防雷及地线装置.pdf"
+				"output_name": "13通信电源设备及防雷接地装置@02安装防雷及地线装置@01安装防雷及地线装置"
 			}
 		]
 	},
@@ -250,7 +250,7 @@
 			{
 				"start_page": 161,
 				"end_page": 162,
-				"output_name": "14电源及设备房屋环境监控设备@01电源及设备房屋环境监控设备@01电源及设备房屋环境监控设备.pdf"
+				"output_name": "14电源及设备房屋环境监控设备@01电源及设备房屋环境监控设备@01电源及设备房屋环境监控设备"
 			}
 		]
 	},
@@ -260,7 +260,7 @@
 			{
 				"start_page": 163,
 				"end_page": 168,
-				"output_name": "15综合布线@01综合布线@01综合布线.pdf"
+				"output_name": "15综合布线@01综合布线@01综合布线"
 			}
 		]
 	},
@@ -270,147 +270,147 @@
 			{
 				"start_page": 170,
 				"end_page": 170,
-				"output_name": "16附录_工程量组成@01沟槽及管道@01挖填光(电)缆沟.pdf"
+				"output_name": "16附录_工程量组成@01沟槽及管道@01挖填光(电)缆沟"
 			},
 			{
 				"start_page": 171,
 				"end_page": 172,
-				"output_name": "16附录_工程量组成@01沟槽及管道@02铺设光(电)缆管道.pdf"
+				"output_name": "16附录_工程量组成@01沟槽及管道@02铺设光(电)缆管道"
 			},
 			{
 				"start_page": 173,
 				"end_page": 178,
-				"output_name": "16附录_工程量组成@01沟槽及管道@01挖填光(电)缆沟.pdf"
+				"output_name": "16附录_工程量组成@01沟槽及管道@01挖填光(电)缆沟"
 			},
 			{
 				"start_page": 179,
 				"end_page": 179,
-				"output_name": "16附录_工程量组成@02光(电)缆敷设@02安装光纤监测设备.pdf"
+				"output_name": "16附录_工程量组成@02光(电)缆敷设@02安装光纤监测设备"
 			},
 			{
 				"start_page": 180,
 				"end_page": 182,
-				"output_name": "16附录_工程量组成@02光(电)缆敷设@03敷设长途电缆.pdf"
+				"output_name": "16附录_工程量组成@02光(电)缆敷设@03敷设长途电缆"
 			},
 			{
 				"start_page": 183,
 				"end_page": 186,
-				"output_name": "16附录_工程量组成@02光(电)缆敷设@04敷设地区通信光(电)缆.pdf"
+				"output_name": "16附录_工程量组成@02光(电)缆敷设@04敷设地区通信光(电)缆"
 			},
 			{
 				"start_page": 187,
 				"end_page": 188,
-				"output_name": "16附录_工程量组成@03配线架(柜)及走线架(槽)安装@01配线架(柜)及走线架(槽)安装.pdf"
+				"output_name": "16附录_工程量组成@03配线架(柜)及走线架(槽)安装@01配线架(柜)及走线架(槽)安装"
 			},
 			{
 				"start_page": 189,
 				"end_page": 193,
-				"output_name": "16附录_工程量组成@04传输及接入网设备@01安装传输及接入网设备.pdf"
+				"output_name": "16附录_工程量组成@04传输及接入网设备@01安装传输及接入网设备"
 			},
 			{
 				"start_page": 194,
 				"end_page": 194,
-				"output_name": "16附录_工程量组成@04传输及接入网设备@02安装传输及接入网网管设备.pdf"
+				"output_name": "16附录_工程量组成@04传输及接入网设备@02安装传输及接入网网管设备"
 			},
 				{
 				"start_page": 195,
 				"end_page": 195,
-				"output_name": "16附录_工程量组成@05数据通信网设备@01安装数据网设备.pdf"
+				"output_name": "16附录_工程量组成@05数据通信网设备@01安装数据网设备"
 			},
 			{
 				"start_page": 196,
 				"end_page": 196,
-				"output_name": "16附录_工程量组成@05数据通信网设备@02安装数据网网管设备.pdf"
+				"output_name": "16附录_工程量组成@05数据通信网设备@02安装数据网网管设备"
 			},
 			{
 				"start_page": 197,
 				"end_page": 197,
-				"output_name": "16附录_工程量组成@06电话交换设备@01电话交换设备.pdf"
+				"output_name": "16附录_工程量组成@06电话交换设备@01电话交换设备"
 			},
 			{
 				"start_page": 198,
 				"end_page": 198,
-				"output_name": "16附录_工程量组成@07有线调度通信系统设备@01安装与调试调度交换机.pdf"
+				"output_name": "16附录_工程量组成@07有线调度通信系统设备@01安装与调试调度交换机"
 			},
 			{
 				"start_page": 199,
 				"end_page": 200,
-				"output_name": "16附录_工程量组成@07有线调度通信系统设备@02安装与调试叫班系统.pdf"
+				"output_name": "16附录_工程量组成@07有线调度通信系统设备@02安装与调试叫班系统"
 			},
 				{
 				"start_page": 201,
 				"end_page": 201,
-				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@01架设漏泄同轴电缆.pdf"
+				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@01架设漏泄同轴电缆"
 			},
 			{
 				"start_page": 202,
 				"end_page": 202,
-				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@02架设通信铁塔.pdf"
+				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@02架设通信铁塔"
 			},
 			{
 				"start_page": 203,
 				"end_page": 205,
-				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@03安装无线网及附属设备.pdf"
+				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@03安装无线网及附属设备"
 			},
 			{
 				"start_page": 206,
 				"end_page": 208,
-				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@04安装核心网设备.pdf"
+				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@04安装核心网设备"
 			},
 			{
 				"start_page": 209,
 				"end_page": 209,
-				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@05移动通信系统调试.pdf"
+				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@05移动通信系统调试"
 			},
 			{
 				"start_page": 210,
 				"end_page": 210,
-				"output_name": "16附录_工程量组成@09会议电视系统设备@01会议电视系统设备.pdf"
+				"output_name": "16附录_工程量组成@09会议电视系统设备@01会议电视系统设备"
 			},
 			{
 				"start_page": 211,
 				"end_page": 215,
-				"output_name": "16附录_工程量组成@10综合视频监控系统设备@01视频采集点及汇集点设备.pdf"
+				"output_name": "16附录_工程量组成@10综合视频监控系统设备@01视频采集点及汇集点设备"
 			},
 			{
 				"start_page": 216,
 				"end_page": 216,
-				"output_name": "16附录_工程量组成@10综合视频监控系统设备@02视频采节点.pdf"
+				"output_name": "16附录_工程量组成@10综合视频监控系统设备@02视频采节点"
 			},
 				{
 				"start_page": 217,
 				"end_page": 217,
-				"output_name": "16附录_工程量组成@11应急通信系统设备@01安装应急通信系统设备.pdf"
+				"output_name": "16附录_工程量组成@11应急通信系统设备@01安装应急通信系统设备"
 			},
 			{
 				"start_page": 218,
 				"end_page": 218,
-				"output_name": "16附录_工程量组成@11应急通信系统设备@02安装隧道应急电话设备.pdf"
+				"output_name": "16附录_工程量组成@11应急通信系统设备@02安装隧道应急电话设备"
 			},
 			{
 				"start_page": 219,
 				"end_page": 219,
-				"output_name": "16附录_工程量组成@12时钟及时间同步系统设备@01时钟及时间同步系统设备.pdf"
+				"output_name": "16附录_工程量组成@12时钟及时间同步系统设备@01时钟及时间同步系统设备"
 			},
 			{
 				"start_page": 220,
 				"end_page": 221,
-				"output_name": "16附录_工程量组成@13通信电源设备及防雷接地装置@01安装通信电源设备.pdf"
+				"output_name": "16附录_工程量组成@13通信电源设备及防雷接地装置@01安装通信电源设备"
 			},
 			{
 				"start_page": 222,
 				"end_page": 222,
-				"output_name": "16附录_工程量组成@13通信电源设备及防雷接地装置@02安装防雷及地线装置.pdf"
+				"output_name": "16附录_工程量组成@13通信电源设备及防雷接地装置@02安装防雷及地线装置"
 			},
 			{
 				"start_page": 223,
 				"end_page": 223,
-				"output_name": "16附录_工程量组成@14电源及设备房屋环境监控设备@01电源及设备房屋环境监控设备.pdf"
+				"output_name": "16附录_工程量组成@14电源及设备房屋环境监控设备@01电源及设备房屋环境监控设备"
 			},
 			{
 				"start_page": 223,
 				"end_page": 226,
-				"output_name": "16附录_工程量组成@15综合布线@01综合布线.pdf"
+				"output_name": "16附录_工程量组成@15综合布线@01综合布线"
 			}
 		]
 	}

+ 102 - 102
SourceCode/DataMiddleware/tools/pdf_json/v1/03.json

@@ -5,82 +5,82 @@
       {
         "start_page": 3,
         "end_page": 6,
-        "output_name": "01洞身开挖、出砟@01洞身开挖@01隧道断面有效面积≤40m2(轨道运输).pdf"
+        "output_name": "01洞身开挖、出砟@01洞身开挖@01隧道断面有效面积≤40m2(轨道运输)"
       },
       {
         "start_page": 7,
         "end_page": 9,
-        "output_name": "01洞身开挖、出砟@01洞身开挖@02隧道断面有效面积≤40m2(汽车运输).pdf"
+        "output_name": "01洞身开挖、出砟@01洞身开挖@02隧道断面有效面积≤40m2(汽车运输)"
       },
       {
         "start_page": 10,
         "end_page": 13,
-        "output_name": "01洞身开挖、出砟@01洞身开挖@03隧道断面有效面积≤60m2(轨道运输).pdf"
+        "output_name": "01洞身开挖、出砟@01洞身开挖@03隧道断面有效面积≤60m2(轨道运输)"
       },
       {
         "start_page": 14,
         "end_page": 16,
-        "output_name": "01洞身开挖、出砟@01洞身开挖@04隧道断面有效面积≤60m2(汽车运输).pdf"
+        "output_name": "01洞身开挖、出砟@01洞身开挖@04隧道断面有效面积≤60m2(汽车运输)"
       },
       {
         "start_page": 17,
         "end_page": 19,
-        "output_name": "01洞身开挖、出砟@01洞身开挖@05隧道断面有效面积≤85m.pdf"
+        "output_name": "01洞身开挖、出砟@01洞身开挖@05隧道断面有效面积≤85m"
       },
       {
         "start_page": 20,
         "end_page": 22,
-        "output_name": "01洞身开挖、出砟@01洞身开挖@06隧道断面有效面积>85m2.pdf"
+        "output_name": "01洞身开挖、出砟@01洞身开挖@06隧道断面有效面积>85m2"
       },
       {
         "start_page": 23,
         "end_page": 24,
-        "output_name": "01洞身开挖、出砟@02出砟运输@01正洞轨道出砟.pdf"
+        "output_name": "01洞身开挖、出砟@02出砟运输@01正洞轨道出砟"
       },
       {
         "start_page": 25,
         "end_page": 27,
-        "output_name": "01洞身开挖、出砟@02出砟运输@02正洞汽车出砟.pdf"
+        "output_name": "01洞身开挖、出砟@02出砟运输@02正洞汽车出砟"
       },
       {
         "start_page": 28,
         "end_page": 31,
-        "output_name": "01洞身开挖、出砟@03正洞通过辅助坑道出砟运输@01通过有轨斜井出砟.pdf"
+        "output_name": "01洞身开挖、出砟@03正洞通过辅助坑道出砟运输@01通过有轨斜井出砟"
       },
       {
         "start_page": 32,
         "end_page": 32,
-        "output_name": "01洞身开挖、出砟@03正洞通过辅助坑道出砟运输@02通过无轨斜井出砟.pdf"
+        "output_name": "01洞身开挖、出砟@03正洞通过辅助坑道出砟运输@02通过无轨斜井出砟"
       },
       {
         "start_page": 33,
         "end_page": 34,
-        "output_name": "01洞身开挖、出砟@03正洞通过辅助坑道出砟运输@03通过平行导坑出砟.pdf"
+        "output_name": "01洞身开挖、出砟@03正洞通过辅助坑道出砟运输@03通过平行导坑出砟"
       },
       {
         "start_page": 35,
         "end_page": 38,
-        "output_name": "01洞身开挖、出砟@03正洞通过辅助坑道出砟运输@04通过竖井出砟.pdf"
+        "output_name": "01洞身开挖、出砟@03正洞通过辅助坑道出砟运输@04通过竖井出砟"
       },
       {
         "start_page": 39,
         "end_page": 39,
-        "output_name": "01洞身开挖、出砟@04洞外运砟@01出砟洞外汽车倒运、增运.pdf"
+        "output_name": "01洞身开挖、出砟@04洞外运砟@01出砟洞外汽车倒运、增运"
       },
       {
         "start_page": 40,
         "end_page": 40,
-        "output_name": "01洞身开挖、出砟@04洞外运砟@02有轨洞外增运.pdf"
+        "output_name": "01洞身开挖、出砟@04洞外运砟@02有轨洞外增运"
       },
       {
         "start_page": 41,
         "end_page": 42,
-        "output_name": "01洞身开挖、出砟@05开挖台架和仰拱栈桥@01开挖台架.pdf"
+        "output_name": "01洞身开挖、出砟@05开挖台架和仰拱栈桥@01开挖台架"
       },
       {
         "start_page": 43,
         "end_page": 44,
-        "output_name": "01洞身开挖、出砟@05开挖台架和仰拱栈桥@02简易仰拱栈桥.pdf"
+        "output_name": "01洞身开挖、出砟@05开挖台架和仰拱栈桥@02简易仰拱栈桥"
       }
     ]
   },
@@ -90,47 +90,47 @@
       {
         "start_page": 47,
         "end_page": 48,
-        "output_name": "02支护@01喷射混凝土@01喷射普通混凝土.pdf"
+        "output_name": "02支护@01喷射混凝土@01喷射普通混凝土"
       },
       {
         "start_page": 49,
         "end_page": 50,
-        "output_name": "02支护@01喷射混凝土@02喷射纤维混凝土.pdf"
+        "output_name": "02支护@01喷射混凝土@02喷射纤维混凝土"
       },
       {
         "start_page": 51,
         "end_page": 53,
-        "output_name": "02支护@02锚杆@01锚杆.pdf"
+        "output_name": "02支护@02锚杆@01锚杆"
       },
       {
         "start_page": 54,
         "end_page": 55,
-        "output_name": "02支护@03钢筋网、格栅钢架、型钢钢架@01钢筋网、格栅钢架、型钢钢架.pdf"
+        "output_name": "02支护@03钢筋网、格栅钢架、型钢钢架@01钢筋网、格栅钢架、型钢钢架"
       },
       {
         "start_page": 56,
         "end_page": 58,
-        "output_name": "02支护@04超前支护@01钻孔.pdf"
+        "output_name": "02支护@04超前支护@01钻孔"
       },
       {
         "start_page": 59,
         "end_page": 60,
-        "output_name": "02支护@04超前支护@02注浆.pdf"
+        "output_name": "02支护@04超前支护@02注浆"
       },
       {
         "start_page": 61,
         "end_page": 62,
-        "output_name": "02支护@05拆除临时支护@01拆除临时支护.pdf"
+        "output_name": "02支护@05拆除临时支护@01拆除临时支护"
       },
       {
         "start_page": 63,
         "end_page": 63,
-        "output_name": "02支护@06综合接地焊接@01综合接地焊接.pdf"
+        "output_name": "02支护@06综合接地焊接@01综合接地焊接"
       },
       {
         "start_page": 64,
         "end_page": 66,
-        "output_name": "02支护@07支护台架@01支护台架.pdf"
+        "output_name": "02支护@07支护台架@01支护台架"
       }
     ]
   },
@@ -140,67 +140,67 @@
       {
         "start_page": 69,
         "end_page": 70,
-        "output_name": "03衬砌@01衬砌模板、台架@01衬砌钢台模.pdf"
+        "output_name": "03衬砌@01衬砌模板、台架@01衬砌钢台模"
       },
       {
         "start_page": 71,
         "end_page": 72,
-        "output_name": "03衬砌@01衬砌模板、台架@02衬砌组合模板.pdf"
+        "output_name": "03衬砌@01衬砌模板、台架@02衬砌组合模板"
       },
       {
         "start_page": 73,
         "end_page": 74,
-        "output_name": "03衬砌@01衬砌模板、台架@03沟槽模板.pdf"
+        "output_name": "03衬砌@01衬砌模板、台架@03沟槽模板"
       },
       {
         "start_page": 75,
         "end_page": 75,
-        "output_name": "03衬砌@01衬砌模板、台架@04防水板台架.pdf"
+        "output_name": "03衬砌@01衬砌模板、台架@04防水板台架"
       },
       {
         "start_page": 76,
         "end_page": 77,
-        "output_name": "03衬砌@02模筑混凝土@01混凝土集中拌制.pdf"
+        "output_name": "03衬砌@02模筑混凝土@01混凝土集中拌制"
       },
       {
         "start_page": 78,
         "end_page": 79,
-        "output_name": "03衬砌@02模筑混凝土@02混凝土浇筑.pdf"
+        "output_name": "03衬砌@02模筑混凝土@02混凝土浇筑"
       },
       {
         "start_page": 80,
         "end_page": 81,
-        "output_name": "03衬砌@03钢筋@01钢筋.pdf"
+        "output_name": "03衬砌@03钢筋@01钢筋"
       },
       {
         "start_page": 82,
         "end_page": 84,
-        "output_name": "03衬砌@04钢筋混凝土盖板@01钢筋混凝土盖板.pdf"
+        "output_name": "03衬砌@04钢筋混凝土盖板@01钢筋混凝土盖板"
       },
       {
         "start_page": 85,
         "end_page": 88,
-        "output_name": "03衬砌@05防水和排水@01防水和排水.pdf"
+        "output_name": "03衬砌@05防水和排水@01防水和排水"
       },
       {
         "start_page": 89,
         "end_page": 90,
-        "output_name": "03衬砌@06中心水沟@01中心水沟开挖.pdf"
+        "output_name": "03衬砌@06中心水沟@01中心水沟开挖"
       },
       {
         "start_page": 91,
         "end_page": 91,
-        "output_name": "03衬砌@06中心水沟@02钢筋混凝土预制管铺设.pdf"
+        "output_name": "03衬砌@06中心水沟@02钢筋混凝土预制管铺设"
       },
       {
         "start_page": 92,
         "end_page": 94,
-        "output_name": "03衬砌@06中心水沟@03深埋中心水沟检查井.pdf"
+        "output_name": "03衬砌@06中心水沟@03深埋中心水沟检查井"
       },
       {
         "start_page": 95,
         "end_page": 96,
-        "output_name": "03衬砌@07拱顶压浆@01拱顶压浆.pdf"
+        "output_name": "03衬砌@07拱顶压浆@01拱顶压浆"
       }
     ]
   },
@@ -210,12 +210,12 @@
       {
         "start_page": 99,
         "end_page": 102,
-        "output_name": "04通风及管线路@01通风@01通风.pdf"
+        "output_name": "04通风及管线路@01通风@01通风"
       },
       {
         "start_page": 103,
         "end_page": 106,
-        "output_name": "04通风及管线路@02高压风水管、照明、电力线路@01高压风水管、照明、电力线路.pdf"
+        "output_name": "04通风及管线路@02高压风水管、照明、电力线路@01高压风水管、照明、电力线路"
       }
     ]
   },
@@ -225,27 +225,27 @@
       {
         "start_page": 109,
         "end_page": 110,
-        "output_name": "05运输@01混凝土运输@01洞外混凝土增运.pdf"
+        "output_name": "05运输@01混凝土运输@01洞外混凝土增运"
       },
       {
         "start_page": 111,
         "end_page": 112,
-        "output_name": "05运输@01混凝土运输@02正洞混凝土运输.pdf"
+        "output_name": "05运输@01混凝土运输@02正洞混凝土运输"
       },
       {
         "start_page": 113,
         "end_page": 116,
-        "output_name": "05运输@01混凝土运输@03通过辅助坑道运输混凝土.pdf"
+        "output_name": "05运输@01混凝土运输@03通过辅助坑道运输混凝土"
       },
       {
         "start_page": 117,
         "end_page": 118,
-        "output_name": "05运输@02材料运输@01正洞运输材料.pdf"
+        "output_name": "05运输@02材料运输@01正洞运输材料"
       },
       {
         "start_page": 119,
         "end_page": 122,
-        "output_name": "05运输@02材料运输@02通过辅助坑道材料运输.pdf"
+        "output_name": "05运输@02材料运输@02通过辅助坑道材料运输"
       }
     ]
   },
@@ -255,22 +255,22 @@
       {
         "start_page": 125,
         "end_page": 127,
-        "output_name": "06洞门及明洞@01洞门及明洞混凝土@01洞门及明洞混凝土.pdf"
+        "output_name": "06洞门及明洞@01洞门及明洞混凝土@01洞门及明洞混凝土"
       },
       {
         "start_page": 128,
         "end_page": 131,
-        "output_name": "06洞门及明洞@02洞门及明洞砌筑@01洞门及明洞砌筑.pdf"
+        "output_name": "06洞门及明洞@02洞门及明洞砌筑@01洞门及明洞砌筑"
       },
       {
         "start_page": 132,
         "end_page": 138,
-        "output_name": "06洞门及明洞@03洞门附属@01洞门附属.pdf"
+        "output_name": "06洞门及明洞@03洞门附属@01洞门附属"
       },
       {
         "start_page": 139,
         "end_page": 142,
-        "output_name": "06洞门及明洞@04明洞附属@01明洞附属.pdf"
+        "output_name": "06洞门及明洞@04明洞附属@01明洞附属"
       }
     ]
   },
@@ -280,132 +280,132 @@
       {
         "start_page": 145,
         "end_page": 148,
-        "output_name": "07辅助坑道@01辅助坑道开挖@01斜井(轨道矿车提升运输).pdf"
+        "output_name": "07辅助坑道@01辅助坑道开挖@01斜井(轨道矿车提升运输)"
       },
       {
         "start_page": 149,
         "end_page": 151,
-        "output_name": "07辅助坑道@01辅助坑道开挖@02斜井(汽车运输).pdf"
+        "output_name": "07辅助坑道@01辅助坑道开挖@02斜井(汽车运输)"
       },
       {
         "start_page": 152,
         "end_page": 155,
-        "output_name": "07辅助坑道@01辅助坑道开挖@03平行导坑(轨道运输).pdf"
+        "output_name": "07辅助坑道@01辅助坑道开挖@03平行导坑(轨道运输)"
       },
       {
         "start_page": 156,
         "end_page": 158,
-        "output_name": "07辅助坑道@01辅助坑道开挖@04平行导坑(汽车运输).pdf"
+        "output_name": "07辅助坑道@01辅助坑道开挖@04平行导坑(汽车运输)"
       },
       {
         "start_page": 159,
         "end_page": 164,
-        "output_name": "07辅助坑道@01辅助坑道开挖@05竖井.pdf"
+        "output_name": "07辅助坑道@01辅助坑道开挖@05竖井"
       },
       {
         "start_page": 165,
         "end_page": 166,
-        "output_name": "07辅助坑道@02出砟运输@01斜井(轨道矿车提升运输).pdf"
+        "output_name": "07辅助坑道@02出砟运输@01斜井(轨道矿车提升运输)"
       },
       {
         "start_page": 167,
         "end_page": 167,
-        "output_name": "07辅助坑道@02出砟运输@02斜井(汽车运输).pdf"
+        "output_name": "07辅助坑道@02出砟运输@02斜井(汽车运输)"
       },
       {
         "start_page": 168,
         "end_page": 169,
-        "output_name": "07辅助坑道@02出砟运输@03平行导坑(轨道运输).pdf"
+        "output_name": "07辅助坑道@02出砟运输@03平行导坑(轨道运输)"
       },
       {
         "start_page": 170,
         "end_page": 170,
-        "output_name": "07辅助坑道@02出砟运输@04平行导坑(汽车运输).pdf"
+        "output_name": "07辅助坑道@02出砟运输@04平行导坑(汽车运输)"
       },
       {
         "start_page": 171,
         "end_page": 174,
-        "output_name": "07辅助坑道@02出砟运输@05竖井(提升运输).pdf"
+        "output_name": "07辅助坑道@02出砟运输@05竖井(提升运输)"
       },
       {
         "start_page": 175,
         "end_page": 176,
-        "output_name": "07辅助坑道@03衬砌@01混凝土集中拌制.pdf"
+        "output_name": "07辅助坑道@03衬砌@01混凝土集中拌制"
       },
       {
         "start_page": 177,
         "end_page": 177,
-        "output_name": "07辅助坑道@03衬砌@02混凝土浇筑.pdf"
+        "output_name": "07辅助坑道@03衬砌@02混凝土浇筑"
       },
       {
         "start_page": 178,
         "end_page": 179,
-        "output_name": "07辅助坑道@04通风@01斜井.pdf"
+        "output_name": "07辅助坑道@04通风@01斜井"
       },
       {
         "start_page": 180,
         "end_page": 181,
-        "output_name": "07辅助坑道@04通风@02平行导坑.pdf"
+        "output_name": "07辅助坑道@04通风@02平行导坑"
       },
       {
         "start_page": 182,
         "end_page": 183,
-        "output_name": "07辅助坑道@04通风@03竖井.pdf"
+        "output_name": "07辅助坑道@04通风@03竖井"
       },
       {
         "start_page": 184,
         "end_page": 185,
-        "output_name": "07辅助坑道@05管线路@01斜井.pdf"
+        "output_name": "07辅助坑道@05管线路@01斜井"
       },
       {
         "start_page": 186,
         "end_page": 187,
-        "output_name": "07辅助坑道@05管线路@02平行导坑.pdf"
+        "output_name": "07辅助坑道@05管线路@02平行导坑"
       },
       {
         "start_page": 188,
         "end_page": 189,
-        "output_name": "07辅助坑道@05管线路@03竖井.pdf"
+        "output_name": "07辅助坑道@05管线路@03竖井"
       },
       {
         "start_page": 190,
         "end_page": 191,
-        "output_name": "07辅助坑道@06混凝土运输@01斜井.pdf"
+        "output_name": "07辅助坑道@06混凝土运输@01斜井"
       },
       {
         "start_page": 192,
         "end_page": 193,
-        "output_name": "07辅助坑道@06混凝土运输@02平行导坑.pdf"
+        "output_name": "07辅助坑道@06混凝土运输@02平行导坑"
       },
 		{
         "start_page": 194,
         "end_page": 195,
-        "output_name": "07辅助坑道@06混凝土运输@03竖井.pdf"
+        "output_name": "07辅助坑道@06混凝土运输@03竖井"
       },
       {
         "start_page": 196,
         "end_page": 197,
-        "output_name": "07辅助坑道@07材料运输@01斜井.pdf"
+        "output_name": "07辅助坑道@07材料运输@01斜井"
       },
       {
         "start_page": 198,
         "end_page": 199,
-        "output_name": "07辅助坑道@07材料运输@02平行导坑.pdf"
+        "output_name": "07辅助坑道@07材料运输@02平行导坑"
       },
       {
         "start_page": 200,
         "end_page": 201,
-        "output_name": "07辅助坑道@07材料运输@03竖井.pdf"
+        "output_name": "07辅助坑道@07材料运输@03竖井"
       },
       {
         "start_page": 202,
         "end_page": 203,
-        "output_name": "07辅助坑道@08辅助坑道模板、台架@01组合钢模架.pdf"
+        "output_name": "07辅助坑道@08辅助坑道模板、台架@01组合钢模架"
       },
       {
         "start_page": 204,
         "end_page": 206,
-        "output_name": "07辅助坑道@08辅助坑道模板、台架@02开挖台架.pdf"
+        "output_name": "07辅助坑道@08辅助坑道模板、台架@02开挖台架"
       }
     ]
   },
@@ -415,27 +415,27 @@
       {
         "start_page": 209,
         "end_page": 210,
-        "output_name": "08超前地质预报及监控量测@01超前地质预报探测@01风钻加深炮孔超前水平探测.pdf"
+        "output_name": "08超前地质预报及监控量测@01超前地质预报探测@01风钻加深炮孔超前水平探测"
       },
       {
         "start_page": 211,
         "end_page": 213,
-        "output_name": "08超前地质预报及监控量测@01超前地质预报探测@02钻机冲击钻超前水平探测.pdf"
+        "output_name": "08超前地质预报及监控量测@01超前地质预报探测@02钻机冲击钻超前水平探测"
       },
       {
         "start_page": 214,
         "end_page": 216,
-        "output_name": "08超前地质预报及监控量测@01超前地质预报探测@03钻机钻孔取芯超前水平探测.pdf"
+        "output_name": "08超前地质预报及监控量测@01超前地质预报探测@03钻机钻孔取芯超前水平探测"
       },
       {
         "start_page": 217,
         "end_page": 218,
-        "output_name": "08超前地质预报及监控量测@01超前地质预报探测@04地震波反射法物理探测.pdf"
+        "output_name": "08超前地质预报及监控量测@01超前地质预报探测@04地震波反射法物理探测"
       },
       {
         "start_page": 219,
         "end_page": 220,
-        "output_name": "08超前地质预报及监控量测@02施工监控量测@01施工监控量测.pdf"
+        "output_name": "08超前地质预报及监控量测@02施工监控量测@01施工监控量测"
       }
     ]
   },
@@ -445,72 +445,72 @@
       {
         "start_page": 223,
         "end_page": 224,
-        "output_name": "09改扩建工程@01围岩开挖@01围岩开挖.pdf"
+        "output_name": "09改扩建工程@01围岩开挖@01围岩开挖"
       },
       {
         "start_page": 225,
         "end_page": 227,
-        "output_name": "09改扩建工程@02圬工凿除@01圬工凿除.pdf"
+        "output_name": "09改扩建工程@02圬工凿除@01圬工凿除"
       },
       {
         "start_page": 228,
         "end_page": 229,
-        "output_name": "09改扩建工程@03洞身衬砌@01洞身衬砌.pdf"
+        "output_name": "09改扩建工程@03洞身衬砌@01洞身衬砌"
       },
       {
         "start_page": 230,
         "end_page": 230,
-        "output_name": "09改扩建工程@04出砟@01出砟.pdf"
+        "output_name": "09改扩建工程@04出砟@01出砟"
       },
       {
         "start_page": 231,
         "end_page": 232,
-        "output_name": "09改扩建工程@05支护@01喷射混凝土.pdf"
+        "output_name": "09改扩建工程@05支护@01喷射混凝土"
       },
       {
         "start_page": 233,
         "end_page": 235,
-        "output_name": "09改扩建工程@05支护@02锚杆.pdf"
+        "output_name": "09改扩建工程@05支护@02锚杆"
       },
       {
         "start_page": 236,
         "end_page": 238,
-        "output_name": "09改扩建工程@05支护@03钢筋网、格栅钢架、型钢钢架.pdf"
+        "output_name": "09改扩建工程@05支护@03钢筋网、格栅钢架、型钢钢架"
       },
       {
         "start_page": 239,
         "end_page": 239,
-        "output_name": "09改扩建工程@06防水和排水@01防水板.pdf"
+        "output_name": "09改扩建工程@06防水和排水@01防水板"
       },
       {
         "start_page": 240,
         "end_page": 241,
-        "output_name": "09改扩建工程@06防水和排水@02衬砌背后压浆.pdf"
+        "output_name": "09改扩建工程@06防水和排水@02衬砌背后压浆"
       },
       {
         "start_page": 242,
         "end_page": 242,
-        "output_name": "09改扩建工程@06防水和排水@03盲沟、止水带、透水软管.pdf"
+        "output_name": "09改扩建工程@06防水和排水@03盲沟、止水带、透水软管"
       },
       {
         "start_page": 243,
         "end_page": 246,
-        "output_name": "09改扩建工程@06防水和排水@04漏水处理.pdf"
+        "output_name": "09改扩建工程@06防水和排水@04漏水处理"
       },
       {
         "start_page": 247,
         "end_page": 249,
-        "output_name": "09改扩建工程@07其他@01线路加固.pdf"
+        "output_name": "09改扩建工程@07其他@01线路加固"
       },
       {
         "start_page": 250,
         "end_page": 250,
-        "output_name": "09改扩建工程@07其他@02管线路铺拆.pdf"
+        "output_name": "09改扩建工程@07其他@02管线路铺拆"
       },
       {
         "start_page": 251,
         "end_page": 252,
-        "output_name": "09改扩建工程@07其他@03管线路使用费、照明用电.pdf"
+        "output_name": "09改扩建工程@07其他@03管线路使用费、照明用电"
       }
     ]
   },
@@ -520,42 +520,42 @@
       {
         "start_page": 255,
         "end_page": 256,
-        "output_name": "10隧道机械化施工@01凿岩台车机械化开挖@01凿岩台车机械化开挖.pdf"
+        "output_name": "10隧道机械化施工@01凿岩台车机械化开挖@01凿岩台车机械化开挖"
       },
       {
         "start_page": 257,
         "end_page": 259,
-        "output_name": "10隧道机械化施工@02辅助坑道机械化出砟@01正洞自斜井底皮带机出砟(配合钻爆法施工).pdf"
+        "output_name": "10隧道机械化施工@02辅助坑道机械化出砟@01正洞自斜井底皮带机出砟(配合钻爆法施工)"
       },
       {
         "start_page": 260,
         "end_page": 261,
-        "output_name": "10隧道机械化施工@02辅助坑道机械化出砟@02平导出砟(挖装机装砟).pdf"
+        "output_name": "10隧道机械化施工@02辅助坑道机械化出砟@02平导出砟(挖装机装砟)"
       },
       {
         "start_page": 262,
         "end_page": 263,
-        "output_name": "10隧道机械化施工@03衬砌机械化施工@01衬砌台车及模架.pdf"
+        "output_name": "10隧道机械化施工@03衬砌机械化施工@01衬砌台车及模架"
       },
       {
         "start_page": 264,
         "end_page": 265,
-        "output_name": "10隧道机械化施工@03衬砌机械化施工@02栈桥.pdf"
+        "output_name": "10隧道机械化施工@03衬砌机械化施工@02栈桥"
       },
       {
         "start_page": 266,
         "end_page": 267,
-        "output_name": "10隧道机械化施工@03衬砌机械化施工@03防水板机械自动铺设.pdf"
+        "output_name": "10隧道机械化施工@03衬砌机械化施工@03防水板机械自动铺设"
       },
       {
         "start_page": 268,
         "end_page": 269,
-        "output_name": "10隧道机械化施工@04支护机械化施工@01湿喷机械手喷射混凝土.pdf"
+        "output_name": "10隧道机械化施工@04支护机械化施工@01湿喷机械手喷射混凝土"
       },
       {
         "start_page": 270,
         "end_page": 272,
-        "output_name": "10隧道机械化施工@04支护机械化施工@02凿岩台车锚杆作业.pdf"
+        "output_name": "10隧道机械化施工@04支护机械化施工@02凿岩台车锚杆作业"
       }
     ]
   }

+ 2 - 2
SourceCode/DataMiddleware/tools/pdf_split/__init__.py

@@ -1,4 +1,4 @@
-from processor import PDFProcessor
-from model import SplitModel
+from tools.pdf_split.processor import PDFProcessor
+from tools.pdf_split.model import SplitModel
 
 __all__ = ['SplitModel', 'PDFProcessor']

+ 0 - 8
SourceCode/DataMiddleware/tools/pdf_split/main.py

@@ -1,8 +0,0 @@
-from processor import PDFProcessor
-
-def main():
-    """PDF拆分工具的主入口函数"""
-    PDFProcessor.split("01")
-
-if __name__ == '__main__':
-    main()

+ 73 - 0
SourceCode/DataMiddleware/tools/pdf_split/mysql_store.py

@@ -0,0 +1,73 @@
+from tools.utils.mysql_helper import MySQLHelper
+from datetime import datetime
+from typing import Optional, Dict
+
+class MysqlStore:
+    # Data-access wrapper for the pdf_standard and pdf_records tables.
+    # Every method enters the shared MySQLHelper as a context manager and runs
+    # a single parameterized statement through it.
+    def __init__(self):
+        self._db_helper = MySQLHelper()
+
+    # ------------------ pdf_standard: basic operations ------------------
+    def create_standard(self, code: str, name: str) -> int:
+        """Insert a row into pdf_standard.
+
+        Returns the result of ``MySQLHelper.execute`` -- presumably the id of
+        the inserted row; TODO confirm against the helper.
+        """
+        sql = """INSERT INTO pdf_standard 
+                   (standard_code, standard_name, created_at, updated_at)
+                   VALUES (%s, %s, NOW(), NOW())"""  # use NOW() to store a full timestamp
+        with self._db_helper as db:
+            return db.execute(sql, (code, name))
+
+    def get_standard(self, standard_id: int) -> Optional[Dict]:
+        """Fetch one pdf_standard row by primary key via ``fetch_one``."""
+        sql = "SELECT * FROM pdf_standard WHERE id = %s"
+        with self._db_helper as db:
+            return db.fetch_one(sql, (standard_id,))
+    def get_all_standards(self):
+        """Return all pdf_standard rows, newest ``created_at`` first."""
+        sql = "SELECT * FROM pdf_standard ORDER BY created_at DESC"
+        with self._db_helper as db:
+            return db.execute_query(sql)
+
+    def get_all_pdf_records(self):
+        """Return chapter_name, pdf_path and created_at of every pdf_records row, newest first."""
+        sql = "SELECT chapter_name, pdf_path, created_at FROM pdf_records ORDER BY created_at DESC"
+        with self._db_helper as db:
+            return db.execute_query(sql)
+    # ------------------ pdf_records: core operations ------------------
+    def add_pdf_record(
+            self,
+            standard_name: str,
+            pdf_path: str,
+            image_path: str,
+            markdown_text: str,
+            chapter: str = "",
+            section: str = "",
+            subsection: str = ""
+    ) -> int:
+        """Insert one processed-PDF row into pdf_records.
+
+        ``chapter``, ``section`` and ``subsection`` are stored in the
+        chapter_name, section_name and subsection_name columns.
+        Returns the result of ``MySQLHelper.execute`` -- presumably the id of
+        the inserted row; TODO confirm against the helper.
+        """
+        sql = """INSERT INTO pdf_records 
+                   (standard_name, chapter_name, section_name, subsection_name,
+                    pdf_path, image_path, markdown_text, created_at, updated_at)
+                   VALUES (%s, %s, %s, %s, %s, %s, %s, NOW(), NOW())"""  # timestamps are set explicitly
+        params = (standard_name, chapter, section, subsection,
+                  pdf_path, image_path, markdown_text)
+        with self._db_helper as db:
+            return db.execute(sql, params)
+
+    def update_markdown(self, record_id: int, new_text: str) -> bool:
+        """Replace markdown_text (and bump updated_at) of the row with the given id.
+
+        NOTE(review): the value returned is whatever ``MySQLHelper.execute``
+        returns; it is not converted to bool here -- confirm it matches the
+        annotation.
+        """
+        sql = "UPDATE pdf_records SET markdown_text = %s, updated_at = NOW() WHERE id = %s"
+        with self._db_helper as db:
+            return db.execute(sql, (new_text, record_id))
+
+    def get_standard_by_name(self, name: str) -> dict:
+        """Fetch one pdf_standard row whose standard_name equals ``name``.
+
+        NOTE(review): presumably falsy/None when nothing matches (the same
+        ``fetch_one`` call is annotated Optional in ``get_standard``) -- confirm.
+        """
+        sql = "SELECT * FROM pdf_standard WHERE standard_name = %s"
+        with self._db_helper as db:
+            return db.fetch_one(sql, (name,))
+
+    def update_pdf_record(self, markdown_text: str, image_paths: str, by_image_path: str):
+        """Overwrite markdown_text and image_path on matching pdf_records rows.
+
+        ``by_image_path`` is bound directly to ``image_path LIKE %s``; no
+        wildcard is added here, so the caller must include ``%``/``_`` itself
+        if a prefix or substring match is wanted. The result of ``execute`` is
+        discarded and nothing is returned.
+        """
+        with self._db_helper as db:
+            sql = """UPDATE pdf_records 
+                     SET markdown_text = %s, 
+                         image_path = %s 
+                     WHERE image_path LIKE %s"""
+            db.execute(sql, (markdown_text, image_paths, by_image_path))

+ 324 - 74
SourceCode/DataMiddleware/tools/pdf_split/processor.py

@@ -1,10 +1,15 @@
-import json,os
+import json,os,time
+import re
 from typing import List
 from PyPDF2 import PdfReader, PdfWriter
-from model import SplitModel,PageConfig
+from tools.pdf_split.model import SplitModel,PageConfig
 from PIL import Image
 import io
-import pymupdf  
+import pymupdf
+import tools.utils as utils
+from tools.utils.file_helper import encode_image
+from tools.utils.ai_helper import AiHelper
+from tools.pdf_split.mysql_store import MysqlStore
 
 class PDFProcessor:
     """PDF处理器类,负责执行PDF文件的拆分操作"""
@@ -170,87 +175,332 @@ class PDFProcessor:
     def extract_and_merge_images(input_file: str, output_file: str = None) -> str:
         try:
             pdf_document = pymupdf.open(input_file)
-            images = []
-            total_height = 0
-            max_width = 0
+            # 根据输入路径生成图片目录
+            output_name = os.path.splitext(os.path.basename(input_file))[0]
+            parts = input_file.rsplit('/pdf/', 1)
+            output_file = '/pdf/'.join(parts[:-1]) + '/img/' + parts[-1]
+            output_dir = os.path.splitext(output_file)[0]
+
+            #output_dir = output_file + f'/{output_name}/'
+            os.makedirs(output_dir, exist_ok=True)
 
             # 遍历每一页提取图片
             for page_num in range(pdf_document.page_count):
                 page = pdf_document[page_num]
-                
                 # 获取页面上的所有图片,包括内嵌图片
-                pix = page.get_pixmap(matrix=pymupdf.Matrix(2, 2))  # 使用2倍缩放以获得更好的质量
-                img_data = pix.tobytes("png")
-                
-                # 将图片字节转换为PIL Image对象
-                image = Image.open(io.BytesIO(img_data))
-                if image.mode != 'RGB':
-                    image = image.convert('RGB')
-                
-                images.append(image)
-                total_height += image.height
-                max_width = max(max_width, image.width)
-
-            # 如果没有找到图片
-            if not images:
-                print("未在PDF中找到任何图片")
-                return ''
-
-            # 创建新的图片用于拼接
-            merged_image = Image.new('RGB', (max_width, total_height))
-            y_offset = 0
-
-            # 将所有图片垂直拼接
-            for img in images:
-                x_offset = (max_width - img.width) // 2
-                merged_image.paste(img, (x_offset, y_offset))
-                y_offset += img.height
-
-            # 设置输出路径
-            if output_file is None:
-                parts = input_file.rsplit('/pdf/', 1)
-                output_file = '/pdf/'.join(parts[:-1]) + '/img/' + parts[-1]
-                output_file = os.path.splitext(output_file)[0] + "_merged.png"
-                os.makedirs(os.path.dirname(output_file), exist_ok=True)
-
-            # 根据图片数量计算目标大小
-            target_size_per_image = 100 * 1024  # 每张图片100KB
-            max_size = target_size_per_image * len(images)
-            scale = 1.0
-            quality = 95
-
-            while True:
-                temp_buffer = io.BytesIO()
-                if scale < 1.0:
-                    new_size = (int(merged_image.width * scale), int(merged_image.height * scale))
-                    resized_image = merged_image.resize(new_size, Image.Resampling.LANCZOS)
-                    resized_image.save(temp_buffer, 'PNG', optimize=True, quality=quality)
-                else:
-                    merged_image.save(temp_buffer, 'PNG', optimize=True, quality=quality)
-                
-                size = temp_buffer.tell()
-                
-                if size <= max_size:
-                    with open(output_file, 'wb') as f:
-                        f.write(temp_buffer.getvalue())
-                        print(f"成功保存图片:[{(size // 1024)} KB] {output_file}")
-                    break
-                
-                if scale > 0.5:
-                    scale *= 0.9
-                else:
-                    # 如果达到最小缩放比例,直接保存当前结果
-                    with open(output_file, 'wb') as f:
-                        f.write(temp_buffer.getvalue())
-                        print(f"成功保存图片:[{(size // 1024)} KB] {output_file}")
-                    break
-
-            return output_file
+                # pix = page.get_pixmap(matrix=pymupdf.Matrix(2, 2))  # 实际使用的缩放参数
+                # img_data = pix.tobytes("png")
+
+                # 初始化压缩参数
+                scale = 1.0
+                img_data = None
+                max_size = 200 * 1024
+                # 循环调整缩放直到符合大小要求
+                while scale >= 0.5:  # 最小缩放比例50%
+                    # 生成临时图片数据
+                    temp_pix = page.get_pixmap(matrix=pymupdf.Matrix(1.5 * scale, 1.5 * scale))
+                    img_data = temp_pix.tobytes("png")
+
+                    if len(img_data) <= max_size:  # 100KB限制
+                        break
+                    scale *= 0.9  # 每次缩小10%
+                # 生成序列文件名
+                img_path = os.path.join(output_dir, f"{page_num + 1:02d}.png")
+                # 保存单页图片
+                with open(img_path, 'wb') as f:
+                    f.write(img_data)
+                print(f"成功保存图片({len(img_data) // 1024}KB): {img_path}")
+            return output_dir
 
         except Exception as e:
             print(f"处理图片时发生错误: {str(e)}")
             return ''
 
+    # @staticmethod
+    # def extract_and_merge_images(input_file: str, output_file: str = None) -> str:
+    #     try:
+    #         pdf_document = pymupdf.open(input_file)
+    #         images = []
+    #         total_height = 0
+    #         max_width = 0
+    #
+    #         # 遍历每一页提取图片
+    #         for page_num in range(pdf_document.page_count):
+    #             page = pdf_document[page_num]
+    #
+    #             # 获取页面上的所有图片,包括内嵌图片
+    #             pix = page.get_pixmap(matrix=pymupdf.Matrix(2, 2))  # 使用2倍缩放以获得更好的质量
+    #             img_data = pix.tobytes("png")
+    #
+    #             # 将图片字节转换为PIL Image对象
+    #             image = Image.open(io.BytesIO(img_data))
+    #             if image.mode != 'RGB':
+    #                 image = image.convert('RGB')
+    #
+    #             images.append(image)
+    #             total_height += image.height
+    #             max_width = max(max_width, image.width)
+    #
+    #         # 如果没有找到图片
+    #         if not images:
+    #             print("未在PDF中找到任何图片")
+    #             return ''
+    #
+    #         # 创建新的图片用于拼接
+    #         merged_image = Image.new('RGB', (max_width, total_height))
+    #         y_offset = 0
+    #
+    #         # 将所有图片垂直拼接
+    #         for img in images:
+    #             x_offset = (max_width - img.width) // 2
+    #             merged_image.paste(img, (x_offset, y_offset))
+    #             y_offset += img.height
+    #
+    #         # 设置输出路径
+    #         if output_file is None:
+    #             parts = input_file.rsplit('/pdf/', 1)
+    #             output_file = '/pdf/'.join(parts[:-1]) + '/img/' + parts[-1]
+    #             output_file = os.path.splitext(output_file)[0] + "_merged.png"
+    #             os.makedirs(os.path.dirname(output_file), exist_ok=True)
+    #
+    #         # 根据图片数量计算目标大小
+    #         target_size_per_image = 200 * 1024  # 每张图片100KB
+    #         max_size = target_size_per_image * len(images)
+    #         scale = 1.0
+    #         quality = 95
+    #
+    #         while True:
+    #             temp_buffer = io.BytesIO()
+    #             if scale < 1.0:
+    #                 new_size = (int(merged_image.width * scale), int(merged_image.height * scale))
+    #                 resized_image = merged_image.resize(new_size, Image.Resampling.LANCZOS)
+    #                 resized_image.save(temp_buffer, 'PNG', optimize=True, quality=quality)
+    #             else:
+    #                 merged_image.save(temp_buffer, 'PNG', optimize=True, quality=quality)
+    #
+    #             size = temp_buffer.tell()
+    #
+    #             if size <= max_size:
+    #                 with open(output_file, 'wb') as f:
+    #                     f.write(temp_buffer.getvalue())
+    #                     print(f"成功保存图片:[{(size // 1024)} KB] {output_file}")
+    #                 break
+    #
+    #             if scale > 0.5:
+    #                 scale *= 0.9
+    #             else:
+    #                 # 如果达到最小缩放比例,直接保存当前结果
+    #                 with open(output_file, 'wb') as f:
+    #                     f.write(temp_buffer.getvalue())
+    #                     print(f"成功保存图片:[{(size // 1024)} KB] {output_file}")
+    #                 break
+    #
+    #         return output_file
+    #
+    #     except Exception as e:
+    #         print(f"处理图片时发生错误: {str(e)}")
+    #         return ''
+    @staticmethod
+    def process_image_to_txt(filename: str):
+        """Analyze every page image of a standard with the AI helper and merge the
+        results into one Markdown file per chapter directory.
+
+        Images are read from ``./temp_files/pdf/output/v1/<filename>/img/<dir>/``
+        where ``<dir>`` is named ``chapter@section@subsection``; directories with
+        fewer than three ``@``-separated parts are skipped. For each directory
+        the merged Markdown is written to ``.../txt/<dir with @ -> _>.md`` and a
+        row is inserted into pdf_records.
+
+        A directory whose images all fail analysis is skipped (no file, no
+        row) instead of aborting the whole run.
+
+        :param filename: name of the standard; also used as its code when the
+            pdf_standard row has to be created.
+        """
+        version = "v1"
+        base_output_dir = "./temp_files/pdf/output/"
+        output_dir = f"{base_output_dir}/{version}/{filename}/"
+        image_dir = f"{output_dir}/img/"
+        txt_dir = f"{output_dir}/txt/"
+
+        db_store = MysqlStore()
+        ai_helper = AiHelper()
+
+        # Create the standard row only if it does not exist yet.
+        if not db_store.get_standard_by_name(filename):
+            db_store.create_standard(code=filename, name=filename)
+
+        try:
+            for dir_path, _dir_names, file_names in os.walk(image_dir):
+                # The root itself holds no page images, only chapter directories.
+                if dir_path == image_dir:
+                    continue
+
+                # Directory name encodes chapter@section@subsection.
+                dir_rel_path = os.path.relpath(dir_path, image_dir)
+
+                chapter_parts = dir_rel_path.split('@')
+                if len(chapter_parts) < 3:
+                    continue  # not a chapter directory
+
+                # Page images are named by page number, so sort numerically.
+                image_files = sorted(
+                    [f for f in file_names if f.lower().endswith(('.png', '.jpg', '.jpeg'))],
+                    key=lambda x: int(x.split('.')[0])
+                )
+
+                if not image_files:
+                    continue
+
+                md_filename = f"{dir_rel_path.replace('@', '_')}.md"
+                md_path = os.path.join(txt_dir, md_filename)
+                os.makedirs(os.path.dirname(md_path), exist_ok=True)
+                md_content = f"# {filename}\n## {'/'.join(chapter_parts)}\n\n"
+
+                all_images = []
+                for img_file in image_files:
+                    img_path = os.path.join(dir_path, img_file)
+                    img_name = os.path.basename(img_path)
+                    try:
+                        page_content = ai_helper.analyze_image_with_ai(img_path)
+                        # Pause 5 seconds between calls to limit the API request rate.
+                        time.sleep(5)
+                        utils.get_logger().info(f"处理图片 {img_path} 成功")
+                        md_content += f"########### {img_path} ####################\n"
+                        md_content += f"--start{img_name}--\n\n"
+                        md_content += f"\n\n{page_content}\n\n"
+                        md_content += f"--end{img_name}--\n\n"
+                        all_images.append(img_path)
+                    except Exception as e:
+                        # One bad page must not lose the rest of the chapter.
+                        print(f"处理图片 {img_file} 失败: {str(e)}")
+                        continue
+
+                # Nothing was analyzed successfully: indexing all_images[0] below
+                # would raise and the outer handler would stop the entire walk.
+                if not all_images:
+                    print(f"目录 {dir_path} 中没有成功处理的图片,跳过")
+                    continue
+
+                with open(md_path, 'w', encoding='utf-8') as f:
+                    f.write(md_content)
+
+                # The image directory .../img/<dir> was rendered from
+                # .../pdf/<dir>.pdf, so map the directory (not a page image)
+                # back to the PDF, swapping only the last '/img/' segment.
+                pdf_path = os.path.abspath('/pdf/'.join(dir_path.rsplit('/img/', 1)) + '.pdf')
+
+                db_store.add_pdf_record(
+                    standard_name=filename,
+                    pdf_path=pdf_path,
+                    image_path='\n'.join([os.path.abspath(p) for p in all_images]),
+                    markdown_text=md_content,
+                    chapter=chapter_parts[0],
+                    section=chapter_parts[1],
+                    subsection=chapter_parts[2]
+                )
+
+                print(f"成功生成合并文档: {md_path}")
+
+        except Exception as e:
+            print(f"处理过程中发生错误: {str(e)}")
+
+    @staticmethod
+    def regenerate_markdown(img_path: str):
+        """Regenerate the Markdown for one page image or for a whole image directory.
+
+        The Markdown file is located from the image directory
+        ``<root>/img/<dir>``: first ``<root>/txt/<dir with @ -> _>.md`` (the
+        name ``process_image_to_txt`` writes), then, as a fallback, the first
+        ``.md`` file inside ``<root>/txt/<dir>/``.
+
+        :param img_path: path of a page image, or of a directory of page
+            images; it must contain a ``/img/`` segment.
+        """
+        processor = PDFProcessor()
+        db_store = MysqlStore()
+        ai_helper = AiHelper()
+
+        if os.path.isdir(img_path):
+            dir_path = img_path.rstrip('/')
+            single_image = None
+        elif os.path.isfile(img_path):
+            dir_path = os.path.dirname(img_path)
+            single_image = img_path
+        else:
+            print(f"路径不存在: {img_path}")
+            return
+
+        # Derive the txt location from the image directory.
+        parts = dir_path.rsplit('/img/', 1)
+        if len(parts) < 2:
+            print("无效的目录路径")
+            return
+        root, chapter_dir = parts
+
+        md_path = f"{root}/txt/{chapter_dir.replace('@', '_')}.md"
+        if not os.path.isfile(md_path):
+            # Fallback: a per-chapter txt directory holding the Markdown file.
+            legacy_dir = f"{root}/txt/{chapter_dir}"
+            md_files = []
+            if os.path.isdir(legacy_dir):
+                md_files = sorted(f for f in os.listdir(legacy_dir) if f.endswith('.md'))
+            if not md_files:
+                print("找不到对应的Markdown文件")
+                return
+            md_path = os.path.join(legacy_dir, md_files[0])
+
+        if single_image is None:
+            # Rebuild the whole directory.
+            processor._process_directory(dir_path, md_path, db_store, ai_helper)
+        else:
+            # Refresh only the section belonging to this image.
+            processor._update_single_image(single_image, md_path, db_store, ai_helper)
+
+    def _process_directory(self, dir_path: str, md_path: str, db_store, ai_helper):
+        """Re-analyze every image in ``dir_path`` and rewrite ``md_path`` from scratch.
+
+        :param dir_path: directory of page images named ``<page number>.<ext>``.
+        :param md_path: Markdown file to overwrite.
+        :param db_store: object providing ``update_pdf_record``.
+        :param ai_helper: object providing ``analyze_image_with_ai(path)``.
+
+        Returns without touching the file or the database when the directory
+        holds no images.
+        """
+        image_files = sorted(
+            [f for f in os.listdir(dir_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))],
+            key=lambda x: int(x.split('.')[0])
+        )
+        # Guard first: otherwise the Markdown would be truncated to a bare
+        # heading and all_images[0] below would raise IndexError.
+        if not image_files:
+            print(f"目录 {dir_path} 中没有可处理的图片")
+            return
+
+        new_content = f"# {os.path.basename(os.path.dirname(md_path))}\n"
+        all_images = []
+
+        for img_file in image_files:
+            img_path = os.path.join(dir_path, img_file)
+            page_content = ai_helper.analyze_image_with_ai(img_path)
+            img_name = os.path.splitext(img_file)[0]
+
+            new_content += f"########### {img_path} ####################\n"
+            new_content += f"--start{img_name}--\n\n"
+            new_content += f"\n\n{page_content}\n\n"
+            new_content += f"--end{img_name}--\n\n"
+            all_images.append(img_path)
+
+        with open(md_path, 'w', encoding='utf-8') as f:
+            f.write(new_content)
+
+        # pdf_records.image_path is stored as newline-joined absolute paths,
+        # so write the same format back and locate the row by directory
+        # prefix; a bare path without a wildcard never matches that value.
+        # NOTE: assumes '/'-separated paths; '\' is MySQL's LIKE escape char.
+        abs_images = [os.path.abspath(p) for p in all_images]
+        dir_prefix = os.path.join(os.path.abspath(dir_path), '')
+        db_store.update_pdf_record(
+            markdown_text=new_content,
+            image_paths='\n'.join(abs_images),
+            by_image_path=f"{dir_prefix}%"
+        )
+
+    def _update_single_image(self, img_path: str, md_path: str, db_store, ai_helper):
+        """Re-analyze one image and replace its section inside ``md_path``.
+
+        A section is delimited by ``--start<name>--`` / ``--end<name>--`` where
+        ``<name>`` is the image file name either without its extension or with
+        it; both forms are recognised. If no such section exists the file is
+        left untouched and the AI helper is not called.
+
+        :param img_path: path of the page image to analyze again.
+        :param md_path: Markdown file containing the section.
+        :param db_store: currently unused (the database update is disabled).
+        :param ai_helper: object providing ``analyze_image_with_ai(path)``.
+        """
+        img_file = os.path.basename(img_path)
+        img_name = os.path.splitext(img_file)[0]
+
+        with open(md_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+
+        # Find which tag spelling this document uses for the image.
+        pattern = None
+        start_tag = end_tag = ""
+        for tag_name in (img_name, img_file):
+            start_tag = f"--start{tag_name}--"
+            end_tag = f"--end{tag_name}--"
+            candidate = re.compile(f'{re.escape(start_tag)}(.*?){re.escape(end_tag)}', re.DOTALL)
+            if candidate.search(content):
+                pattern = candidate
+                break
+
+        if pattern is None:
+            print(f"未在 {md_path} 中找到图片 {img_file} 对应的内容块")
+            return
+
+        new_content = ai_helper.analyze_image_with_ai(img_path)
+        updated_section = f"{start_tag}\n\n{new_content}\n\n{end_tag}"
+
+        # Use a function as the replacement so backslashes in the AI text
+        # (LaTeX, Windows paths, "\1") are inserted literally instead of being
+        # parsed as regex replacement escapes.
+        new_md_content = pattern.sub(lambda _match: updated_section, content)
+
+        with open(md_path, 'w', encoding='utf-8') as f:
+            f.write(new_md_content)
+
+        # Database update is intentionally disabled:
+        # db_store.update_pdf_record(
+        #     markdown_text=new_md_content,
+        #     image_paths=img_path,
+        #     by_image_path=img_path
+        # )
+
+
+
 
 
 

+ 53 - 0
SourceCode/DataMiddleware/tools/test/fast_gpt_client.py

@@ -0,0 +1,53 @@
+import yaml
+import requests
+from pathlib import Path
+from tools.utils import get_config
+
+
+class FastGPTClient:
+    """Minimal client for the FastGPT chat-completions endpoint.
+
+    The endpoint URL and bearer key are read from the ``fastgpt.url`` and
+    ``fastgpt.key`` configuration entries.
+    """
+
+    def __init__(self):
+        self.config = get_config()
+        token = self.config.get('fastgpt.key')
+        self.headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {token}"
+        }
+
+    def chat(self, prompt: str, temperature: float = 0.7) -> str:
+        """Send ``prompt`` as a single user message and return the reply text.
+
+        :param prompt: text of the user message.
+        :param temperature: accepted but not included in the request body.
+        :return: ``choices[0].message.content`` of the JSON response.
+        :raises Exception: any failure is printed and then re-raised.
+        """
+        user_message = {
+            "role": "user",
+            "content": prompt
+        }
+        request_body = {
+            "chatId": "my_chatId111",
+            "stream": False,
+            "detail": False,
+            "messages": [user_message]
+        }
+
+        try:
+            endpoint = self.config.get('fastgpt.url')
+            reply = requests.post(
+                endpoint,
+                headers=self.headers,
+                json=request_body,
+                timeout=500
+            )
+            reply.raise_for_status()
+
+            first_choice = reply.json()['choices'][0]
+            return first_choice['message']['content']
+        except Exception as e:
+            print(f"API调用失败: {str(e)}")
+            raise e
+
+
+# Manual smoke test: sends one prompt through FastGPTClient and prints the
+# reply; any exception is reported on stdout instead of propagating.
+if __name__ == "__main__":
+    try:
+        client = FastGPTClient()
+        response = client.chat("总结一下知识库总定额编号相关的内容")
+        print("FastGPT回复:", response)
+    except Exception as e:
+        print(f"发生异常: {str(e)}")

+ 116 - 0
SourceCode/DataMiddleware/tools/utils/__init__.py

@@ -0,0 +1,116 @@
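+"""
+utils/__init__.py
+
+Package initializer. Imports the AiHelper, ConfigHelper and LoggerHelper classes and defines convenience functions for logging, configuration access and calling the AI API.
+"""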
+import json
+
+from tools.utils.ai_helper import AiHelper
+from tools.utils.config_helper import ConfigHelper
+from tools.utils.logger_helper import LoggerHelper
+#import tools.utils.logger_helper as logger_helper
+
+
+
+def get_logger():
+    """
+    Return the logger provided by ``LoggerHelper.get_logger()``.
+
+    Call sites use this wrapper instead of touching LoggerHelper directly, so
+    logger access goes through a single place.
+
+    :return: whatever ``LoggerHelper.get_logger()`` returns.
+    """
+    #return logger_helper.LoggerHelper.get_logger()
+    return LoggerHelper.get_logger()
+
+
+def clean_log_file(day: int):
+    """
+    Delegate to ``LoggerHelper.clean_log_file(day)``.
+
+    :param day: number of days; presumably log files older than this are
+        removed -- the actual policy lives in LoggerHelper.
+    """
+    #logger_helper.LoggerHelper.clean_log_file(day)
+    LoggerHelper.clean_log_file(day)
+
+
+def get_config():
+    """
+    Return a ConfigHelper for reading application configuration.
+
+    ``ConfigHelper()`` is called on every invocation.
+    NOTE(review): whether that yields a shared instance depends on
+    ConfigHelper -- confirm.
+
+    :return: a ConfigHelper instance.
+    """
+    return ConfigHelper()
+
+
+def reload_config():
+    """
+    Call ``load_config()`` on the ConfigHelper returned by ``get_config()``.
+
+    Intended for use after the configuration file has changed.
+    NOTE(review): other callers only see the reloaded values if ConfigHelper
+    shares its loaded state across instances -- confirm.
+    """
+    get_config().load_config()
+
+
+def get_config_value(key: str, default: str = None):
+    """
+    Look up a configuration value via ``ConfigHelper.get``.
+
+    :param key: configuration key (callers in this project pass dotted keys
+        such as ``"ai.url"``).
+    :param default: value passed to ``ConfigHelper.get`` as the fallback (optional).
+    :return: the result of ``ConfigHelper.get(key, default)``.
+    """
+    return get_config().get(key, default)
+
+
+def get_config_int(key: str, default: int = None):
+    """
+    Look up an integer configuration value via ``ConfigHelper.get_int``.
+
+    :param key: configuration key.
+    :param default: value passed to ``ConfigHelper.get_int`` as the fallback (optional).
+    :return: the result of ``ConfigHelper.get_int(key, default)``.
+    """
+    return get_config().get_int(key, default)
+
+
+def get_config_bool(key: str):
+    """
+    Look up a boolean configuration value via ``ConfigHelper.get_bool``.
+
+    :param key: configuration key.
+    :return: the result of ``ConfigHelper.get_bool(key)``.
+    """
+    return get_config().get_bool(key)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+def call_openai(system_prompt: str, user_prompt: str) -> json:
+    """
+    Run one system + user prompt exchange through a fresh ``AiHelper``.
+
+    The helper is built with no arguments, so it takes its URL, key and model
+    from configuration.
+
+    :param system_prompt: content of the system message.
+    :param user_prompt: content of the user message.
+    :return: whatever ``AiHelper.call_openai`` returns.
+        NOTE(review): the ``-> json`` annotation names the ``json`` module,
+        not a type -- confirm the real return type and annotate accordingly.
+    """
+    return AiHelper().call_openai(system_prompt, user_prompt)
+
+
+

+ 162 - 0
SourceCode/DataMiddleware/tools/utils/ai_helper.py

@@ -0,0 +1,162 @@
+import json
+import re
+
+import tools.utils as utils
+from tools.utils.file_helper import encode_image
+from openai import OpenAI
+
+
+class AiHelper:
+
+    _ai_api_key = None
+    _ai_api_url = None
+    _ai_max_tokens = 150
+
+    def __init__(self, api_url: str=None, api_key: str=None, api_model: str=None):
+        self._ai_api_url = api_url if api_url else utils.get_config_value("ai.url")
+        self._ai_api_key = api_key if api_key else utils.get_config_value("ai.key")
+        self._api_model = api_model if api_model else utils.get_config_value("ai.model")
+        max_tokens = utils.get_config_value("ai.max_tokens")
+        if max_tokens:
+            self._ai_max_tokens = int(max_tokens)
+
+    def call_openai(self, system_prompt: str, user_prompt: str,api_url: str=None,api_key: str=None,api_model: str=None) -> json:
+        if api_url:
+            self._ai_api_url = api_url
+        if api_key:
+            self._ai_api_key = api_key
+        if api_model:
+            self._api_model = api_model
+        if self._ai_api_key is None:
+            raise Exception("AI API key 没有配置")
+        if self._ai_api_url is None:
+            raise Exception("AI API url 没有配置")
+        if self._api_model is None:
+            raise Exception("AI API model 没有配置")
+
+        utils.get_logger().info(f"调用AI API ==> Url:{self._ai_api_url},Model:{self._api_model}")
+
+        client = OpenAI(api_key=self._ai_api_key, base_url=self._ai_api_url)
+        completion = client.chat.completions.create(
+            model=self._api_model,
+            messages=[
+                {
+                    "role": "system",
+                    "content": system_prompt,
+                },
+                {
+                    "role": "user",
+                    "content": user_prompt,
+                },
+            ],
+            stream=False,
+            temperature=0.7,
+            response_format={"type": "json_object"},
+            # max_tokens=self._ai_max_tokens,
+        )
+        try:
+            response = completion.model_dump_json()
+            result = {}
+            response_json = json.loads(response)
+            res_str = self._extract_message_content(response_json)
+            result_data = self._parse_response(res_str, True)
+            if result_data:
+                result["data"] = result_data
+                usage = response_json["usage"]
+                result["completion_tokens"] = usage.get("completion_tokens", 0)
+                result["prompt_tokens"] = usage.get("prompt_tokens", 0)
+                result["total_tokens"] = usage.get("total_tokens", 0)
+                utils.get_logger().info(f"AI Process JSON: {result}")
+            else:
+                utils.get_logger().info(f"AI Response: {response}")
+            return result
+        except Exception as e:
+            raise Exception(f"解析 AI 响应错误: {e}")
+
+    @staticmethod
+    def _extract_message_content(response_json: dict) -> str:
+        utils.get_logger().info(f"AI Response JSON: {response_json}")
+        if "choices" in response_json and len(response_json["choices"]) > 0:
+            choice = response_json["choices"][0]
+            message_content = choice.get("message", {}).get("content", "")
+        elif "message" in response_json:
+            message_content = response_json["message"].get("content", "")
+        else:
+            raise Exception("AI 响应中未找到有效的 choices 或 message 数据")
+
+        # 移除多余的 ```json 和 ```
+        if message_content.startswith("```json") and message_content.endswith(
+                "```"):
+            message_content = message_content[6:-3]
+
+        # 去除开头的 'n' 字符
+        if message_content.startswith("n"):
+            message_content = message_content[1:]
+        # 移除无效的转义字符和时间戳前缀
+        message_content = re.sub(r"\\[0-9]{2}", "",
+                                 message_content)  # 移除 \32 等无效转义字符
+        message_content = re.sub(r"\d{4}-\d{2}-\dT\d{2}:\d{2}:\d{2}\.\d+Z", "",
+                                 message_content)  # 移除时间戳
+        message_content = message_content.strip()  # 去除首尾空白字符
+
+        # 替换所有的反斜杠
+        message_content = message_content.replace("\\", "")
+
+        return message_content
+
+    def _parse_response(self, response: str, first=True) -> json:
+        # utils.get_logger().info(f"AI Response JSON STR: {response}")
+        try:
+            data = json.loads(response)
+            return data
+
+        except json.JSONDecodeError as e:
+            if first:
+                utils.get_logger().error(f"JSON 解析错误,去除部分特殊字符重新解析一次: {e}")
+                # 替换中文引号为空
+                message_content = re.sub(r"[“”]", "", response)  # 替换双引号
+                message_content = re.sub(r"[‘’]", "", message_content)  # 替换单引号
+                return self._parse_response(message_content, False)
+            else:
+                raise Exception(f"解析 AI 响应错误: {response} {e}")
+
+    def analyze_image_with_ai(self,image_path, api_url: str=None,api_key: str=None,api_model: str=None):
+        """调用OpenAI的API分析图片内容"""
+        if api_url:
+            self._ai_api_url = api_url
+        if api_key:
+            self._ai_api_key = api_key
+        if api_model:
+            self._api_model = api_model
+        if self._ai_api_key is None:
+            raise Exception("AI API key 没有配置")
+        if self._ai_api_url is None:
+            raise Exception("AI API url 没有配置")
+        if self._api_model is None:
+            raise Exception("AI API model 没有配置")
+        try:
+            client = OpenAI(api_key=self._ai_api_key, base_url=self._ai_api_url)
+            base64_str = encode_image(image_path)
+            response = client.chat.completions.create(
+                model=self._api_model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text",
+                             "text": "请总结图片中的表格,供RAG系统embedding使用。要求以文本的信息列出,定额编号对应的详细信息,其中表格的列名中显示了定额编号,行名中显示了电算代号。定额编号所示的列代表了这一类定额,通过项目的不同条件来区分,比如长度、地质条件等;而电算代号所示的行则代表了具体的材料、人工等的消耗量,表示在特定定额编号所示的条件下,具体的资源(人力或材料)消耗量。"},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": base64_str
+                                }
+                            }
+                        ]
+                    }
+                ],
+                timeout=600
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            print(f"调用AI接口时出错: {e}")
+        return ''

+ 79 - 0
SourceCode/DataMiddleware/tools/utils/config_helper.py

@@ -0,0 +1,79 @@
+import os
+
+import yaml
+
+
+class ConfigHelper:
+    _instance = None
+
+    # 默认配置文件路径
+    default_config_path = os.path.join(os.path.dirname(__file__), "..", "config.yml")
+
+    # 类变量存储加载的配置
+    _config = None
+    _path = None
+
+    def __new__(cls, *args, **kwargs):
+        if not cls._instance:
+            cls._instance = super(ConfigHelper, cls).__new__(cls)
+        return cls._instance
+
+    def load_config(self, path=None):
+        if self._config is None:
+            if not path:
+                # print(f"使用默认配置文件:{self.default_config_path}")
+                self._path = self.default_config_path
+            else:
+                self._path = path
+            if not os.path.exists(self._path):
+                raise FileNotFoundError(f"没有找到文件或目录:'{self._path}'")
+        with open(self._path, "r", encoding="utf-8") as file:
+            self._config = yaml.safe_load(file)
+        # 合并环境变量配置
+        self._merge_env_vars()
+        # print(f"加载的配置文件内容:{self._config}")
+        return self._config
+
+    def _merge_env_vars(self, env_prefix="APP_"):  # 环境变量前缀为 APP_
+        for key, value in os.environ.items():
+            if key.startswith(env_prefix):
+                config_key = key[len(env_prefix) :].lower()
+                self._set_nested_key(self._config, config_key.split("__"), value)
+
+    def _set_nested_key(self, config, keys, value):
+        if len(keys) > 1:
+            if keys[0] not in config or not isinstance(config[keys[0]], dict):
+                config[keys[0]] = {}
+            self._set_nested_key(config[keys[0]], keys[1:], value)
+        else:
+            config[keys[0]] = value
+
+    def get(self, key: str, default: str = None):
+        if self._config is None:
+            self.load_config(self._path)
+        keys = key.split(".")
+        config = self._config
+        for k in keys:
+            if isinstance(config, dict) and k in config:
+                config = config[k]
+            else:
+                return default
+        return config
+
+    def get_bool(self, key: str) -> bool:
+        val = str(self.get(key, "0"))
+        return True if val.lower() == "true" or val == "1" else False
+
+    def get_int(self, key: str, default: int = 0) -> int:
+        val = self.get(key)
+        if not val:
+            return default
+        try:
+            return int(val)
+        except ValueError:
+            return default
+
+    def get_all(self):
+        if self._config is None:
+            self.load_config(self._path)
+        return self._config

+ 17 - 0
SourceCode/DataMiddleware/tools/utils/file_helper.py

@@ -0,0 +1,17 @@
+import os
+import mimetypes
+import base64
+
+
+
+
+def encode_image(path: str):
+    # 根据文件扩展名获取 MIME 类型
+    mime_type, _ = mimetypes.guess_type(path)
+    if mime_type is None:
+        mime_type = 'image/jpeg'  # 默认使用 jpeg 类型
+    # 将图片编码为 base64 字符串
+    with open(path, "rb") as image_file:
+        encoded_string = base64.b64encode(image_file.read())
+        base64Str = encoded_string.decode("utf-8")
+        return f"data:{mime_type};base64,{base64Str}"

+ 113 - 0
SourceCode/DataMiddleware/tools/utils/logger_helper.py

@@ -0,0 +1,113 @@
+import logging
+import os
+from datetime import datetime
+from logging.handlers import TimedRotatingFileHandler
+
+from tools.utils.config_helper import ConfigHelper
+
+
+class LoggerHelper:
+    """
+    日志辅助类,用于创建和提供日志记录器实例
+    该类实现了单例模式,确保在整个应用程序中只有一个日志记录器实例被创建和使用
+    """
+
+    _instance = None
+    config = ConfigHelper()
+    _log_file_name = f"{config.get("logger.file_name", "log")}.log"
+    _log_file_path = config.get("logger.file_path", "./logs")
+    _log_level_string = config.get("logger.level", "INFO")
+
+    def __new__(cls, *args, **kwargs):
+        """
+        实现单例模式,确保日志记录器仅被创建一次
+        如果尚未创建实例,则创建并初始化日志记录器
+        """
+        if not cls._instance:
+            cls._instance = super(LoggerHelper, cls).__new__(cls, *args, **kwargs)
+            try:
+                cls._instance._initialize_logger()
+            except Exception as e:
+                raise Exception(f"配置logger出错: {e}")
+        return cls._instance
+
+    @property
+    def logger(self):
+        return self._logger
+
+    def _initialize_logger(self):
+        """
+        初始化日志记录器,包括设置日志级别、创建处理器和格式化器,并将它们组合起来
+        """
+        log_level = self._get_log_level()
+        self._logger = logging.getLogger("app_logger")
+        self._logger.setLevel(log_level)
+
+        if not os.path.exists(self._log_file_path):
+            os.makedirs(self._log_file_path)
+
+        # 创建按日期分割的文件处理器
+        file_handler = TimedRotatingFileHandler(
+            os.path.join(self._log_file_path, self._log_file_name),
+            when="midnight",
+            interval=1,
+            backupCount=7,
+            encoding="utf-8",
+        )
+        file_handler.setLevel(log_level)
+
+        # 创建控制台处理器
+        console_handler = logging.StreamHandler()
+        console_handler.setLevel(logging.DEBUG)
+
+        # 创建格式化器
+        formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+
+        # 将格式化器添加到处理器
+        file_handler.setFormatter(formatter)
+        console_handler.setFormatter(formatter)
+
+        # 将处理器添加到日志记录器
+        self._logger.addHandler(file_handler)
+        self._logger.addHandler(console_handler)
+
+    def _get_log_level(self):
+        try:
+            # 尝试将字符串转换为 logging 模块中的日志级别常量
+            log_level = getattr(logging, self._log_level_string.upper())
+            if not isinstance(log_level, int):
+                raise ValueError
+            return log_level
+        except (AttributeError, ValueError):
+            raise ValueError(
+                f"配置logger出错: Unknown level: '{self._log_level_string}'"
+            )
+
+    @classmethod
+    def get_logger(cls):
+        """
+        提供初始化后的日志记录器实例
+        :return: 初始化后的日志记录器实例
+        """
+        if not cls._instance:
+            cls._instance = cls()
+        return cls._instance._logger
+
+    @classmethod
+    def clean_log_file(cls, day: int):
+        if not os.path.exists(cls._log_file_path):
+            return
+        for filename in os.listdir(cls._log_file_path):
+            if filename != cls._log_file_name and filename.startswith(
+                cls._log_file_name
+            ):
+                try:
+                    file_path = os.path.join(cls._log_file_path, filename)
+                    file_time = datetime.strptime(
+                        filename.replace(f"{cls._log_file_name}.", ""), "%Y-%m-%d"
+                    )
+                    if (datetime.now() - file_time).days > day:
+                        os.remove(file_path)
+                        cls.get_logger().info(f"  删除日志文件: {file_path}")
+                except Exception as e:
+                    cls.get_logger().error(f"删除日志文件出错: {filename} {e}")

+ 117 - 0
SourceCode/DataMiddleware/tools/utils/mysql_helper.py

@@ -0,0 +1,117 @@
+import pymysql
+import tools.utils as utils
+from pymysql.cursors import DictCursor
+
+
+class MySQLHelper:
+
+    def __init__(self):
+        try:
+            self.host = utils.get_config_value("mysql.host")
+            self.user = utils.get_config_value("mysql.user")
+            self.password = utils.get_config_value("mysql.password")
+            self.db = utils.get_config_value("mysql.db")
+            self.port = int(utils.get_config_value("mysql.port"))
+            self.charset = utils.get_config_value("mysql.charset")
+            self.connection = None
+        except Exception as e:
+            utils.get_logger().error(f"加载数据库配置文件失败: {e}")
+
+    def connect(self):
+        try:
+            self.connection = pymysql.connect(
+                host=self.host,
+                user=self.user,
+                password=self.password,
+                db=self.db,
+                port=self.port,
+                charset=self.charset,
+                cursorclass=DictCursor,
+            )
+            # utils.get_logger().info(f"成功连接到数据库:{self.db}。")
+        except pymysql.MySQLError as e:
+            utils.get_logger().error(
+                f"数据库连接失败: {self.host}:{self.port} {self.db}"
+            )
+            self.connection = None  # 确保连接失败时设置为 None
+            raise Exception(f"连接数据库失败: {e}")
+
+    def disconnect(self):
+        if self.connection and self.connection.open:
+            self.connection.close()
+            # utils.get_logger().info("数据库连接已关闭。")
+
+    def execute_query(self, query, params=None):
+        try:
+            with self.connection.cursor() as cursor:
+                cursor.execute(query, params)
+                result = cursor.fetchall()
+                return result
+        except pymysql.MySQLError as e:
+            utils.get_logger().error(f"执行查询时出错:{e}")
+            return None
+
+    def execute_non_query(self, query, params=None):
+        if isinstance(params, list) and all(isinstance(p, tuple) for p in params):
+            self.execute_many(query, params)
+        elif isinstance(params, tuple):
+            self.execute(query, params)
+        else:
+            self.execute(query, (params,))
+
+    def execute(self, query, params=None):
+        try:
+            with self.connection.cursor() as cursor:
+                cursor.execute(query, params)
+                self.connection.commit()
+        except pymysql.MySQLError as e:
+            utils.get_logger().error(f"执行非查询时出错:{e}")
+            self.connection.rollback()
+
+    def execute_many(self, query, params: list):
+        if isinstance(params, list) and all(isinstance(p, tuple) for p in params):
+            try:
+                with self.connection.cursor() as cursor:
+                    cursor.executemany(query, params)
+                    self.connection.commit()
+            except pymysql.MySQLError as e:
+                utils.get_logger().error(f"执行非查询时出错:{e}")
+                self.connection.rollback()
+        else:
+            raise ValueError("参数必须是元组列表")
+
+    def fetch_one(self, query, params=None):
+        try:
+            with self.connection.cursor() as cursor:
+                cursor.execute(query, params)
+                result = cursor.fetchone()
+                return result
+        except pymysql.MySQLError as e:
+            utils.get_logger().error(f"获取一条记录时出错:{e}")
+            return None
+
+    def __enter__(self):
+        """
+        当进入上下文时自动调用此方法。
+        它负责建立连接,并将当前实例返回,以便在上下文中使用。
+
+        :return: 返回实例本身,以便在上下文中使用。
+        """
+
+        self.connect()  # 建立连接
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        """
+        当退出上下文时自动调用此方法。
+        无论上下文中的代码是否完成或因异常退出,此方法都会被调用,以确保断开连接。
+
+        :param exc_type: 异常类型, 如果没有异常则为None。
+        :param exc_value: 异常值, 如果没有异常则为None。
+        :param traceback: 异常的traceback对象, 如果没有异常则为None。
+        """
+        if exc_type:
+            utils.get_logger().error(
+                f"数据库发生异常,断开连接。异常类型:{exc_type}, 异常值:{exc_value} traceback: {traceback}"
+            )
+        self.disconnect()  # 断开连接