Bläddra i källkod

add tools 分割pdf

YueYunyun 4 månader sedan
förälder
incheckning
0d91071970

+ 5 - 4
SourceCode/DataMiddleware/requirements.txt

@@ -3,8 +3,9 @@ PyYAML==6.0.2
 Requests==2.32.3
 openai==1.58.1
 pandas~=2.2.3
-pdfplumber~=0.11.5
-pytesseract~=0.3.13
-pillow~=11.1.0
 Flask~=3.1.0
-pdf2image~=1.17.0
+click~=8.1.8
+PyPDF2~=3.0.1
+pillow~=11.1.0
+pathlib~=1.0.1
+PyMuPDF~=1.25.3

+ 0 - 0
SourceCode/DataMiddleware/tools/__init__.py


+ 417 - 0
SourceCode/DataMiddleware/tools/pdf_json/v1/01.json

@@ -0,0 +1,417 @@
+[
+	{
+		"output_dir": "01沟槽及管道/",
+		"page_configs": [
+			{
+				"start_page": 3,
+				"end_page": 8,
+				"output_name": "01沟槽及管道@01光(电)缆沟@01光(电)缆沟.pdf"
+			},
+			{
+				"start_page": 9,
+				"end_page": 13,
+				"output_name": "01沟槽及管道@02铺设光(电)缆管道@01铺设光(电)缆管道.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "02光(电)缆线路/",
+		"page_configs": [
+			{
+				"start_page": 14,
+				"end_page": 23,
+				"output_name": "02光(电)缆线路@01敷设长途光缆@01敷设埋式光缆.pdf"
+			},
+			{
+				"start_page": 24,
+				"end_page": 26,
+				"output_name": "02光(电)缆线路@01敷设长途光缆@02敷设管道光缆.pdf"
+			},
+			{
+				"start_page": 27,
+				"end_page": 32,
+				"output_name": "02光(电)缆线路@01敷设长途光缆@03敷设槽道光缆.pdf"
+			},
+			{
+				"start_page": 33,
+				"end_page": 33,
+				"output_name": "02光(电)缆线路@02安装光纤监测设备@01安装光纤监测设备.pdf"
+			},
+			{
+				"start_page": 34,
+				"end_page": 42,
+				"output_name": "02光(电)缆线路@03敷设长途电缆@01敷设埋式长途电缆.pdf"
+			},
+			{
+				"start_page": 43,
+				"end_page": 45,
+				"output_name": "02光(电)缆线路@03敷设长途电缆@02敷设管道长途电缆.pdf"
+			},
+			{
+				"start_page": 46,
+				"end_page": 50,
+				"output_name": "02光(电)缆线路@03敷设长途电缆@03敷设长途电缆.pdf"
+			},
+			{
+				"start_page": 51,
+				"end_page": 64,
+				"output_name": "02光(电)缆线路@04敷设地区通信光(电)缆@01敷设地区通信光(电)缆.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "03配线架(柜)及走线架(槽)安装/",
+		"page_configs": [
+			{
+				"start_page": 65,
+				"end_page": 69,
+				"output_name": "03配线架(柜)及走线架(槽)安装@01配线架(柜)及走线架(槽)安装@01配线架(柜)及走线架(槽)安装.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "04传输及接入网设备/",
+		"page_configs": [
+			{
+				"start_page": 70,
+				"end_page": 73,
+				"output_name": "04传输及接入网设备@01安装传输及接入网设备@01安装传输及接入网设备.pdf"
+			},
+			{
+				"start_page": 74,
+				"end_page": 77,
+				"output_name": "04传输及接入网设备@01安装传输及接入网设备@02安装传输及接入网设备.pdf"
+			},
+			{
+				"start_page": 78,
+				"end_page": 79,
+				"output_name": "04传输及接入网设备@01安装传输及接入网设备@03安装传输及接入网设备.pdf"
+			},
+			{
+				"start_page": 80,
+				"end_page": 82,
+				"output_name": "04传输及接入网设备@01安装传输及接入网设备@04安装传输及接入网设备.pdf"
+			},
+			{
+				"start_page": 83,
+				"end_page": 84,
+				"output_name": "04传输及接入网设备@02安装传输及接入网网管设备@01安装传输及接入网网管设备.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "05数据通信网设备/",
+		"page_configs": [
+			{
+				"start_page": 85,
+				"end_page": 86,
+				"output_name": "05数据通信网设备@01安装数据网设备@01安装数据网设备.pdf"
+			},
+			{
+				"start_page": 87,
+				"end_page": 88,
+				"output_name": "05数据通信网设备@02安装数据网网管设备@01安装数据网网管设备.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "06电话交换设备/",
+		"page_configs": [
+			{
+				"start_page": 89,
+				"end_page": 91,
+				"output_name": "06电话交换设备@01电话交换设备@01电话交换设备.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "07有线调度通信系统设备/",
+		"page_configs": [
+			{
+				"start_page": 92,
+				"end_page": 94,
+				"output_name": "07有线调度通信系统设备@01安装与调试调度交换机@01安装与调试调度交换机.pdf"
+			},
+			{
+				"start_page": 95,
+				"end_page": 99,
+				"output_name": "07有线调度通信系统设备@02安装与调试叫班系统@01安装与调试叫班系统.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "08数字移动通信系统(GSM-R)/",
+		"page_configs": [
+			{
+				"start_page": 100,
+				"end_page": 106,
+				"output_name": "08数字移动通信系统(GSM-R)@01架设漏泄同轴电缆@01架设漏泄同轴电缆.pdf"
+			},
+			{
+				"start_page": 107,
+				"end_page": 110,
+				"output_name": "08数字移动通信系统(GSM-R)@02架设通信铁塔@01架设通信铁塔.pdf"
+			},
+			{
+				"start_page": 111,
+				"end_page": 115,
+				"output_name": "08数字移动通信系统(GSM-R)@03安装无线网及附属设备@01安装基站及无线中继设备.pdf"
+			},
+			{
+				"start_page": 116,
+				"end_page": 117,
+				"output_name": "08数字移动通信系统(GSM-R)@03安装无线网及附属设备@02安装无线终端设备.pdf"
+			},
+			{
+				"start_page": 118,
+				"end_page": 119,
+				"output_name": "08数字移动通信系统(GSM-R)@03安装无线网及附属设备@03安装漏缆监测设备.pdf"
+			},
+			{
+				"start_page": 120,
+				"end_page": 122,
+				"output_name": "08数字移动通信系统(GSM-R)@04安装核心网设备@01安装核心网设备.pdf"
+			},
+			{
+				"start_page": 123,
+				"end_page": 124,
+				"output_name": "08数字移动通信系统(GSM-R)@05移动通信系统调试@01移动通信系统调试.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "09会议电视系统设备/",
+		"page_configs": [
+			{
+				"start_page": 125,
+				"end_page": 128,
+				"output_name": "09会议电视系统设备@01会议电视系统设备@01会议电视系统设备.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "10综合视频监控系统设备/",
+		"page_configs": [
+			{
+				"start_page": 129,
+				"end_page": 139,
+				"output_name": "10综合视频监控系统设备@01综合视频监控系统设备@01综合视频监控系统设备.pdf"
+			},
+			{
+				"start_page": 140,
+				"end_page": 143,
+				"output_name": "10综合视频监控系统设备@02安装视频采集点及汇集点设备@01安装视频采集点及汇集点设备.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "11应急通信系统设备/",
+		"page_configs": [
+			{
+				"start_page": 144,
+				"end_page": 146,
+				"output_name": "11应急通信系统设备@01安装应急通信系统设备@01安装应急通信系统设备.pdf"
+			},
+			{
+				"start_page": 147,
+				"end_page": 149,
+				"output_name": "11应急通信系统设备@02安装隧道应急电话设备@01安装隧道应急电话设备.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "12时钟及时间同步系统设备/",
+		"page_configs": [
+			{
+				"start_page": 150,
+				"end_page": 152,
+				"output_name": "12时钟及时间同步系统设备@01时钟及时间同步系统设备@01时钟及时间同步系统设备.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "13通信电源设备及防雷接地装置/",
+		"page_configs": [
+			{
+				"start_page": 153,
+				"end_page": 157,
+				"output_name": "13通信电源设备及防雷接地装置@01安装通信电源设备@01安装通信电源设备.pdf"
+			},
+			{
+				"start_page": 158,
+				"end_page": 160,
+				"output_name": "13通信电源设备及防雷接地装置@02安装防雷及地线装置@01安装防雷及地线装置.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "14电源及设备房屋环境监控设备/",
+		"page_configs": [
+			{
+				"start_page": 161,
+				"end_page": 162,
+				"output_name": "14电源及设备房屋环境监控设备@01电源及设备房屋环境监控设备@01电源及设备房屋环境监控设备.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "15综合布线/",
+		"page_configs": [
+			{
+				"start_page": 163,
+				"end_page": 168,
+				"output_name": "15综合布线@01综合布线@01综合布线.pdf"
+			}
+		]
+	},
+	{
+		"output_dir": "16附录_工程量组成/",
+		"page_configs": [
+			{
+				"start_page": 170,
+				"end_page": 170,
+				"output_name": "16附录_工程量组成@01沟槽及管道@01挖填光(电)缆沟.pdf"
+			},
+			{
+				"start_page": 171,
+				"end_page": 172,
+				"output_name": "16附录_工程量组成@01沟槽及管道@02铺设光(电)缆管道.pdf"
+			},
+			{
+				"start_page": 173,
+				"end_page": 178,
+				"output_name": "16附录_工程量组成@02光(电)缆敷设@01敷设长途光缆.pdf"
+			},
+			{
+				"start_page": 179,
+				"end_page": 179,
+				"output_name": "16附录_工程量组成@02光(电)缆敷设@02安装光纤监测设备.pdf"
+			},
+			{
+				"start_page": 180,
+				"end_page": 182,
+				"output_name": "16附录_工程量组成@02光(电)缆敷设@03敷设长途电缆.pdf"
+			},
+			{
+				"start_page": 183,
+				"end_page": 186,
+				"output_name": "16附录_工程量组成@02光(电)缆敷设@04敷设地区通信光(电)缆.pdf"
+			},
+			{
+				"start_page": 187,
+				"end_page": 188,
+				"output_name": "16附录_工程量组成@03配线架(柜)及走线架(槽)安装@01配线架(柜)及走线架(槽)安装.pdf"
+			},
+			{
+				"start_page": 189,
+				"end_page": 193,
+				"output_name": "16附录_工程量组成@04传输及接入网设备@01安装传输及接入网设备.pdf"
+			},
+			{
+				"start_page": 194,
+				"end_page": 194,
+				"output_name": "16附录_工程量组成@04传输及接入网设备@02安装传输及接入网网管设备.pdf"
+			},
+				{
+				"start_page": 195,
+				"end_page": 195,
+				"output_name": "16附录_工程量组成@05数据通信网设备@01安装数据网设备.pdf"
+			},
+			{
+				"start_page": 196,
+				"end_page": 196,
+				"output_name": "16附录_工程量组成@05数据通信网设备@02安装数据网网管设备.pdf"
+			},
+			{
+				"start_page": 197,
+				"end_page": 197,
+				"output_name": "16附录_工程量组成@06电话交换设备@01电话交换设备.pdf"
+			},
+			{
+				"start_page": 198,
+				"end_page": 198,
+				"output_name": "16附录_工程量组成@07有线调度通信系统设备@01安装与调试调度交换机.pdf"
+			},
+			{
+				"start_page": 199,
+				"end_page": 200,
+				"output_name": "16附录_工程量组成@07有线调度通信系统设备@02安装与调试叫班系统.pdf"
+			},
+				{
+				"start_page": 201,
+				"end_page": 201,
+				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@01架设漏泄同轴电缆.pdf"
+			},
+			{
+				"start_page": 202,
+				"end_page": 202,
+				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@02架设通信铁塔.pdf"
+			},
+			{
+				"start_page": 203,
+				"end_page": 205,
+				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@03安装无线网及附属设备.pdf"
+			},
+			{
+				"start_page": 206,
+				"end_page": 208,
+				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@04安装核心网设备.pdf"
+			},
+			{
+				"start_page": 209,
+				"end_page": 209,
+				"output_name": "16附录_工程量组成@08数字移动通信系统(GSM-R)@05移动通信系统调试.pdf"
+			},
+			{
+				"start_page": 210,
+				"end_page": 210,
+				"output_name": "16附录_工程量组成@09会议电视系统设备@01会议电视系统设备.pdf"
+			},
+			{
+				"start_page": 211,
+				"end_page": 215,
+				"output_name": "16附录_工程量组成@10综合视频监控系统设备@01视频采集点及汇集点设备.pdf"
+			},
+			{
+				"start_page": 216,
+				"end_page": 216,
+				"output_name": "16附录_工程量组成@10综合视频监控系统设备@02视频采节点.pdf"
+			},
+				{
+				"start_page": 217,
+				"end_page": 217,
+				"output_name": "16附录_工程量组成@11应急通信系统设备@01安装应急通信系统设备.pdf"
+			},
+			{
+				"start_page": 218,
+				"end_page": 218,
+				"output_name": "16附录_工程量组成@11应急通信系统设备@02安装隧道应急电话设备.pdf"
+			},
+			{
+				"start_page": 219,
+				"end_page": 219,
+				"output_name": "16附录_工程量组成@12时钟及时间同步系统设备@01时钟及时间同步系统设备.pdf"
+			},
+			{
+				"start_page": 220,
+				"end_page": 221,
+				"output_name": "16附录_工程量组成@13通信电源设备及防雷接地装置@01安装通信电源设备.pdf"
+			},
+			{
+				"start_page": 222,
+				"end_page": 222,
+				"output_name": "16附录_工程量组成@13通信电源设备及防雷接地装置@02安装防雷及地线装置.pdf"
+			},
+			{
+				"start_page": 223,
+				"end_page": 223,
+				"output_name": "16附录_工程量组成@14电源及设备房屋环境监控设备@01电源及设备房屋环境监控设备.pdf"
+			},
+			{
+				"start_page": 223,
+				"end_page": 226,
+				"output_name": "16附录_工程量组成@15综合布线@01综合布线.pdf"
+			}
+		]
+	}
+]

+ 562 - 0
SourceCode/DataMiddleware/tools/pdf_json/v1/03.json

@@ -0,0 +1,562 @@
+[
+  {
+    "output_dir": "01洞身开挖、出砟/",
+    "page_configs": [
+      {
+        "start_page": 3,
+        "end_page": 6,
+        "output_name": "01洞身开挖、出砟@01洞身开挖@01隧道断面有效面积≤40m2(轨道运输).pdf"
+      },
+      {
+        "start_page": 7,
+        "end_page": 9,
+        "output_name": "01洞身开挖、出砟@01洞身开挖@02隧道断面有效面积≤40m2(汽车运输).pdf"
+      },
+      {
+        "start_page": 10,
+        "end_page": 13,
+        "output_name": "01洞身开挖、出砟@01洞身开挖@03隧道断面有效面积≤60m2(轨道运输).pdf"
+      },
+      {
+        "start_page": 14,
+        "end_page": 16,
+        "output_name": "01洞身开挖、出砟@01洞身开挖@04隧道断面有效面积≤60m2(汽车运输).pdf"
+      },
+      {
+        "start_page": 17,
+        "end_page": 19,
+        "output_name": "01洞身开挖、出砟@01洞身开挖@05隧道断面有效面积≤85m2.pdf"
+      },
+      {
+        "start_page": 20,
+        "end_page": 22,
+        "output_name": "01洞身开挖、出砟@01洞身开挖@06隧道断面有效面积>85m2.pdf"
+      },
+      {
+        "start_page": 23,
+        "end_page": 24,
+        "output_name": "01洞身开挖、出砟@02出砟运输@01正洞轨道出砟.pdf"
+      },
+      {
+        "start_page": 25,
+        "end_page": 27,
+        "output_name": "01洞身开挖、出砟@02出砟运输@02正洞汽车出砟.pdf"
+      },
+      {
+        "start_page": 28,
+        "end_page": 31,
+        "output_name": "01洞身开挖、出砟@03正洞通过辅助坑道出砟运输@01通过有轨斜井出砟.pdf"
+      },
+      {
+        "start_page": 32,
+        "end_page": 32,
+        "output_name": "01洞身开挖、出砟@03正洞通过辅助坑道出砟运输@02通过无轨斜井出砟.pdf"
+      },
+      {
+        "start_page": 33,
+        "end_page": 34,
+        "output_name": "01洞身开挖、出砟@03正洞通过辅助坑道出砟运输@03通过平行导坑出砟.pdf"
+      },
+      {
+        "start_page": 35,
+        "end_page": 38,
+        "output_name": "01洞身开挖、出砟@03正洞通过辅助坑道出砟运输@04通过竖井出砟.pdf"
+      },
+      {
+        "start_page": 39,
+        "end_page": 39,
+        "output_name": "01洞身开挖、出砟@04洞外运砟@01出砟洞外汽车倒运、增运.pdf"
+      },
+      {
+        "start_page": 40,
+        "end_page": 40,
+        "output_name": "01洞身开挖、出砟@04洞外运砟@02有轨洞外增运.pdf"
+      },
+      {
+        "start_page": 41,
+        "end_page": 42,
+        "output_name": "01洞身开挖、出砟@05开挖台架和仰拱栈桥@01开挖台架.pdf"
+      },
+      {
+        "start_page": 43,
+        "end_page": 44,
+        "output_name": "01洞身开挖、出砟@05开挖台架和仰拱栈桥@02简易仰拱栈桥.pdf"
+      }
+    ]
+  },
+  {
+    "output_dir": "02支护/",
+    "page_configs": [
+      {
+        "start_page": 47,
+        "end_page": 48,
+        "output_name": "02支护@01喷射混凝土@01喷射普通混凝土.pdf"
+      },
+      {
+        "start_page": 49,
+        "end_page": 50,
+        "output_name": "02支护@01喷射混凝土@02喷射纤维混凝土.pdf"
+      },
+      {
+        "start_page": 51,
+        "end_page": 53,
+        "output_name": "02支护@02锚杆@01锚杆.pdf"
+      },
+      {
+        "start_page": 54,
+        "end_page": 55,
+        "output_name": "02支护@03钢筋网、格栅钢架、型钢钢架@01钢筋网、格栅钢架、型钢钢架.pdf"
+      },
+      {
+        "start_page": 56,
+        "end_page": 58,
+        "output_name": "02支护@04超前支护@01钻孔.pdf"
+      },
+      {
+        "start_page": 59,
+        "end_page": 60,
+        "output_name": "02支护@04超前支护@02注浆.pdf"
+      },
+      {
+        "start_page": 61,
+        "end_page": 62,
+        "output_name": "02支护@05拆除临时支护@01拆除临时支护.pdf"
+      },
+      {
+        "start_page": 63,
+        "end_page": 63,
+        "output_name": "02支护@06综合接地焊接@01综合接地焊接.pdf"
+      },
+      {
+        "start_page": 64,
+        "end_page": 66,
+        "output_name": "02支护@07支护台架@01支护台架.pdf"
+      }
+    ]
+  },
+  {
+    "output_dir": "03衬砌/",
+    "page_configs": [
+      {
+        "start_page": 69,
+        "end_page": 70,
+        "output_name": "03衬砌@01衬砌模板、台架@01衬砌钢台模.pdf"
+      },
+      {
+        "start_page": 71,
+        "end_page": 72,
+        "output_name": "03衬砌@01衬砌模板、台架@02衬砌组合模板.pdf"
+      },
+      {
+        "start_page": 73,
+        "end_page": 74,
+        "output_name": "03衬砌@01衬砌模板、台架@03沟槽模板.pdf"
+      },
+      {
+        "start_page": 75,
+        "end_page": 75,
+        "output_name": "03衬砌@01衬砌模板、台架@04防水板台架.pdf"
+      },
+      {
+        "start_page": 76,
+        "end_page": 77,
+        "output_name": "03衬砌@02模筑混凝土@01混凝土集中拌制.pdf"
+      },
+      {
+        "start_page": 78,
+        "end_page": 79,
+        "output_name": "03衬砌@02模筑混凝土@02混凝土浇筑.pdf"
+      },
+      {
+        "start_page": 80,
+        "end_page": 81,
+        "output_name": "03衬砌@03钢筋@01钢筋.pdf"
+      },
+      {
+        "start_page": 82,
+        "end_page": 84,
+        "output_name": "03衬砌@04钢筋混凝土盖板@01钢筋混凝土盖板.pdf"
+      },
+      {
+        "start_page": 85,
+        "end_page": 88,
+        "output_name": "03衬砌@05防水和排水@01防水和排水.pdf"
+      },
+      {
+        "start_page": 89,
+        "end_page": 90,
+        "output_name": "03衬砌@06中心水沟@01中心水沟开挖.pdf"
+      },
+      {
+        "start_page": 91,
+        "end_page": 91,
+        "output_name": "03衬砌@06中心水沟@02钢筋混凝土预制管铺设.pdf"
+      },
+      {
+        "start_page": 92,
+        "end_page": 94,
+        "output_name": "03衬砌@06中心水沟@03深埋中心水沟检查井.pdf"
+      },
+      {
+        "start_page": 95,
+        "end_page": 96,
+        "output_name": "03衬砌@07拱顶压浆@01拱顶压浆.pdf"
+      }
+    ]
+  },
+  {
+    "output_dir": "04通风及管线路/",
+    "page_configs": [
+      {
+        "start_page": 99,
+        "end_page": 102,
+        "output_name": "04通风及管线路@01通风@01通风.pdf"
+      },
+      {
+        "start_page": 103,
+        "end_page": 106,
+        "output_name": "04通风及管线路@02高压风水管、照明、电力线路@01高压风水管、照明、电力线路.pdf"
+      }
+    ]
+  },
+  {
+    "output_dir": "05运输/",
+    "page_configs": [
+      {
+        "start_page": 109,
+        "end_page": 110,
+        "output_name": "05运输@01混凝土运输@01洞外混凝土增运.pdf"
+      },
+      {
+        "start_page": 111,
+        "end_page": 112,
+        "output_name": "05运输@01混凝土运输@02正洞混凝土运输.pdf"
+      },
+      {
+        "start_page": 113,
+        "end_page": 116,
+        "output_name": "05运输@01混凝土运输@03通过辅助坑道运输混凝土.pdf"
+      },
+      {
+        "start_page": 117,
+        "end_page": 118,
+        "output_name": "05运输@02材料运输@01正洞运输材料.pdf"
+      },
+      {
+        "start_page": 119,
+        "end_page": 122,
+        "output_name": "05运输@02材料运输@02通过辅助坑道材料运输.pdf"
+      }
+    ]
+  },
+  {
+    "output_dir": "06洞门及明洞/",
+    "page_configs": [
+      {
+        "start_page": 125,
+        "end_page": 127,
+        "output_name": "06洞门及明洞@01洞门及明洞混凝土@01洞门及明洞混凝土.pdf"
+      },
+      {
+        "start_page": 128,
+        "end_page": 131,
+        "output_name": "06洞门及明洞@02洞门及明洞砌筑@01洞门及明洞砌筑.pdf"
+      },
+      {
+        "start_page": 132,
+        "end_page": 138,
+        "output_name": "06洞门及明洞@03洞门附属@01洞门附属.pdf"
+      },
+      {
+        "start_page": 139,
+        "end_page": 142,
+        "output_name": "06洞门及明洞@04明洞附属@01明洞附属.pdf"
+      }
+    ]
+  },
+  {
+    "output_dir": "07辅助坑道/",
+    "page_configs": [
+      {
+        "start_page": 145,
+        "end_page": 148,
+        "output_name": "07辅助坑道@01辅助坑道开挖@01斜井(轨道矿车提升运输).pdf"
+      },
+      {
+        "start_page": 149,
+        "end_page": 151,
+        "output_name": "07辅助坑道@01辅助坑道开挖@02斜井(汽车运输).pdf"
+      },
+      {
+        "start_page": 152,
+        "end_page": 155,
+        "output_name": "07辅助坑道@01辅助坑道开挖@03平行导坑(轨道运输).pdf"
+      },
+      {
+        "start_page": 156,
+        "end_page": 158,
+        "output_name": "07辅助坑道@01辅助坑道开挖@04平行导坑(汽车运输).pdf"
+      },
+      {
+        "start_page": 159,
+        "end_page": 164,
+        "output_name": "07辅助坑道@01辅助坑道开挖@05竖井.pdf"
+      },
+      {
+        "start_page": 165,
+        "end_page": 166,
+        "output_name": "07辅助坑道@02出砟运输@01斜井(轨道矿车提升运输).pdf"
+      },
+      {
+        "start_page": 167,
+        "end_page": 167,
+        "output_name": "07辅助坑道@02出砟运输@02斜井(汽车运输).pdf"
+      },
+      {
+        "start_page": 168,
+        "end_page": 169,
+        "output_name": "07辅助坑道@02出砟运输@03平行导坑(轨道运输).pdf"
+      },
+      {
+        "start_page": 170,
+        "end_page": 170,
+        "output_name": "07辅助坑道@02出砟运输@04平行导坑(汽车运输).pdf"
+      },
+      {
+        "start_page": 171,
+        "end_page": 174,
+        "output_name": "07辅助坑道@02出砟运输@05竖井(提升运输).pdf"
+      },
+      {
+        "start_page": 175,
+        "end_page": 176,
+        "output_name": "07辅助坑道@03衬砌@01混凝土集中拌制.pdf"
+      },
+      {
+        "start_page": 177,
+        "end_page": 177,
+        "output_name": "07辅助坑道@03衬砌@02混凝土浇筑.pdf"
+      },
+      {
+        "start_page": 178,
+        "end_page": 179,
+        "output_name": "07辅助坑道@04通风@01斜井.pdf"
+      },
+      {
+        "start_page": 180,
+        "end_page": 181,
+        "output_name": "07辅助坑道@04通风@02平行导坑.pdf"
+      },
+      {
+        "start_page": 182,
+        "end_page": 183,
+        "output_name": "07辅助坑道@04通风@03竖井.pdf"
+      },
+      {
+        "start_page": 184,
+        "end_page": 185,
+        "output_name": "07辅助坑道@05管线路@01斜井.pdf"
+      },
+      {
+        "start_page": 186,
+        "end_page": 187,
+        "output_name": "07辅助坑道@05管线路@02平行导坑.pdf"
+      },
+      {
+        "start_page": 188,
+        "end_page": 189,
+        "output_name": "07辅助坑道@05管线路@03竖井.pdf"
+      },
+      {
+        "start_page": 190,
+        "end_page": 191,
+        "output_name": "07辅助坑道@06混凝土运输@01斜井.pdf"
+      },
+      {
+        "start_page": 192,
+        "end_page": 193,
+        "output_name": "07辅助坑道@06混凝土运输@02平行导坑.pdf"
+      },
+		{
+        "start_page": 194,
+        "end_page": 195,
+        "output_name": "07辅助坑道@06混凝土运输@03竖井.pdf"
+      },
+      {
+        "start_page": 196,
+        "end_page": 197,
+        "output_name": "07辅助坑道@07材料运输@01斜井.pdf"
+      },
+      {
+        "start_page": 198,
+        "end_page": 199,
+        "output_name": "07辅助坑道@07材料运输@02平行导坑.pdf"
+      },
+      {
+        "start_page": 200,
+        "end_page": 201,
+        "output_name": "07辅助坑道@07材料运输@03竖井.pdf"
+      },
+      {
+        "start_page": 202,
+        "end_page": 203,
+        "output_name": "07辅助坑道@08辅助坑道模板、台架@01组合钢模架.pdf"
+      },
+      {
+        "start_page": 204,
+        "end_page": 206,
+        "output_name": "07辅助坑道@08辅助坑道模板、台架@02开挖台架.pdf"
+      }
+    ]
+  },
+  {
+    "output_dir": "08超前地质预报及监控量测/",
+    "page_configs": [
+      {
+        "start_page": 209,
+        "end_page": 210,
+        "output_name": "08超前地质预报及监控量测@01超前地质预报探测@01风钻加深炮孔超前水平探测.pdf"
+      },
+      {
+        "start_page": 211,
+        "end_page": 213,
+        "output_name": "08超前地质预报及监控量测@01超前地质预报探测@02钻机冲击钻超前水平探测.pdf"
+      },
+      {
+        "start_page": 214,
+        "end_page": 216,
+        "output_name": "08超前地质预报及监控量测@01超前地质预报探测@03钻机钻孔取芯超前水平探测.pdf"
+      },
+      {
+        "start_page": 217,
+        "end_page": 218,
+        "output_name": "08超前地质预报及监控量测@01超前地质预报探测@04地震波反射法物理探测.pdf"
+      },
+      {
+        "start_page": 219,
+        "end_page": 220,
+        "output_name": "08超前地质预报及监控量测@02施工监控量测@01施工监控量测.pdf"
+      }
+    ]
+  },
+  {
+    "output_dir": "09改扩建工程/",
+    "page_configs": [
+      {
+        "start_page": 223,
+        "end_page": 224,
+        "output_name": "09改扩建工程@01围岩开挖@01围岩开挖.pdf"
+      },
+      {
+        "start_page": 225,
+        "end_page": 227,
+        "output_name": "09改扩建工程@02圬工凿除@01圬工凿除.pdf"
+      },
+      {
+        "start_page": 228,
+        "end_page": 229,
+        "output_name": "09改扩建工程@03洞身衬砌@01洞身衬砌.pdf"
+      },
+      {
+        "start_page": 230,
+        "end_page": 230,
+        "output_name": "09改扩建工程@04出砟@01出砟.pdf"
+      },
+      {
+        "start_page": 231,
+        "end_page": 232,
+        "output_name": "09改扩建工程@05支护@01喷射混凝土.pdf"
+      },
+      {
+        "start_page": 233,
+        "end_page": 235,
+        "output_name": "09改扩建工程@05支护@02锚杆.pdf"
+      },
+      {
+        "start_page": 236,
+        "end_page": 238,
+        "output_name": "09改扩建工程@05支护@03钢筋网、格栅钢架、型钢钢架.pdf"
+      },
+      {
+        "start_page": 239,
+        "end_page": 239,
+        "output_name": "09改扩建工程@06防水和排水@01防水板.pdf"
+      },
+      {
+        "start_page": 240,
+        "end_page": 241,
+        "output_name": "09改扩建工程@06防水和排水@02衬砌背后压浆.pdf"
+      },
+      {
+        "start_page": 242,
+        "end_page": 242,
+        "output_name": "09改扩建工程@06防水和排水@03盲沟、止水带、透水软管.pdf"
+      },
+      {
+        "start_page": 243,
+        "end_page": 246,
+        "output_name": "09改扩建工程@06防水和排水@04漏水处理.pdf"
+      },
+      {
+        "start_page": 247,
+        "end_page": 249,
+        "output_name": "09改扩建工程@07其他@01线路加固.pdf"
+      },
+      {
+        "start_page": 250,
+        "end_page": 250,
+        "output_name": "09改扩建工程@07其他@02管线路铺拆.pdf"
+      },
+      {
+        "start_page": 251,
+        "end_page": 252,
+        "output_name": "09改扩建工程@07其他@03管线路使用费、照明用电.pdf"
+      }
+    ]
+  },
+  {
+    "output_dir": "10隧道机械化施工/",
+    "page_configs": [
+      {
+        "start_page": 255,
+        "end_page": 256,
+        "output_name": "10隧道机械化施工@01凿岩台车机械化开挖@01凿岩台车机械化开挖.pdf"
+      },
+      {
+        "start_page": 257,
+        "end_page": 259,
+        "output_name": "10隧道机械化施工@02辅助坑道机械化出砟@01正洞自斜井底皮带机出砟(配合钻爆法施工).pdf"
+      },
+      {
+        "start_page": 260,
+        "end_page": 261,
+        "output_name": "10隧道机械化施工@02辅助坑道机械化出砟@02平导出砟(挖装机装砟).pdf"
+      },
+      {
+        "start_page": 262,
+        "end_page": 263,
+        "output_name": "10隧道机械化施工@03衬砌机械化施工@01衬砌台车及模架.pdf"
+      },
+      {
+        "start_page": 264,
+        "end_page": 265,
+        "output_name": "10隧道机械化施工@03衬砌机械化施工@02栈桥.pdf"
+      },
+      {
+        "start_page": 266,
+        "end_page": 267,
+        "output_name": "10隧道机械化施工@03衬砌机械化施工@03防水板机械自动铺设.pdf"
+      },
+      {
+        "start_page": 268,
+        "end_page": 269,
+        "output_name": "10隧道机械化施工@04支护机械化施工@01湿喷机械手喷射混凝土.pdf"
+      },
+      {
+        "start_page": 270,
+        "end_page": 272,
+        "output_name": "10隧道机械化施工@04支护机械化施工@02凿岩台车锚杆作业.pdf"
+      }
+    ]
+  }
+]

+ 4 - 0
SourceCode/DataMiddleware/tools/pdf_split/__init__.py

@@ -0,0 +1,4 @@
+# Re-export the public classes of the PDF splitting tool.
+# NOTE(review): these are absolute imports of top-level modules named
+# `processor` and `model`, so they resolve only when this directory itself is
+# on sys.path (e.g. when main.py is run as a script) - confirm how the
+# package is meant to be imported.
+from processor import PDFProcessor
+from model import SplitModel
+
+__all__ = ['SplitModel', 'PDFProcessor']

+ 8 - 0
SourceCode/DataMiddleware/tools/pdf_split/main.py

@@ -0,0 +1,8 @@
+from processor import PDFProcessor
+
+def main():
+    """PDF拆分工具的主入口函数"""
+    PDFProcessor.split("01")
+
+if __name__ == '__main__':
+    main()

+ 26 - 0
SourceCode/DataMiddleware/tools/pdf_split/model.py

@@ -0,0 +1,26 @@
+from dataclasses import dataclass
+from typing import List
+
+@dataclass
+class PageConfig:
+    """Page range and file name of one PDF to cut out of the source document.
+
+    Attributes:
+        start_page (int): First page of the range. PDFProcessor.split_pdf
+            treats it as 1-based (values below 1 are rejected).
+        end_page (int): Last page of the range; split_pdf includes this page
+            in the output.
+        output_name (str): Name of the output file. split_pdf appends
+            ".pdf" when the name does not already end with it.
+    """
+    start_page: int
+    end_page: int
+    output_name: str
+
+@dataclass
+class SplitModel:
+    """Split configuration for one output directory.
+
+    Attributes:
+        output_dir (str): Output directory path, relative to a base directory
+            supplied by the caller; the two are joined to form the final
+            output path.
+        page_configs (List[PageConfig]): Page ranges to extract into that
+            directory.
+    """
+    output_dir: str
+    page_configs: List[PageConfig]

+ 257 - 0
SourceCode/DataMiddleware/tools/pdf_split/processor.py

@@ -0,0 +1,257 @@
+import json,os
+from typing import List
+from PyPDF2 import PdfReader, PdfWriter
+from model import SplitModel,PageConfig
+from PIL import Image
+import io
+import pymupdf  
+
+class PDFProcessor:
+    """PDF处理器类,负责执行PDF文件的拆分操作"""
+
+    def __init__(self):
+        pass
+    
+    s="""
+    按照我提供的目录整理信息,页数要准确,要求:
+    1. 每章的标题精确到每节的一、二..的页数。  例如 第一章 第一节 一、xxxx 二、xxxx 。
+    2. 返回的结构体:```typescript
+    type PageConfig = {
+      start_page: number; // 起始页码
+      end_page: number; // 结束页码
+      output_name: string; // 输出文件名称
+    };
+    type SplitModel = {
+      output_dir: string; // 输出目录
+      page_configs: PageConfig[]; // 页面配置数组
+    };
+  ```
+    3. 输出文件名 格式 章@节@小节.pdf 例如 第一章 第一节 一、xxxx 的格式为 01xxxx@01xxxx@01xxxx.pdf (xxxx为具体标题内容)
+    4. 输出目录路径为 章节/ 例如 第一章 就是 01xxxx/
+    5. 目录一定要完整,不能有遗漏,不能有多余的目录
+    6. 帮我整理1,2,3,4,5,6,7,8,9,10章的目录信息,并返回SplitModel的json数组,一个章节一个SplitModel
+    7. end_page不能与下一个start_page相同
+    """
+
+    _generated_pdfs = []  # 类变量,用于存储生成的PDF文件路径
+
+    @staticmethod
+    def split(filename:str)-> None:
+        """将PDF文件按指定配置拆分成多个PDF文件"""
+        version = "v1"
+        base_json_dir = "./tools/pdf_json/"
+        base_input_dir = "./temp_files/pdf/source/"
+        base_output_dir = "./temp_files/pdf/output/"
+        json_file = f"{base_json_dir}/{version}/{filename}.json"
+        input_file = f"{base_input_dir}/{version}/{filename}.pdf"
+        output_dir = f"{base_output_dir}/{version}/{filename}/"
+        # 清空生成的PDF文件列表
+        PDFProcessor._generated_pdfs = []
+
+        try:
+            # 读取并解析JSON文件
+            with open(json_file, 'r', encoding='utf-8') as f:
+                json_data = json.load(f)
+
+            # 将JSON数据转换为SplitModel对象列表
+            split_models = []
+            for item in json_data:
+                page_configs = [PageConfig(**page) for page in item['page_configs']]
+                split_model = SplitModel(output_dir=item['output_dir'], page_configs=page_configs)
+                split_models.append(split_model)
+
+            # 调用batch_split_pdf进行处理
+            PDFProcessor.batch_split_pdf(input_file, output_dir, split_models)
+            
+            # 所有PDF文件拆分完成后,执行图片转换
+            PDFProcessor.convert_pdf_images(PDFProcessor._generated_pdfs)
+            
+            
+            print("PDF文件拆分成功!")
+        except FileNotFoundError:
+            print(f"错误: 找不到JSON文件 {json_file}")
+            return
+        except json.JSONDecodeError as e:
+            print(f"错误: JSON文件格式无效 {str(e)}")
+            return
+        except Exception as e:
+            print(f"处理过程中发生错误: {str(e)}")
+            return
+
+    @staticmethod
+    def batch_split_pdf(input_file: str, base_output_dir: str, split_models: List[SplitModel]) -> None:
+        """批量处理多个PDF拆分任务
+
+        Args:
+            input_file: 输入PDF文件路径
+            base_output_dir: 基础输出目录路径
+            split_models: SplitModel配置对象数组
+        """
+        try:
+            for split_model in split_models:
+                try:
+                    PDFProcessor.split_pdf(input_file, base_output_dir, split_model)
+                except Exception as e:
+                    print(f"处理拆分任务时发生错误: {str(e)}")
+                    continue
+        except Exception as e:
+            print(f"批量处理PDF文件时发生错误: {str(e)}")
+            return
+
+    @staticmethod
+    def split_pdf(input_file: str, base_output_dir: str, split_model: SplitModel) -> None:
+        """将PDF文件按指定配置拆分成多个PDF文件,并为每个拆分的PDF文件执行图片转换
+
+        Args:
+            input_file: 输入PDF文件路径
+            base_output_dir: 基础输出目录路径
+            split_model: SplitModel配置对象
+        """
+        try:
+            # 确保输出目录存在
+            output_dir = os.path.join(f"{base_output_dir}pdf/", split_model.output_dir)
+            os.makedirs(output_dir, exist_ok=True)
+            
+            # 读取PDF文件
+            reader = PdfReader(input_file)
+            total_pages = len(reader.pages)
+
+            # 处理每个页面配置
+            for page_config in split_model.page_configs:
+                try:
+                    # 验证页码范围
+                    if page_config.start_page < 1 or page_config.end_page > total_pages or page_config.start_page > page_config.end_page:
+                        print(f"警告: 页码范围 {page_config.start_page}-{page_config.end_page} 无效,已跳过")
+                        continue
+                    
+                    # 创建新的PDF文件
+                    writer = PdfWriter()
+                    
+                    # 添加指定范围的页面
+                    for page_num in range(page_config.start_page - 1, page_config.end_page):
+                        writer.add_page(reader.pages[page_num])
+                    
+                    # 生成输出文件名
+                    output_name = page_config.output_name
+                    if not page_config.output_name.endswith(".pdf"):
+                        output_name = f"{output_name}.pdf"
+                    output_file = os.path.join(output_dir, output_name)
+                    
+                    # 保存拆分后的PDF文件
+                    with open(output_file, 'wb') as output:
+                        writer.write(output)
+                        
+                    print(f"成功创建文件: {output_file}")
+                    PDFProcessor._generated_pdfs.append(output_file)
+                    
+                except Exception as e:
+                    print(f"处理页面配置时发生错误: {str(e)}")
+                    continue
+        except Exception as e:
+            print(f"处理PDF文件时发生错误: {str(e)}")
+            return
+    @staticmethod
+    def convert_pdf_images(generated_pdfs: List[str]) -> None:
+        """处理PDF文件的图片转换
+
+        Args:
+            generated_pdfs: 生成的PDF文件路径列表
+        """
+        print("开始处理图片转换...")
+        for pdf_file in generated_pdfs:
+            try:
+                result = PDFProcessor.extract_and_merge_images(pdf_file)
+                if not result:
+                    print(f"图片转换失败: {pdf_file}")
+            except Exception as e:
+                print(f"图片转换过程中发生错误: {str(e)}")
+                continue
+    @staticmethod
+    def extract_and_merge_images(input_file: str, output_file: str = None) -> str:
+        try:
+            pdf_document = pymupdf.open(input_file)
+            images = []
+            total_height = 0
+            max_width = 0
+
+            # 遍历每一页提取图片
+            for page_num in range(pdf_document.page_count):
+                page = pdf_document[page_num]
+                
+                # 获取页面上的所有图片,包括内嵌图片
+                pix = page.get_pixmap(matrix=pymupdf.Matrix(2, 2))  # 使用2倍缩放以获得更好的质量
+                img_data = pix.tobytes("png")
+                
+                # 将图片字节转换为PIL Image对象
+                image = Image.open(io.BytesIO(img_data))
+                if image.mode != 'RGB':
+                    image = image.convert('RGB')
+                
+                images.append(image)
+                total_height += image.height
+                max_width = max(max_width, image.width)
+
+            # 如果没有找到图片
+            if not images:
+                print("未在PDF中找到任何图片")
+                return ''
+
+            # 创建新的图片用于拼接
+            merged_image = Image.new('RGB', (max_width, total_height))
+            y_offset = 0
+
+            # 将所有图片垂直拼接
+            for img in images:
+                x_offset = (max_width - img.width) // 2
+                merged_image.paste(img, (x_offset, y_offset))
+                y_offset += img.height
+
+            # 设置输出路径
+            if output_file is None:
+                parts = input_file.rsplit('/pdf/', 1)
+                output_file = '/pdf/'.join(parts[:-1]) + '/img/' + parts[-1]
+                output_file = os.path.splitext(output_file)[0] + "_merged.png"
+                os.makedirs(os.path.dirname(output_file), exist_ok=True)
+
+            # 根据图片数量计算目标大小
+            target_size_per_image = 100 * 1024  # 每张图片100KB
+            max_size = target_size_per_image * len(images)
+            scale = 1.0
+            quality = 95
+
+            while True:
+                temp_buffer = io.BytesIO()
+                if scale < 1.0:
+                    new_size = (int(merged_image.width * scale), int(merged_image.height * scale))
+                    resized_image = merged_image.resize(new_size, Image.Resampling.LANCZOS)
+                    resized_image.save(temp_buffer, 'PNG', optimize=True, quality=quality)
+                else:
+                    merged_image.save(temp_buffer, 'PNG', optimize=True, quality=quality)
+                
+                size = temp_buffer.tell()
+                
+                if size <= max_size:
+                    with open(output_file, 'wb') as f:
+                        f.write(temp_buffer.getvalue())
+                        print(f"成功保存图片:[{(size // 1024)} KB] {output_file}")
+                    break
+                
+                if scale > 0.5:
+                    scale *= 0.9
+                else:
+                    # 如果达到最小缩放比例,直接保存当前结果
+                    with open(output_file, 'wb') as f:
+                        f.write(temp_buffer.getvalue())
+                        print(f"成功保存图片:[{(size // 1024)} KB] {output_file}")
+                    break
+
+            return output_file
+
+        except Exception as e:
+            print(f"处理图片时发生错误: {str(e)}")
+            return ''
+
+
+
+
+