# Source: yue/calc24game
							import docx, json, re, os, sys, time
from pathlib import Path
from core.db import db
# Set the encoding of the standard streams to UTF-8 (this script prints
# Chinese status messages).
sys.stdout.reconfigure(encoding='utf-8')
sys.stderr.reconfigure(encoding='utf-8')

# Path of the output file, relative to the current working directory.
output_file = './24game_data.json'

# Four integers separated by a comma (ASCII or full-width) or by whitespace,
# e.g. "1, 2, 3, 4", "1，2，3，4" or "1 2 3 4".
_FOUR_NUMBERS_RE = re.compile(r'(\d+)[,，\s]\s*(\d+)[,，\s]\s*(\d+)[,，\s]\s*(\d+)')

# Separators between solutions: a semicolon (ASCII or full-width), a newline,
# or whitespace that is followed by an ordinal such as "2." / "2、" / "2)".
# NOTE(review): the last alternative also fires on whitespace before "<digits>)"
# inside a spaced expression such as "(1 + 2) × 8" -- confirm that the source
# document never puts spaces inside expressions.
_SOLUTION_SPLIT_RE = re.compile(r'[;；]\s*|\n|\s+(?=\d+[.、)）])')

# Leading ordinal such as "1." or "1)" in front of a solution.
_ORDINAL_PREFIX_RE = re.compile(r'^\d+[.、)）]\s*')

# A solutions cell containing this character is treated as "no solution".
# (The longer spellings "无解", "无解答", ... all contain it, so one substring
# check is enough.)
_NO_SOLUTION_MARKER = "无"

# Deletes the difficulty markers and normalises full-width parentheses.
_SOLUTION_CLEANUP = str.maketrans({
    "@": None,
    "#": None,
    "*": None,
    "（": "(",
    "）": ")",
})


def _parse_numbers(text):
    """Return the four integers found in *text* as a tuple, or None."""
    match = _FOUR_NUMBERS_RE.search(text)
    if match:
        return tuple(int(group) for group in match.groups())

    # Fallback for unusual separators: take the first four digit runs.
    digit_runs = re.findall(r'\d+', text)
    if len(digit_runs) >= 4:
        return tuple(int(run) for run in digit_runs[:4])
    return None


def _parse_solutions(text):
    """Split a solutions cell into ``{'c': expression, 'f': flag}`` dicts."""
    solutions = []
    for fragment in _SOLUTION_SPLIT_RE.split(text):
        fragment = _ORDINAL_PREFIX_RE.sub('', fragment.strip())
        if not fragment:
            continue

        # The flag is read before the markers are stripped: "@@" -> 2, "#" -> 1.
        if "@@" in fragment:
            flag = 2
        elif "#" in fragment:
            flag = 1
        else:
            flag = 0

        expression = fragment.translate(_SOLUTION_CLEANUP).strip()
        # A fragment made only of markers carries no expression; skip it
        # instead of storing an empty solution.
        if expression:
            solutions.append({'c': expression, 'f': flag})
    return solutions


def extract_data_from_docx(docx_path):
    """Extract 24-game combinations and their solutions from a Word document.

    Every table of the document is scanned.  The first row of each table and
    rows with fewer than two cells are skipped, as are rows whose first cell
    is shorter than three characters or contains "组合" / "答案".  For the
    remaining rows the first cell supplies four numbers and the second cell
    the solutions.

    Args:
        docx_path: Anything ``docx.Document`` accepts.  An already-loaded
            document (any object exposing a ``tables`` attribute) is also
            accepted and used as-is.

    Returns:
        A ``(data, no_solution_count)`` tuple.  ``data`` is a list of dicts
        ``{'id', 'n1', 'n2', 'n3', 'n4', 's'}`` where ``s`` is a list of
        ``{'c': expression, 'f': flag}`` dicts (flag 2 if the solution text
        contained "@@", 1 if it contained "#", otherwise 0).
        ``no_solution_count`` is the number of combinations whose ``s`` list
        is empty.  If the document cannot be processed at all, ``([], 0)`` is
        returned and the error is printed.
    """
    print(f"正在读取文档: {docx_path}")
    try:
        if hasattr(docx_path, 'tables'):
            doc = docx_path
        else:
            doc = docx.Document(docx_path)

        data = []
        success_count = 0
        error_count = 0
        skipped_count = 0
        no_solution_count = 0

        for table in doc.tables:
            for row_index, row in enumerate(table.rows):
                try:
                    # Read the cells once per row instead of on every access.
                    cells = row.cells
                    if row_index == 0 or len(cells) < 2:
                        skipped_count += 1
                        continue

                    numbers_text = cells[0].text.strip()
                    if (
                        len(numbers_text) < 3
                        or "组合" in numbers_text
                        or "答案" in numbers_text
                    ):
                        skipped_count += 1
                        continue

                    numbers = _parse_numbers(numbers_text)
                    if numbers is None:
                        print(f"警告: 无法解析数字: {numbers_text}")
                        error_count += 1
                        continue

                    solutions_text = cells[1].text.strip()
                    if _NO_SOLUTION_MARKER in solutions_text:
                        solutions = []
                    else:
                        solutions = _parse_solutions(solutions_text)

                    # Count every combination that ends up without solutions,
                    # including rows whose answers cell is simply empty.
                    if not solutions:
                        no_solution_count += 1

                    num1, num2, num3, num4 = numbers
                    data.append({
                        'id': len(data) + 1,
                        'n1': num1,
                        'n2': num2,
                        'n3': num3,
                        'n4': num4,
                        's': solutions,
                    })
                    success_count += 1

                except Exception as e:
                    # Best effort: one malformed row must not abort the run.
                    print(f"处理行时出错: {e}")
                    error_count += 1

        print(f"数据提取完成: 成功 {success_count} 条, 错误 {error_count} 条, 跳过 {skipped_count} 条, 无解 {no_solution_count} 条")
        return data, no_solution_count

    except Exception as e:
        print(f"处理文档时出错: {e}")
        return [], 0

def save_to_json(data, output_path, no_solution_count):
    """Write the combinations to *output_path* and print summary statistics.

    The file is written as JSON Lines: one compact JSON object per line,
    UTF-8 encoded, with non-ASCII characters kept as-is.

    Args:
        data: List of combination dicts; each has an ``'s'`` list of
            solution dicts carrying an ``'f'`` flag.
        output_path: Destination file.  Missing parent directories are
            created; a bare filename is written to the current directory.
        no_solution_count: Number of combinations without a solution, used
            only for the printed statistics.

    Returns:
        True on success, False if any step raised (the error is printed).
    """
    print(f"正在保存数据到: {output_path}")
    try:
        # os.path.dirname() is '' for a bare filename and os.makedirs('')
        # raises, so only create the directory when there is one.
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.writelines(
                json.dumps(item, ensure_ascii=False, separators=(',', ':')) + '\n'
                for item in data
            )

        # Tally the solutions and their difficulty flags in a single pass.
        total_solutions = 0
        flag_counts = {0: 0, 1: 0, 2: 0}
        for item in data:
            for sol in item['s']:
                total_solutions += 1
                if sol['f'] in flag_counts:
                    flag_counts[sol['f']] += 1

        print("\n统计信息汇总:")
        print(f"总组合数: {len(data)} 个")
        print(f"有解组合: {len(data) - no_solution_count} 个")
        print(f"无解组合: {no_solution_count} 个")
        print(f"总解答数: {total_solutions} 个")
        print(f"难度统计: 普通解法 {flag_counts[0]} 个, 一星解法 {flag_counts[1]} 个, 二星解法 {flag_counts[2]} 个")

        return True
    except Exception as e:
        print(f"保存数据时出错: {e}")
        return False

def migrate_data():
    """Import the exported JSON file into the database.

    The original author's docstring describes the target as a SQLite
    database.  The file ``24game_data.json`` is looked up in the directory
    that contains this script and handed to ``db.import_json_data``.  Any
    exception is reported on stdout instead of being propagated.
    """
    # NOTE(review): this path is relative to the script's directory, whereas
    # the module-level ``output_file`` is relative to the working directory.
    source = Path(__file__).with_name('24game_data.json')

    try:
        db.import_json_data(source)
        print("数据迁移完成")
    except Exception as exc:
        print(f"数据迁移失败: {exc}")
def main():
    """Run the pipeline: extract from the .docx, save as JSON, import to the DB.

    The Word document is looked up relative to the current working
    directory.  The function returns early, after printing an error, when
    the document is missing, when no data could be extracted, or when the
    JSON file could not be written.
    """
    docx_path = './24点游戏所有组合答案修正版.docx'

    if not os.path.exists(docx_path):
        print(f"错误: 文件不存在 {docx_path}")
        return

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()
    print("正在提取数据，请稍候...")

    data, no_solution_count = extract_data_from_docx(docx_path)
    if not data:
        print("错误: 未能从文档中提取数据")
        return

    # Stop here if the file was not written: migrating would otherwise import
    # a stale or missing file and the success message below would be wrong.
    if not save_to_json(data, output_file, no_solution_count):
        print(f"错误: 数据保存失败 {output_file}")
        return

    # NOTE(review): output_file is relative to the working directory while
    # migrate_data() reads 24game_data.json from the script's directory; the
    # two only coincide when the script is run from its own directory.
    migrate_data()

    elapsed = time.perf_counter() - start_time
    print(f"\n处理完成，耗时 {elapsed:.2f} 秒")
    print(f"数据已保存到: {output_file}")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()