From f4f9c541e9917fa614e6e1b8e737167f44c89c43 Mon Sep 17 00:00:00 2001
From: HsiangNianian <i@jyunko.cn>
Date: Sun, 4 Jan 2026 15:59:54 +0800
Subject: feat: add log processing and LLM annotation functionality

---
 utils/process_log.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 utils/process_log.py

(limited to 'utils/process_log.py')

diff --git a/utils/process_log.py b/utils/process_log.py
new file mode 100644
index 0000000..fffc359
--- /dev/null
+++ b/utils/process_log.py
@@ -0,0 +1,53 @@
+import glob
+import json
+import re
+
+
+def process_g_files():
+    """Merge paragraphs from every ``g*.txt`` in the CWD into one JSON file.
+
+    A paragraph is a run of non-blank lines (whitespace-only lines separate
+    them). Each is newline-joined, stripped of ``(digits)`` tokens and saved
+    as ``{"text": ...}`` in ``processed_logs.json``. Returns ``None``.
+
+    Files are read in sorted order so reruns give identical output
+    (``glob.glob`` promises no order). A file that cannot be opened or
+    decoded as UTF-8 is reported and skipped whole, so a mid-file failure
+    never leaves truncated paragraphs in the result.
+    """
+    from itertools import groupby  # local import; file-level imports unchanged
+
+    files = sorted(glob.glob("g*.txt"))
+    if not files:
+        print("未找到以'g'开头的txt文件")
+        return
+    print(f"找到 {len(files)} 个文件: {', '.join(files)}")
+
+    paren_number = re.compile(r"\(\d+\)")
+    all_entries = []
+    for file_path in files:
+        try:
+            with open(file_path, "r", encoding="utf-8") as file:
+                lines = (line.rstrip("\n") for line in file)
+                runs = groupby(lines, key=lambda text: bool(text.strip()))
+                entries = [
+                    {"text": paren_number.sub("", "\n".join(run))}
+                    for has_text, run in runs
+                    if has_text  # runs of blank lines are only separators
+                ]
+        except (OSError, UnicodeError) as err:
+            print(f"处理文件 {file_path} 时出错: {err}")
+            continue
+        all_entries.extend(entries)
+        print(f"处理文件 {file_path} 完成")
+
+    output_file = "processed_logs.json"
+    with open(output_file, "w", encoding="utf-8") as f:
+        json.dump(all_entries, f, ensure_ascii=False, indent=2)
+
+    print(f"\n处理完成! 共处理 {len(all_entries)} 个段落")
+    print(f"结果已保存到 {output_file}")
+
+
+if __name__ == "__main__":
+    process_g_files()  # run the conversion only when executed as a script
-- 
cgit v1.2.3-70-g09d2