author     简律纯 <i@jyunko.cn>            2025-03-15 21:53:01 +0800
committer  GitHub <noreply@github.com>     2025-03-15 21:53:01 +0800
commit     5319feea52f7266029b9a3a609a3f1ae494c6a60 (patch)
tree       33ea3be55c3c102f5975a1f2796cf6e34062d026
parent     965771fb0d85ddb27dc6c5dd7df822d1fb318286 (diff)
parent     5f01c1710d4b6ae1e0ce9fba10f1528711a2f63f (diff)
download   conventional_role_play-5319feea52f7266029b9a3a609a3f1ae494c6a60.tar.gz
           conventional_role_play-5319feea52f7266029b9a3a609a3f1ae494c6a60.zip
Merge pull request #6 from pineoncellar/debug
feat: 🎨 More standardized log parsing & add json5 dependency
-rw-r--r--  pyproject.toml                     |  1
-rw-r--r--  src/conventionalrp/core/parser.py  | 96
-rw-r--r--  test/example_log.log               |  8
-rw-r--r--  test/example_rule.json             | 58
-rw-r--r--  uv.lock                            | 11
5 files changed, 145 insertions, 29 deletions
diff --git a/pyproject.toml b/pyproject.toml
index 37f1421..78a737a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -101,6 +101,7 @@ line-ending = "auto"
dev = [
"docutils>=0.21.2",
"furo>=2024.8.6",
+ "json5>=0.10.0",
"myst-parser>=3.0.1",
"ruff>=0.9.6",
"sphinx>=7.4.7",
diff --git a/src/conventionalrp/core/parser.py b/src/conventionalrp/core/parser.py
index d5b91da..f451178 100644
--- a/src/conventionalrp/core/parser.py
+++ b/src/conventionalrp/core/parser.py
@@ -1,4 +1,4 @@
-import json
+import json5
import re
from pathlib import Path
@@ -15,7 +15,7 @@ class Parser:
with open(rules_path, "r", encoding="utf-8") as f:
file_content = f.read()
- rules = json.loads(file_content)
+ rules = json5.loads(file_content)
# validation rule format
if rules is None:
@@ -34,24 +34,88 @@ class Parser:
with open(log_path, "r", encoding="utf-8") as f:
log_content = f.read().splitlines()
+ current_metadata = None
+ current_content = []
+
# Iterate each line of the log
for line in log_content:
# pass blank line
if not line.strip():
continue
- # try to match the current line by rules
+ # detect metadata lines
+ is_metadata = False
for rule in self.rules:
- pattern = rule.get("pattern")
- rule_type = rule.get("type")
- match = re.search(pattern, line)
- if match:
- # matched
- content = match.group(1).strip()
- parsed_data.append({"content": content, "type": rule_type})
- break
- # no matched, marked as an unknown type
- else:
- parsed_data.append({"content": line.strip(), "type": "unknown"})
-
- return parsed_data
+ if rule.get("type") == "metadata":
+ patterns = rule.get("patterns", [])
+ for pattern in patterns:
+ match = re.search(pattern, line)
+ if match:
+ # New metadata line: flush the previously accumulated entry
+ if current_metadata:
+ parsed_data.append({
+ **current_metadata,
+ "content": current_content
+ })
+ current_content = []
+
+ # Parse the new metadata
+ current_metadata = {}
+ groups = rule.get("groups", [])
+ for i, key in enumerate(groups):
+ if i + 1 <= len(match.groups()): # ensure the capture group exists
+ current_metadata[key] = match.group(i + 1).strip()
+ is_metadata = True
+ break
+ if is_metadata:
+ break
+
+ if is_metadata:
+ continue # metadata line handled; skip content matching for it
+
+ # detect content segments
+ remaining_line = line
+ while remaining_line:
+ matched = False
+ for rule in self.rules:
+ # skip the metadata rule
+ if rule["type"] == "metadata":
+ continue
+
+ for pattern in rule["patterns"]:
+ match = re.match(pattern, remaining_line)
+ if match:
+ # If the match does not start at the beginning, the text before it is unknown content
+ if match.start() > 0:
+ current_content.append({
+ "type": "unknown",
+ "content": remaining_line[:match.start()]
+ })
+
+ # Extract matched content
+ entry = {"type": rule["type"], "content": match.group(0)}
+ for i, group in enumerate(rule["groups"]):
+ entry[group] = match.group(i+1).strip() if match.group(i+1) else ""
+
+ current_content.append(entry)
+ remaining_line = remaining_line[match.end():].lstrip()
+ matched = True
+ break
+ if matched:
+ break
+
+ if not matched:
+ current_content.append({
+ "type": "unknown",
+ "content": remaining_line
+ })
+ remaining_line = ""
+
+ # Flush the last accumulated entry
+ if current_metadata:
+ parsed_data.append({
+ **current_metadata,
+ "content": current_content
+ })
+
+ return parsed_data
\ No newline at end of file
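
A standalone sketch of the new content-matching loop above (illustrative only; two simplified rules in the new {type, patterns, groups} shape, run outside the Parser class). Because re.match anchors at the start of remaining_line, each log line is consumed as a sequence of typed segments from left to right:

    import re

    rules = [
        {"type": "action",
         "patterns": [r'^#((?:(?![“”"(【】]).)+)'],
         "groups": ["action_content"]},
        {"type": "speech",
         "patterns": [r'[“](.+?)[”]'],
         "groups": ["speech_content"]},
    ]

    line = "“你好,”#向前伸出手,“鄙人马瑟斯”"  # one of the new test log lines
    segments = []
    remaining = line
    while remaining:
        matched = False
        for rule in rules:
            for pattern in rule["patterns"]:
                m = re.match(pattern, remaining)
                if m:
                    # record the segment plus its named capture group(s)
                    entry = {"type": rule["type"], "content": m.group(0)}
                    for i, group in enumerate(rule["groups"]):
                        entry[group] = (m.group(i + 1) or "").strip()
                    segments.append(entry)
                    remaining = remaining[m.end():].lstrip()
                    matched = True
                    break
            if matched:
                break
        if not matched:
            segments.append({"type": "unknown", "content": remaining})
            remaining = ""

    # segments: speech("你好,"), action("向前伸出手,"), speech("鄙人马瑟斯")
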
diff --git a/test/example_log.log b/test/example_log.log
index aff1d3a..b7e8597 100644
--- a/test/example_log.log
+++ b/test/example_log.log
@@ -10,6 +10,12 @@ MIKU(2754533655) 2025-01-27 19:58:39
以实玛利(1316702392) 2025-01-27 19:58:42
“很高兴认识你,我是以实玛利”
+麦奎恩·马瑟斯(602380092) 2025-01-27 20:05:18
+“你好,”#向前伸出手,“鄙人马瑟斯”
+
+麦奎恩·马瑟斯(602380092) 2025-01-27 20:05:18
+【这人不简单啊】#和对方握手,“幸会幸会”
+
以实玛利(1316702392) 2025-01-27 20:00:02
(白师傅,能不能别念了)
@@ -32,4 +38,4 @@ MIKU(2754533655) 2025-01-27 20:02:06
也站起身子探头探脑的看了看办公室
MIKU(2754533655) 2025-01-27 20:02:18
-可以注意到瑞德曼的办公桌上有一份卷宗,卷宗上写着“4·22 袭警案调查报告”的字样:
\ No newline at end of file
+可以注意到瑞德曼的办公桌上有一份卷宗,卷宗上写着“4·22袭警案调查报告”的字样:
\ No newline at end of file
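
For reference, a rough sketch (not part of the diff) of the entry the reworked parser should produce for the second pair of lines added above, given the rules in test/example_rule.json — metadata fields plus the left-to-right segments of the content line:

    expected_entry = {
        "user_name": "麦奎恩·马瑟斯",
        "user_id": "602380092",
        "time": "2025-01-27 20:05:18",
        "content": [
            {"type": "thought", "content": "【这人不简单啊】", "thought_content": "这人不简单啊"},
            {"type": "action", "content": "#和对方握手,", "action_content": "和对方握手,"},
            {"type": "speech", "content": "“幸会幸会”", "speech_content": "幸会幸会"},
        ],
    }
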
diff --git a/test/example_rule.json b/test/example_rule.json
index d385f38..0cb5b6c 100644
--- a/test/example_rule.json
+++ b/test/example_rule.json
@@ -1,32 +1,66 @@
[
{
- "pattern": "^(\\S+)\\((\\d+)\\)\\s+(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})",
+ // Match log metadata: extract the id, QQ account, and time. Example: 墨勒托.DW(1571806261) 2025-01-27 19:58:15
"type": "metadata",
- "description": "匹配日志元数据,提取 id、QQ 账号和时间。例如:墨勒托.DW(1571806261) 2025-01-27 19:58:15"
+ "patterns": [
+ "^(\\S+)\\((\\d+)\\)\\s+(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})"
+ ],
+ "groups": [
+ "user_name",
+ "user_id",
+ "time"
+ ]
},
{
- "pattern": "^#(.+)",
"type": "action",
- "description": "匹配行动,以 # 开头。例如:# 我推开门"
+ "patterns": [
+ "^#s*((?:(?![“”\"(【】]).)+)"
+ ], // exclude the special characters that follow
+ "groups": [
+ "action_content"
+ ]
},
{
- "pattern": "“(.+)”",
"type": "speech",
- "description": "匹配玩家发言,双引号内的内容。例如:\"你好,我是冒险者\""
+ "patterns": [
+ "[“](.+?)[”]", // 中文引号
+ "\"(.*?)\"", // 英文引号
+ "”(.+?)“" // 混合引号
+ ],
+ "groups": [
+ "speech_content"
+ ]
},
{
- "pattern": "\\((.+)\\)",
"type": "ooc_speech",
- "description": "匹配场外发言,括号内的内容。例如:(今天没时间跑团)"
+ "patterns": [
+ // "((.*?))", // 英文括号
+ "((.*?))", // 中文括号
+ // "((.*)", // 未闭合英文括号
+ "((.*)" // 未闭合中文括号
+ ],
+ "groups": [
+ "ooc_content"
+ ]
},
{
- "pattern": "^(?:[\\.。]([^.。].+))",
+ // Match a dice command starting with . or 。, but do not match repeated command prefixes. Example: matches ".ra智力" but not "。。。"
"type": "dice_order",
- "description": "匹配掷骰指令,以 . 或 。 开头但是不匹配连续的指令前缀。例如:匹配.ra智力,不匹配'。。。'"
+ "patterns": [
+ "^(?:[\\.。]([^.。].+))"
+ ],
+ "groups": [
+ "dice_command"
+ ]
},
{
- "pattern": "【(.+)】",
+ // Match a character's inner thoughts. Example: 【这里好可怕】
"type": "thought",
- "description": "匹配角色心理活动。例如:【这里好可怕】"
+ "patterns": [
+ "【(.+)】"
+ ],
+ "groups": [
+ "thought_content"
+ ]
}
]
\ No newline at end of file
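
A small sanity check for the new rule schema (a sketch, not part of the diff): every pattern in a rule should declare exactly as many capture groups as there are names in its groups list. This assumes the parentheses in the ooc_speech patterns are the fullwidth characters the comments describe, so each of those patterns compiles with a single capture group:

    import re
    import json5

    # path as used in this repo's test directory
    with open("test/example_rule.json", encoding="utf-8") as f:
        rules = json5.loads(f.read())

    for rule in rules:
        for pattern in rule["patterns"]:
            n = re.compile(pattern).groups  # number of capture groups
            assert n == len(rule["groups"]), (rule["type"], pattern, n)
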
diff --git a/uv.lock b/uv.lock
index 72093e4..e38f683 100644
--- a/uv.lock
+++ b/uv.lock
@@ -179,6 +179,7 @@ source = { editable = "." }
dev = [
{ name = "docutils" },
{ name = "furo" },
+ { name = "json5" },
{ name = "myst-parser", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
{ name = "myst-parser", version = "4.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
{ name = "ruff" },
@@ -196,6 +197,7 @@ dev = [
dev = [
{ name = "docutils", specifier = ">=0.21.2" },
{ name = "furo", specifier = ">=2024.8.6" },
+ { name = "json5", specifier = ">=0.10.0" },
{ name = "myst-parser", specifier = ">=3.0.1" },
{ name = "ruff", specifier = ">=0.9.6" },
{ name = "sphinx", specifier = ">=7.4.7" },
@@ -291,6 +293,15 @@ wheels = [
]
[[package]]
+name = "json5"
+version = "0.10.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/85/3d/bbe62f3d0c05a689c711cff57b2e3ac3d3e526380adb7c781989f075115c/json5-0.10.0.tar.gz", hash = "sha256:e66941c8f0a02026943c52c2eb34ebeb2a6f819a0be05920a6f5243cd30fd559", size = 48202 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/aa/42/797895b952b682c3dafe23b1834507ee7f02f4d6299b65aaa61425763278/json5-0.10.0-py3-none-any.whl", hash = "sha256:19b23410220a7271e8377f81ba8aacba2fdd56947fbb137ee5977cbe1f5e8dfa", size = 34049 },
+]
+
+[[package]]
name = "markdown-it-py"
version = "3.0.0"
source = { registry = "https://pypi.org/simple" }