diff options
| author | 2025-03-15 21:53:01 +0800 | |
|---|---|---|
| committer | 2025-03-15 21:53:01 +0800 | |
| commit | 5319feea52f7266029b9a3a609a3f1ae494c6a60 (patch) | |
| tree | 33ea3be55c3c102f5975a1f2796cf6e34062d026 | |
| parent | 965771fb0d85ddb27dc6c5dd7df822d1fb318286 (diff) | |
| parent | 5f01c1710d4b6ae1e0ce9fba10f1528711a2f63f (diff) | |
| download | conventional_role_play-5319feea52f7266029b9a3a609a3f1ae494c6a60.tar.gz conventional_role_play-5319feea52f7266029b9a3a609a3f1ae494c6a60.zip | |
Merge pull request #6 from pineoncellar/debug
feat: 🎨 More standardized log parse & Add dependency json5
| -rw-r--r-- | pyproject.toml | 1 | ||||
| -rw-r--r-- | src/conventionalrp/core/parser.py | 96 | ||||
| -rw-r--r-- | test/example_log.log | 8 | ||||
| -rw-r--r-- | test/example_rule.json | 58 | ||||
| -rw-r--r-- | uv.lock | 11 |
5 files changed, 145 insertions, 29 deletions
diff --git a/pyproject.toml b/pyproject.toml index 37f1421..78a737a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,6 +101,7 @@ line-ending = "auto" dev = [ "docutils>=0.21.2", "furo>=2024.8.6", + "json5>=0.10.0", "myst-parser>=3.0.1", "ruff>=0.9.6", "sphinx>=7.4.7", diff --git a/src/conventionalrp/core/parser.py b/src/conventionalrp/core/parser.py index d5b91da..f451178 100644 --- a/src/conventionalrp/core/parser.py +++ b/src/conventionalrp/core/parser.py @@ -1,4 +1,4 @@ -import json +import json5 import re from pathlib import Path @@ -15,7 +15,7 @@ class Parser: with open(rules_path, "r", encoding="utf-8") as f: file_content = f.read() - rules = json.loads(file_content) + rules = json5.loads(file_content) # validation rule format if rules is None: @@ -34,24 +34,88 @@ class Parser: with open(log_path, "r", encoding="utf-8") as f: log_content = f.read().splitlines() + current_metadata = None + current_content = [] + # Iterate each line of the log for line in log_content: # pass blank line if not line.strip(): continue - # try to match the current line by rules + # metadata detect + is_metadata = False for rule in self.rules: - pattern = rule.get("pattern") - rule_type = rule.get("type") - match = re.search(pattern, line) - if match: - # matched - content = match.group(1).strip() - parsed_data.append({"content": content, "type": rule_type}) - break - # no matched, marked as an unknown type - else: - parsed_data.append({"content": line.strip(), "type": "unknown"}) - - return parsed_data + if rule.get("type") == "metadata": + patterns = rule.get("patterns", []) + for pattern in patterns: + match = re.search(pattern, line) + if match: + # If it's metadata, save the previous content + if current_metadata: + parsed_data.append({ + **current_metadata, + "content": current_content + }) + current_content = [] + + # Parsing new metadata + current_metadata = {} + groups = rule.get("groups", []) + for i, key in enumerate(groups): + if i + 1 <= len(match.groups()): # 
Ensure effective + current_metadata[key] = match.group(i + 1).strip() + is_metadata = True + break + if is_metadata: + break + + if is_metadata: + continue # The metadata line has been processed, skip subsequent content matching + + # content detect + remaining_line = line + while remaining_line: + matched = False + for rule in self.rules: + # pass metadata rule + if rule["type"] == "metadata": + continue + + for pattern in rule["patterns"]: + match = re.match(pattern, remaining_line) + if match: + # If the matching content is not the beginning, it means that there is unknown content in front of it + if match.start() > 0: + current_content.append({ + "type": "unknown", + "content": remaining_line[:match.start()] + }) + + # Extract matched content + entry = {"type": rule["type"], "content": match.group(0)} + for i, group in enumerate(rule["groups"]): + entry[group] = match.group(i+1).strip() if match.group(i+1) else "" + + current_content.append(entry) + remaining_line = remaining_line[match.end():].lstrip() + matched = True + break + if matched: + break + + if not matched: + current_content.append({ + "type": "unknown", + "content": remaining_line + }) + remaining_line = "" + + # Process the last line + if current_metadata: + parsed_data.append({ + **current_metadata, + "content": current_content + }) + + return parsed_data
\ No newline at end of file diff --git a/test/example_log.log b/test/example_log.log index aff1d3a..b7e8597 100644 --- a/test/example_log.log +++ b/test/example_log.log @@ -10,6 +10,12 @@ MIKU(2754533655) 2025-01-27 19:58:39 以实玛利(1316702392) 2025-01-27 19:58:42
“很高兴认识你,我是以实玛利”
+麦奎恩·马瑟斯(602380092) 2025-01-27 20:05:18
+“你好,”#向前伸出手,“鄙人马瑟斯”
+
+麦奎恩·马瑟斯(602380092) 2025-01-27 20:05:18
+【这人不简单啊】#和对方握手,“幸会幸会”
+
以实玛利(1316702392) 2025-01-27 20:00:02
(白师傅,能不能别念了)
@@ -32,4 +38,4 @@ MIKU(2754533655) 2025-01-27 20:02:06 也站起身子探头探脑的看了看办公室
MIKU(2754533655) 2025-01-27 20:02:18
-可以注意到瑞德曼的办公桌上有一份卷宗,卷宗上写着“4·22 袭警案调查报告”的字样:
\ No newline at end of file +可以注意到瑞德曼的办公桌上有一份卷宗,卷宗上写着“4·22袭警案调查报告”的字样:
\ No newline at end of file diff --git a/test/example_rule.json b/test/example_rule.json index d385f38..0cb5b6c 100644 --- a/test/example_rule.json +++ b/test/example_rule.json @@ -1,32 +1,66 @@ [
{
- "pattern": "^(\\S+)\\((\\d+)\\)\\s+(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})",
+ // 匹配日志元数据,提取 id、QQ 账号和时间。例如:墨勒托.DW(1571806261) 2025-01-27 19:58:15
"type": "metadata",
- "description": "匹配日志元数据,提取 id、QQ 账号和时间。例如:墨勒托.DW(1571806261) 2025-01-27 19:58:15"
+ "patterns": [
+ "^(\\S+)\\((\\d+)\\)\\s+(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})"
+ ],
+ "groups": [
+ "user_name",
+ "user_id",
+ "time"
+ ]
},
{
- "pattern": "^#(.+)",
"type": "action",
- "description": "匹配行动,以 # 开头。例如:# 我推开门"
+ "patterns": [
+ "^#\\s*((?:(?![“”\"(【】]).)+)"
+ ], // 排除后续特殊符号
+ "groups": [
+ "action_content"
+ ]
},
{
- "pattern": "“(.+)”",
"type": "speech",
- "description": "匹配玩家发言,双引号内的内容。例如:\"你好,我是冒险者\""
+ "patterns": [
+ "[“](.+?)[”]", // 中文引号
+ "\"(.*?)\"", // 英文引号
+ "”(.+?)“" // 混合引号
+ ],
+ "groups": [
+ "speech_content"
+ ]
},
{
- "pattern": "\\((.+)\\)",
"type": "ooc_speech",
- "description": "匹配场外发言,括号内的内容。例如:(今天没时间跑团)"
+ "patterns": [
+ // "((.*?))", // 英文括号
+ "((.*?))", // 中文括号
+ // "((.*)", // 未闭合英文括号
+ "((.*)" // 未闭合中文括号
+ ],
+ "groups": [
+ "ooc_content"
+ ]
},
{
- "pattern": "^(?:[\\.。]([^.。].+))",
+ // 匹配掷骰指令,以 . 或 。开头但是不匹配连续的指令前缀。例如:匹配".ra智力",不匹配"。。。"
"type": "dice_order",
- "description": "匹配掷骰指令,以 . 或 。 开头但是不匹配连续的指令前缀。例如:匹配.ra智力,不匹配'。。。'"
+ "patterns": [
+ "^(?:[\\.。]([^.。].+))"
+ ],
+ "groups": [
+ "dice_command"
+ ]
},
{
- "pattern": "【(.+)】",
+ // 匹配角色心理活动。例如:【这里好可怕】
"type": "thought",
- "description": "匹配角色心理活动。例如:【这里好可怕】"
+ "patterns": [
+ "【(.+)】"
+ ],
+ "groups": [
+ "thought_content"
+ ]
}
]
\ No newline at end of file @@ -179,6 +179,7 @@ source = { editable = "." } dev = [ { name = "docutils" }, { name = "furo" }, + { name = "json5" }, { name = "myst-parser", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "myst-parser", version = "4.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "ruff" }, @@ -196,6 +197,7 @@ dev = [ dev = [ { name = "docutils", specifier = ">=0.21.2" }, { name = "furo", specifier = ">=2024.8.6" }, + { name = "json5", specifier = ">=0.10.0" }, { name = "myst-parser", specifier = ">=3.0.1" }, { name = "ruff", specifier = ">=0.9.6" }, { name = "sphinx", specifier = ">=7.4.7" }, @@ -291,6 +293,15 @@ wheels = [ ] [[package]] +name = "json5" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/85/3d/bbe62f3d0c05a689c711cff57b2e3ac3d3e526380adb7c781989f075115c/json5-0.10.0.tar.gz", hash = "sha256:e66941c8f0a02026943c52c2eb34ebeb2a6f819a0be05920a6f5243cd30fd559", size = 48202 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/42/797895b952b682c3dafe23b1834507ee7f02f4d6299b65aaa61425763278/json5-0.10.0-py3-none-any.whl", hash = "sha256:19b23410220a7271e8377f81ba8aacba2fdd56947fbb137ee5977cbe1f5e8dfa", size = 34049 }, +] + +[[package]] name = "markdown-it-py" version = "3.0.0" source = { registry = "https://pypi.org/simple" } |
