"""Rule-based parsing of TRPG logs."""

import json
import re
from pathlib import Path


class Parser:
    """Classify the lines of a TRPG log with regex rules loaded from JSON.

    A rule is a mapping with a ``pattern`` key (a regular expression) and an
    optional ``type`` key (the label attached to lines the pattern matches).
    Any other key (for example ``description``) is ignored by the parser.
    """

    def __init__(self):
        # Rule dicts, tried in list order; the first matching rule wins.
        self.rules = []

    @staticmethod
    def _compile_rules(rules):
        """Validate *rules* and return a list of ``(compiled_regex, type)``.

        Raises:
            ValueError: If a rule is not a dict, has no usable ``pattern``,
                or its pattern is not a valid regular expression.
        """
        compiled = []
        for index, rule in enumerate(rules):
            if not isinstance(rule, dict):
                raise ValueError(
                    f"Rule #{index} must be an object, "
                    f"got {type(rule).__name__}."
                )
            pattern = rule.get("pattern")
            try:
                # re.compile() raises TypeError for a missing/non-string
                # pattern and re.error for a syntactically invalid one.
                regex = re.compile(pattern)
            except (TypeError, re.error) as err:
                raise ValueError(
                    f"Rule #{index} has an invalid pattern {pattern!r}: {err}"
                ) from err
            compiled.append((regex, rule.get("type")))
        return compiled

    def load_rules(self, rules_path: str) -> None:
        """Load parsing rules from a JSON file and validate them.

        Args:
            rules_path: Path to a UTF-8 encoded JSON file containing a
                non-empty list of rule objects.

        Raises:
            FileNotFoundError: If ``rules_path`` does not exist.
            ValueError: If the file is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass), is
                empty, is not a list, or contains a malformed rule.
        """
        if not Path(rules_path).exists():
            raise FileNotFoundError(f"No such file or directory: {rules_path} ")

        with open(rules_path, "r", encoding="utf-8") as f:
            rules = json.load(f)

        # Covers JSON null as well as an empty list/object/string.
        if not rules:
            raise ValueError("Rule file cannot be empty.")
        if not isinstance(rules, list):
            raise ValueError(
                f"Rule file must contain a list of rules, "
                f"got {type(rules).__name__}."
            )

        # Fail at load time rather than in the middle of parsing a log.
        # self.rules is only replaced once the whole list is known to be good.
        self._compile_rules(rules)
        self.rules = rules

    def parse_log(self, log_path: str) -> list:
        """Parse the TRPG log based on loaded rules.

        Blank lines are skipped. Every other line is tested against the rules
        in order; the first rule whose pattern is found in the line decides
        the result. Lines matched by no rule are reported as ``"unknown"``.

        Args:
            log_path: Path to a UTF-8 encoded log file.

        Returns:
            A list of ``{"content": str, "type": ...}`` dicts in file order.
            ``content`` is the stripped text of the first capture group, or
            of the whole match when the pattern has no capture group or the
            group did not take part in the match. For unmatched lines it is
            the stripped line itself.

        Raises:
            FileNotFoundError: If ``log_path`` does not exist.
            ValueError: If ``self.rules`` contains a malformed rule.
        """
        if not Path(log_path).exists():
            raise FileNotFoundError(f"No such file or directory: {log_path} ")

        # Compile once per call instead of once per (line, rule) pair. This
        # also validates rules that were assigned to self.rules directly.
        compiled_rules = self._compile_rules(self.rules)

        with open(log_path, "r", encoding="utf-8") as f:
            log_content = f.read().splitlines()

        parsed_data = []
        for line in log_content:
            stripped = line.strip()
            if not stripped:
                continue

            for regex, rule_type in compiled_rules:
                match = regex.search(line)
                if not match:
                    continue
                # group(1) only exists when the pattern defines a group, and
                # it is None when that group is optional and was skipped.
                content = match.group(1) if regex.groups else None
                if content is None:
                    content = match.group(0)
                parsed_data.append(
                    {"content": content.strip(), "type": rule_type}
                )
                break
            else:
                # No rule matched this line.
                parsed_data.append({"content": stripped, "type": "unknown"})

        return parsed_data
2025-01-27 20:00:43 +.rc 侦查 + +墨勒托.DW(1571806261) 2025-01-27 20:00:46 +(汉庭( + +MIKU(2754533655) 2025-01-27 20:01:58 +那么你在他看走廊探头探脑时 + +MIKU(2754533655) 2025-01-27 20:02:06 +也站起身子探头探脑的看了看办公室 + +MIKU(2754533655) 2025-01-27 20:02:18 +可以注意到瑞德曼的办公桌上有一份卷宗,卷宗上写着“4·22 袭警案调查报告”的字样: \ No newline at end of file diff --git a/test/example_rule.json b/test/example_rule.json new file mode 100644 index 0000000..d385f38 --- /dev/null +++ b/test/example_rule.json @@ -0,0 +1,32 @@ +[ + { + "pattern": "^(\\S+)\\((\\d+)\\)\\s+(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})", + "type": "metadata", + "description": "匹配日志元数据,提取 id、QQ 账号和时间。例如:墨勒托.DW(1571806261) 2025-01-27 19:58:15" + }, + { + "pattern": "^#(.+)", + "type": "action", + "description": "匹配行动,以 # 开头。例如:# 我推开门" + }, + { + "pattern": "“(.+)”", + "type": "speech", + "description": "匹配玩家发言,双引号内的内容。例如:\"你好,我是冒险者\"" + }, + { + "pattern": "\\((.+)\\)", + "type": "ooc_speech", + "description": "匹配场外发言,括号内的内容。例如:(今天没时间跑团)" + }, + { + "pattern": "^(?:[\\.。]([^.。].+))", + "type": "dice_order", + "description": "匹配掷骰指令,以 . 或 。 开头但是不匹配连续的指令前缀。例如:匹配.ra智力,不匹配'。。。'" + }, + { + "pattern": "【(.+)】", + "type": "thought", + "description": "匹配角色心理活动。例如:【这里好可怕】" + } +] \ No newline at end of file -- cgit v1.2.3-70-g09d2