From 4f56bec03e050678b706ff34a9c8db186b63a4e1 Mon Sep 17 00:00:00 2001 From: pine Date: Mon, 24 Mar 2025 11:02:56 +0800 Subject: style: :art: Update rule file format --- src/conventionalrp/core/parser.py | 50 ++++++++++----------- test/example_rule.json | 94 ++++++++++++++++----------------------- 2 files changed, 63 insertions(+), 81 deletions(-) diff --git a/src/conventionalrp/core/parser.py b/src/conventionalrp/core/parser.py index f451178..f214c4c 100644 --- a/src/conventionalrp/core/parser.py +++ b/src/conventionalrp/core/parser.py @@ -23,6 +23,7 @@ class Parser: # to be continue... self.rules = rules + print(f"Rules loaded: {rules}\n") def parse_log(self, log_path: str): """Parse the TRPG log based on loaded rules.""" @@ -45,39 +46,38 @@ class Parser: # metadata detect is_metadata = False - for rule in self.rules: - if rule.get("type") == "metadata": - patterns = rule.get("patterns", []) - for pattern in patterns: - match = re.search(pattern, line) - if match: - # If it's metadata, save the previous content - if current_metadata: - parsed_data.append({ - **current_metadata, - "content": current_content - }) - current_content = [] - - # Parsing new metadata - current_metadata = {} - groups = rule.get("groups", []) - for i, key in enumerate(groups): - if i + 1 <= len(match.groups()): # Ensure effective - current_metadata[key] = match.group(i + 1).strip() - is_metadata = True - break - if is_metadata: - break + metadata_content = self.rules.get("metadata") + patterns = metadata_content.get("patterns", []) + for pattern in patterns: + match = re.search(pattern, line) + if match: + # If it's metadata, save the previous content + if current_metadata: + parsed_data.append({ + **current_metadata, + "content": current_content + }) + current_content = [] + + # Parsing new metadata + current_metadata = {} + groups = metadata_content.get("groups", []) + for i, key in enumerate(groups): + if i + 1 <= len(match.groups()): # Ensure effective + current_metadata[key] = match.group(i + 1).strip() + is_metadata = True + break if is_metadata: continue # The metadata line has been processed, skip subsequent content matching # content detect remaining_line = line + rules = self.rules.get("content") while remaining_line: matched = False - for rule in self.rules: + + for rule in rules: # pass metadata rule if rule["type"] == "metadata": continue diff --git a/test/example_rule.json b/test/example_rule.json index 0cb5b6c..750789a 100644 --- a/test/example_rule.json +++ b/test/example_rule.json @@ -1,66 +1,48 @@ -[ +{ + "metadata": { + // 匹配日志元数据,提取 id、QQ 账号和时间。例如:墨勒托.DW(1571806261) 2025-01-27 19:58:15 + "type": "metadata", + "patterns": [ + "^(\\S+)\\((\\d+)\\)\\s+(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})" + ], + "groups": ["user_name", "user_id", "time"] + }, + "content": [ { - // 匹配日志元数据,提取 id、QQ 账号和时间。例如:墨勒托.DW(1571806261) 2025-01-27 19:58:15 - "type": "metadata", - "patterns": [ - "^(\\S+)\\((\\d+)\\)\\s+(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})" - ], - "groups": [ - "user_name", - "user_id", - "time" - ] + "type": "action", + "patterns": ["^#s*((?:(?![“”\"(【】]).)+)"], // 排除后续特殊符号 + "groups": ["action_content"] }, { - "type": "action", - "patterns": [ - "^#s*((?:(?![“”\"(【】]).)+)" - ], // 排除后续特殊符号 - "groups": [ - "action_content" - ] + "type": "speech", + "patterns": [ + "[“](.+?)[”]", // 中文引号 + "\"(.*?)\"", // 英文引号 + "”(.+?)“" // 混合引号 + ], + "groups": ["speech_content"] }, { - "type": "speech", - "patterns": [ - "[“](.+?)[”]", // 中文引号 - "\"(.*?)\"", // 英文引号 - "”(.+?)“" // 混合引号 - ], - "groups": [ - "speech_content" - ] + "type": "ooc_speech", + "patterns": [ + // "((.*?))", // 英文括号 + "((.*?))", // 中文括号 + // "((.*)", // 未闭合英文括号 + "((.*)" // 未闭合中文括号 + ], + "groups": ["ooc_content"] }, { - "type": "ooc_speech", - "patterns": [ - // "((.*?))", // 英文括号 - "((.*?))", // 中文括号 - // "((.*)", // 未闭合英文括号 - "((.*)" // 未闭合中文括号 - ], - "groups": [ - "ooc_content" - ] + // 匹配掷骰指令,以 . 或 。开头但是不匹配连续的指令前缀。例如:匹配".ra智力",不匹配"。。。" + "type": "dice_order", + "patterns": ["^(?:[\\.。]([^.。].+))"], + "groups": ["dice_command"] }, { - // 匹配掷骰指令,以 . 或 。开头但是不匹配连续的指令前缀。例如:匹配".ra智力",不匹配"。。。" - "type": "dice_order", - "patterns": [ - "^(?:[\\.。]([^.。].+))" - ], - "groups": [ - "dice_command" - ] - }, - { - // 匹配角色心理活动。例如:【这里好可怕】 - "type": "thought", - "patterns": [ - "【(.+)】" - ], - "groups": [ - "thought_content" - ] + // 匹配角色心理活动。例如:【这里好可怕】 + "type": "thought", + "patterns": ["【(.+)】"], + "groups": ["thought_content"] } -] \ No newline at end of file + ] +} -- cgit v1.2.3-70-g09d2