diff options
Diffstat (limited to 'src/conventionalrp/core')
| -rw-r--r-- | src/conventionalrp/core/parser.py | 96 |
1 files changed, 80 insertions, 16 deletions
diff --git a/src/conventionalrp/core/parser.py b/src/conventionalrp/core/parser.py index d5b91da..f451178 100644 --- a/src/conventionalrp/core/parser.py +++ b/src/conventionalrp/core/parser.py @@ -1,4 +1,4 @@ -import json +import json5 import re from pathlib import Path @@ -15,7 +15,7 @@ class Parser: with open(rules_path, "r", encoding="utf-8") as f: file_content = f.read() - rules = json.loads(file_content) + rules = json5.loads(file_content) # validation rule format if rules is None: @@ -34,24 +34,88 @@ class Parser: with open(log_path, "r", encoding="utf-8") as f: log_content = f.read().splitlines() + current_metadata = None + current_content = [] + # Iterate each line of the log for line in log_content: # pass blank line if not line.strip(): continue - # try to match the current line by rules + # metadata detect + is_metadata = False for rule in self.rules: - pattern = rule.get("pattern") - rule_type = rule.get("type") - match = re.search(pattern, line) - if match: - # matched - content = match.group(1).strip() - parsed_data.append({"content": content, "type": rule_type}) - break - # no matched, marked as an unknown type - else: - parsed_data.append({"content": line.strip(), "type": "unknown"}) - - return parsed_data + if rule.get("type") == "metadata": + patterns = rule.get("patterns", []) + for pattern in patterns: + match = re.search(pattern, line) + if match: + # If it's metadata, save the previous content + if current_metadata: + parsed_data.append({ + **current_metadata, + "content": current_content + }) + current_content = [] + + # Parsing new metadata + current_metadata = {} + groups = rule.get("groups", []) + for i, key in enumerate(groups): + if i + 1 <= len(match.groups()): # Ensure effective + current_metadata[key] = match.group(i + 1).strip() + is_metadata = True + break + if is_metadata: + break + + if is_metadata: + continue # The metadata line has been processed, skip subsequent content matching + + # content detect + remaining_line = line + while remaining_line: + matched = False + for rule in self.rules: + # pass metadata rule + if rule["type"] == "metadata": + continue + + for pattern in rule["patterns"]: + match = re.match(pattern, remaining_line) + if match: + # If the matching content is not the beginning, it means that there is unknown content in front of it + if match.start() > 0: + current_content.append({ + "type": "unknown", + "content": remaining_line[:match.start()] + }) + + # Extract matched content + entry = {"type": rule["type"], "content": match.group(0)} + for i, group in enumerate(rule["groups"]): + entry[group] = match.group(i+1).strip() if match.group(i+1) else "" + + current_content.append(entry) + remaining_line = remaining_line[match.end():].lstrip() + matched = True + break + if matched: + break + + if not matched: + current_content.append({ + "type": "unknown", + "content": remaining_line + }) + remaining_line = "" + + # Process the last line + if current_metadata: + parsed_data.append({ + **current_metadata, + "content": current_content + }) + + return parsed_data
\ No newline at end of file |
