about summary refs log tree commit diff stats homepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/conventionalrp/core/parser.py 96
1 files changed, 80 insertions, 16 deletions
diff --git a/src/conventionalrp/core/parser.py b/src/conventionalrp/core/parser.py
index d5b91da..f451178 100644
--- a/src/conventionalrp/core/parser.py
+++ b/src/conventionalrp/core/parser.py
@@ -1,4 +1,4 @@
-import json
+import json5
import re
from pathlib import Path
@@ -15,7 +15,7 @@ class Parser:
with open(rules_path, "r", encoding="utf-8") as f:
file_content = f.read()
- rules = json.loads(file_content)
+ rules = json5.loads(file_content)
# validation rule format
if rules is None:
@@ -34,24 +34,88 @@ class Parser:
with open(log_path, "r", encoding="utf-8") as f:
log_content = f.read().splitlines()
+ current_metadata = None
+ current_content = []
+
# Iterate each line of the log
for line in log_content:
# pass blank line
if not line.strip():
continue
- # try to match the current line by rules
+ # metadata detect
+ is_metadata = False
for rule in self.rules:
- pattern = rule.get("pattern")
- rule_type = rule.get("type")
- match = re.search(pattern, line)
- if match:
- # matched
- content = match.group(1).strip()
- parsed_data.append({"content": content, "type": rule_type})
- break
- # no matched, marked as an unknown type
- else:
- parsed_data.append({"content": line.strip(), "type": "unknown"})
-
- return parsed_data
+ if rule.get("type") == "metadata":
+ patterns = rule.get("patterns", [])
+ for pattern in patterns:
+ match = re.search(pattern, line)
+ if match:
+ # If it's metadata, save the previous content
+ if current_metadata:
+ parsed_data.append({
+ **current_metadata,
+ "content": current_content
+ })
+ current_content = []
+
+ # Parsing new metadata
+ current_metadata = {}
+ groups = rule.get("groups", [])
+ for i, key in enumerate(groups):
+ if i + 1 <= len(match.groups()): # Ensure effective
+ current_metadata[key] = match.group(i + 1).strip()
+ is_metadata = True
+ break
+ if is_metadata:
+ break
+
+ if is_metadata:
+ continue # The metadata line has been processed, skip subsequent content matching
+
+ # content detect
+ remaining_line = line
+ while remaining_line:
+ matched = False
+ for rule in self.rules:
+ # pass metadata rule
+ if rule["type"] == "metadata":
+ continue
+
+ for pattern in rule["patterns"]:
+ match = re.match(pattern, remaining_line)
+ if match:
+ # If the matching content is not the beginning, it means that there is unknown content in front of it
+ if match.start() > 0:
+ current_content.append({
+ "type": "unknown",
+ "content": remaining_line[:match.start()]
+ })
+
+ # Extract matched content
+ entry = {"type": rule["type"], "content": match.group(0)}
+ for i, group in enumerate(rule["groups"]):
+ entry[group] = match.group(i+1).strip() if match.group(i+1) else ""
+
+ current_content.append(entry)
+ remaining_line = remaining_line[match.end():].lstrip()
+ matched = True
+ break
+ if matched:
+ break
+
+ if not matched:
+ current_content.append({
+ "type": "unknown",
+ "content": remaining_line
+ })
+ remaining_line = ""
+
+ # Process the last line
+ if current_metadata:
+ parsed_data.append({
+ **current_metadata,
+ "content": current_content
+ })
+
+ return parsed_data
\ No newline at end of file