import json5
import re
from pathlib import Path


class Parser:
    def __init__(self):
        self.rules = []

    def load_rules(self, rules_path: str):
        """Load parsing rules."""
        if not Path(rules_path).exists():
            raise FileNotFoundError(f"No such file or directory: {rules_path} ")

        with open(rules_path, "r", encoding="utf-8") as f:
            file_content = f.read()

        rules = json5.loads(file_content)

        # validation rule format
        if rules is None:
            raise ValueError(f"Rule file cannot be empty.")
        # to be continue...

        self.rules = rules
        print(f"Rules loaded: {rules}\n")

    def parse_log(self, log_path: str):
        """Parse the TRPG log based on loaded rules."""
        parsed_data = []

        if not Path(log_path).exists():
            raise FileNotFoundError(f"No such file or directory: {log_path} ")

        with open(log_path, "r", encoding="utf-8") as f:
            log_content = f.read().splitlines()

        current_metadata = None
        current_content = []

        # Iterate each line of the log
        for line in log_content:
            # pass blank line
            if not line.strip():
                continue

            # metadata detect
            is_metadata = False
            metadata_content = self.rules.get("metadata")
            patterns = metadata_content.get("patterns", [])
            for pattern in patterns:
                match = re.search(pattern, line)
                if match:
                    # If it's metadata, save the previous content
                    if current_metadata:
                        parsed_data.append({
                            **current_metadata,
                            "content": current_content
                        })
                        current_content = []

                    # Parsing new metadata
                    current_metadata = {}
                    groups = metadata_content.get("groups", [])
                    for i, key in enumerate(groups):
                        if i + 1 <= len(match.groups()):  # Ensure effective
                            current_metadata[key] = match.group(i + 1).strip()
                    is_metadata = True
                    break

            if is_metadata:
                continue  # The metadata line has been processed, skip subsequent content matching

            # content detect
            remaining_line = line
            rules = self.rules.get("content")
            while remaining_line:
                matched = False
                
                for rule in rules:
                    # pass metadata rule
                    if rule["type"] == "metadata":
                        continue

                    for pattern in rule["patterns"]:
                        match = re.match(pattern, remaining_line)
                        if match:
                            # If the matching content is not the beginning, it means that there is unknown content in front of it
                            if match.start() > 0:
                                current_content.append({
                                    "type": "unknown",
                                    "content": remaining_line[:match.start()]
                                })
                            
                            # Extract matched content
                            entry = {"type": rule["type"], "content": match.group(0)}
                            for i, group in enumerate(rule["groups"]):
                                entry[group] = match.group(i+1).strip() if match.group(i+1) else ""
                            
                            current_content.append(entry)
                            remaining_line = remaining_line[match.end():].lstrip()
                            matched = True
                            break
                    if matched:
                        break
                
                if not matched:
                    current_content.append({
                        "type": "unknown",
                        "content": remaining_line
                    })
                    remaining_line = ""

        # Process the last line
        if current_metadata:
            parsed_data.append({
                **current_metadata,
                "content": current_content
            })

        return parsed_data