diff options
Diffstat (limited to 'src/conventionalrp')
| -rw-r--r-- | src/conventionalrp/core/processor.py | 86 | ||||
| -rw-r--r-- | src/conventionalrp/extractors/rule_extractor.py | 75 | ||||
| -rw-r--r-- | src/conventionalrp/renderers/markdown_renderer.py | 37 |
3 files changed, 164 insertions, 34 deletions
diff --git a/src/conventionalrp/core/processor.py b/src/conventionalrp/core/processor.py index 4e2f573..bc74ffb 100644 --- a/src/conventionalrp/core/processor.py +++ b/src/conventionalrp/core/processor.py @@ -1,22 +1,68 @@ +from typing import List, Dict, Any, Optional + + class Processor: - def __init__(self, rules): - self.rules = rules + """处理器,用于处理解析后的token""" + + def __init__(self, rules: Optional[Dict[str, Any]] = None): + """ + 初始化处理器 + + Args: + rules: 处理规则(可选) + """ + self.rules = rules or {} - def process_tokens(self, tokens): + def process_tokens(self, tokens: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """ + 处理token列表 + + Args: + tokens: 解析后的token列表 + + Returns: + 处理后的数据列表 + """ processed_data = [] for token in tokens: - processed_data.append(self.apply_rules(token)) + processed_token = self.apply_rules(token) + processed_data.append(processed_token) return processed_data - def apply_rules(self, token): - # Implement rule application logic here - for rule in self.rules: - if rule.matches(token): - return rule.apply(token) - return token + def apply_rules(self, token: Dict[str, Any]) -> Dict[str, Any]: + """ + 对单个token应用规则 + + Args: + token: 单个token + + Returns: + 处理后的token + """ + # 基础实现:直接返回token + # 可以在此添加更多处理逻辑 + processed = token.copy() + + # 添加处理时间戳 + if "timestamp" in processed: + processed["processed"] = True + + return processed - def generate_output(self, processed_data, format_type): - # Implement output generation logic based on format_type + def generate_output(self, processed_data: List[Dict[str, Any]], format_type: str) -> str: + """ + 生成指定格式的输出 + + Args: + processed_data: 处理后的数据 + format_type: 输出格式 (json/html/markdown) + + Returns: + 格式化后的字符串 + + Raises: + ValueError: 不支持的格式类型 + """ if format_type == "json": return self.generate_json_output(processed_data) elif format_type == "html": @@ -24,21 +70,21 @@ class Processor: elif format_type == "markdown": return self.generate_markdown_output(processed_data) else: - raise ValueError("Unsupported format type") + raise ValueError(f"Unsupported format type: {format_type}") - def generate_json_output(self, processed_data): + def generate_json_output(self, processed_data: List[Dict[str, Any]]) -> str: + """生成JSON格式输出""" import json + return json.dumps(processed_data, ensure_ascii=False, indent=2) - return json.dumps(processed_data) - - def generate_html_output(self, processed_data): - # Implement HTML output generation + def generate_html_output(self, processed_data: List[Dict[str, Any]]) -> str: + """生成HTML格式输出""" return ( "<html><body>" + "".join(f"<p>{data}</p>" for data in processed_data) + "</body></html>" ) - def generate_markdown_output(self, processed_data): - # Implement Markdown output generation + def generate_markdown_output(self, processed_data: List[Dict[str, Any]]) -> str: + """生成Markdown格式输出""" return "\n".join(f"- {data}" for data in processed_data) diff --git a/src/conventionalrp/extractors/rule_extractor.py b/src/conventionalrp/extractors/rule_extractor.py index b0d03d5..bfc60c8 100644 --- a/src/conventionalrp/extractors/rule_extractor.py +++ b/src/conventionalrp/extractors/rule_extractor.py @@ -1,3 +1,8 @@ +import json5 +from pathlib import Path +from typing import Dict, Any, Optional + + class BaseExtractor: def extract(self): raise NotImplementedError("This method should be overridden by subclasses.") @@ -7,19 +12,65 @@ class BaseExtractor: class RuleExtractor(BaseExtractor): - def __init__(self, config_file): + """规则提取器,用于从配置文件加载解析规则""" + + def __init__(self, config_file: Optional[str] = None): + """ + 初始化规则提取器 + + Args: + config_file: 规则配置文件路径(可选) + """ self.config_file = config_file - self.rules = self.load_rules_from_file() + self.rules: Dict[str, Any] = {} + if config_file: + self.rules = self.load_rules_from_file(config_file) - def load_rules_from_file(self): - import json + def load_rules_from_file(self, config_file: str) -> Dict[str, Any]: + """ + 从文件加载规则 + + Args: + config_file: 规则配置文件路径 + + Returns: + 解析后的规则字典 + + Raises: + FileNotFoundError: 文件不存在 + ValueError: 文件内容为空或格式错误 + """ + if not Path(config_file).exists(): + raise FileNotFoundError(f"Rule file not found: {config_file}") + + with open(config_file, "r", encoding="utf-8") as file: + content = file.read() + + rules = json5.loads(content) + + if not rules: + raise ValueError("Rule file cannot be empty") + + return rules - with open(self.config_file, "r") as file: - return json.load(file) + def load_rules(self, config_file: str) -> Dict[str, Any]: + """ + 加载规则(兼容旧接口) + + Args: + config_file: 规则配置文件路径 + + Returns: + 解析后的规则字典 + """ + self.rules = self.load_rules_from_file(config_file) + return self.rules - def extract(self): - # Implement rule extraction logic here - extracted_rules = [] - for rule in self.rules: - extracted_rules.append(rule) # Placeholder for actual extraction logic - return extracted_rules + def extract(self) -> Dict[str, Any]: + """ + 提取规则 + + Returns: + 规则字典 + """ + return self.rules diff --git a/src/conventionalrp/renderers/markdown_renderer.py b/src/conventionalrp/renderers/markdown_renderer.py index fab429f..9df59a2 100644 --- a/src/conventionalrp/renderers/markdown_renderer.py +++ b/src/conventionalrp/renderers/markdown_renderer.py @@ -1,17 +1,50 @@ from .base import BaseRenderer +from typing import List, Dict, Any, Union class MarkdownRenderer(BaseRenderer): - def render(self, data): + def render(self, data: Union[List[Dict[str, Any]], Dict[str, Any]]) -> str: """ Renders the given data in Markdown format. Args: - data (dict): The data to render. + data: The data to render (can be list or dict). Returns: str: The rendered Markdown string. """ + if isinstance(data, list): + return self._render_list(data) + elif isinstance(data, dict): + return self._render_dict(data) + else: + return str(data) + + def _render_list(self, data: List[Dict[str, Any]]) -> str: + """渲染列表数据为 Markdown""" + markdown_output = "# TRPG Log\n\n" + + for i, entry in enumerate(data, 1): + if entry.get("type") == "metadata": + markdown_output += f"## Entry {i}\n\n" + markdown_output += f"**Timestamp**: {entry.get('timestamp', 'N/A')} \n" + markdown_output += f"**Speaker**: {entry.get('speaker', 'N/A')} \n\n" + + content_items = entry.get("content", []) + if content_items: + markdown_output += "**Content**:\n\n" + for content in content_items: + content_type = content.get("type", "unknown") + content_text = content.get("content", "") + markdown_output += f"- [{content_type}] {content_text}\n" + markdown_output += "\n" + else: + markdown_output += f"- {entry}\n" + + return markdown_output + + def _render_dict(self, data: Dict[str, Any]) -> str: + """渲染字典数据为 Markdown""" markdown_output = "" for key, value in data.items(): markdown_output += f"## {key}\n\n{value}\n\n" |
