Diffstat (limited to 'src/lib.rs')
-rw-r--r--  src/lib.rs  252
1 file changed, 243 insertions(+), 9 deletions(-)
diff --git a/src/lib.rs b/src/lib.rs
index 8f19266..684001c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,19 +1,253 @@
use pyo3::prelude::*;
+use pyo3::types::{PyDict, PyList};
+use regex::Regex;
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
-#[pyfunction]
-fn sum_as_string(a: usize, b: usize) -> PyResult<String> {
- Ok((a + b).to_string())
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[pyclass]
+pub enum TokenType {
+ Metadata,
+ DiceRoll,
+ Dialogue,
+ Action,
+ Ooc,
+ System,
+ Text,
+ Unknown,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[pyclass]
+pub struct Token {
+ #[pyo3(get, set)]
+ pub token_type: String,
+ #[pyo3(get, set)]
+ pub content: String,
+ #[pyo3(get, set)]
+ pub metadata: HashMap<String, String>,
+}
+
+#[pymethods]
+impl Token {
+ #[new]
+ fn new(token_type: String, content: String) -> Self {
+ Token {
+ token_type,
+ content,
+ metadata: HashMap::new(),
+ }
+ }
+
+ fn add_metadata(&mut self, key: String, value: String) {
+ self.metadata.insert(key, value);
+ }
+
+ fn get_metadata(&self, key: &str) -> Option<String> {
+ self.metadata.get(key).cloned()
+ }
+ fn to_dict(&self, py: Python) -> PyResult<Py<PyAny>> {
+ let dict = PyDict::new(py);
+ dict.set_item("type", &self.token_type)?;
+ dict.set_item("content", &self.content)?;
+
+ let metadata_dict = PyDict::new(py);
+ for (k, v) in &self.metadata {
+ metadata_dict.set_item(k, v)?;
+ }
+ dict.set_item("metadata", metadata_dict)?;
+
+ Ok(dict.into())
+ }
+
+ fn __repr__(&self) -> String {
+ format!("Token(type={}, content={})", self.token_type, self.content)
+ }
+}
+
+#[pyclass]
+pub struct RegexRule {
+ pattern: Regex,
+ #[pyo3(get, set)]
+ pub rule_type: String,
+ #[pyo3(get, set)]
+ pub priority: i32,
+}
+
+#[pymethods]
+impl RegexRule {
+ #[new]
+ fn new(pattern: String, rule_type: String, priority: i32) -> PyResult<Self> {
+ let regex = Regex::new(&pattern)
+ .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(
+ format!("Invalid regex pattern: {}", e)
+ ))?;
+
+ Ok(RegexRule {
+ pattern: regex,
+ rule_type,
+ priority,
+ })
+ }
+
+ fn matches(&self, text: &str) -> bool {
+ self.pattern.is_match(text)
+ }
+
+
+ fn extract(&self, text: &str) -> Option<Vec<String>> {
+ self.pattern.captures(text).map(|caps| {
+ caps.iter()
+ .skip(1) // skip capture group 0 (the full match)
+ .filter_map(|m| m.map(|m| m.as_str().to_string()))
+ .collect()
+ })
+ }
+
+ fn find_all(&self, text: &str, py: Python) -> PyResult<Py<PyAny>> {
+ let matches: Vec<(usize, usize, String)> = self.pattern
+ .find_iter(text)
+ .map(|m| (m.start(), m.end(), m.as_str().to_string()))
+ .collect();
+
+ let list = PyList::empty(py);
+ for (start, end, matched) in matches {
+ let dict = PyDict::new(py);
+ dict.set_item("start", start)?;
+ dict.set_item("end", end)?;
+ dict.set_item("text", matched)?;
+ list.append(dict)?;
+ }
+
+ Ok(list.into())
+ }
+}
+
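+/// Line parser that applies prioritized regex rules and drops overlapping matches.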
+#[pyclass]
+pub struct TextParser {
+ rules: Vec<(Regex, String, i32)>, // (pattern, type, priority)
+}
+
+#[pymethods]
+impl TextParser {
+ #[new]
+ fn new() -> Self {
+ TextParser { rules: Vec::new() }
+ }
+
+ fn add_rule(&mut self, pattern: String, rule_type: String, priority: i32) -> PyResult<()> {
+ let regex = Regex::new(&pattern)
+ .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(
+ format!("Invalid regex: {}", e)
+ ))?;
+
+ self.rules.push((regex, rule_type, priority));
+ self.rules.sort_by(|a, b| b.2.cmp(&a.2));
+
+ Ok(())
+ }
+
+ fn parse_line(&self, text: &str) -> Vec<(String, String, usize, usize)> {
+ let mut results = Vec::new();
+ let mut processed_ranges: Vec<(usize, usize)> = Vec::new();
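+ // Rules are kept sorted by descending priority (see add_rule), so
+ // higher-priority rules claim byte ranges first; any later match that
+ // overlaps an already claimed range is skipped below.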
+ for (pattern, rule_type, _priority) in &self.rules {
+ for mat in pattern.find_iter(text) {
+ let start = mat.start();
+ let end = mat.end();
+
+ let overlaps = processed_ranges.iter().any(|(s, e)| {
+ (start >= *s && start < *e) || (end > *s && end <= *e) || (start <= *s && end >= *e)
+ });
+
+ if !overlaps {
+ results.push((
+ rule_type.clone(),
+ mat.as_str().to_string(),
+ start,
+ end,
+ ));
+ processed_ranges.push((start, end));
+ }
+ }
+ }
+
+ results.sort_by_key(|r| r.2);
+ results
+ }
+ fn parse_lines(&self, lines: Vec<String>, py: Python) -> PyResult<Py<PyAny>> {
+ let list = PyList::empty(py);
+
+ for line in lines {
+ let parsed = self.parse_line(&line);
+ let line_result = PyList::empty(py);
+
+ for (rule_type, content, start, end) in parsed {
+ let dict = PyDict::new(py);
+ dict.set_item("type", rule_type)?;
+ dict.set_item("content", content)?;
+ dict.set_item("start", start)?;
+ dict.set_item("end", end)?;
+ line_result.append(dict)?;
+ }
+
+ list.append(line_result)?;
+ }
+
+ Ok(list.into())
+ }
+
+ fn clear_rules(&mut self) {
+ self.rules.clear();
+ }
+
+ fn rule_count(&self) -> usize {
+ self.rules.len()
+ }
}
#[pyclass]
-pub struct Base {}
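+/// Plain substring matcher; uses str::contains rather than compiled regexes.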
+pub struct FastMatcher {
+ patterns: Vec<String>,
+}
+
+#[pymethods]
+impl FastMatcher {
+ #[new]
+ fn new(patterns: Vec<String>) -> Self {
+ FastMatcher { patterns }
+ }
+
+
+ fn contains_any(&self, text: &str) -> bool {
+ self.patterns.iter().any(|p| text.contains(p))
+ }
+
+
+ fn find_matches(&self, text: &str) -> Vec<String> {
+ self.patterns
+ .iter()
+ .filter(|p| text.contains(*p))
+ .cloned()
+ .collect()
+ }
+
+ fn count_matches(&self, text: &str, py: Python) -> PyResult<Py<PyAny>> {
+ let dict = PyDict::new(py);
+ for pattern in &self.patterns {
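+ // str::matches counts non-overlapping occurrences of the literal pattern.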
+ let count = text.matches(pattern.as_str()).count();
+ dict.set_item(pattern, count)?;
+ }
+ Ok(dict.into())
+ }
+}
-/// A Python module implemented in Rust. The name of this function must match
-/// the `lib.name` setting in the `Cargo.toml`, else Python will not be able to
-/// import the module.
#[pymodule]
fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
- m.add_function(wrap_pyfunction!(sum_as_string, m)?)?;
- m.add_class::<Base>()?;
+ // Register the classes exposed to Python.
+ m.add_class::<Token>()?;
+ m.add_class::<RegexRule>()?;
+ m.add_class::<TextParser>()?;
+ m.add_class::<FastMatcher>()?;
+
Ok(())
 }
\ No newline at end of file
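
For reference, a minimal sketch of how the new TextParser could be exercised from Rust. This test module is not part of the commit; the rule patterns, names, and priorities are illustrative assumptions, and building it with cargo test may need pyo3 feature adjustments for an extension-module crate.

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn higher_priority_rules_win_overlaps() {
        let mut parser = TextParser::new();
        // Illustrative rules: dice notation (e.g. "2d6") outranks generic words.
        parser
            .add_rule(r"\d+d\d+".to_string(), "dice".to_string(), 10)
            .unwrap();
        parser
            .add_rule(r"\w+".to_string(), "word".to_string(), 1)
            .unwrap();

        let spans = parser.parse_line("roll 2d6 now");
        // "2d6" is claimed by the higher-priority dice rule; the overlapping
        // word match over the same range is skipped.
        assert!(spans.contains(&("dice".to_string(), "2d6".to_string(), 5, 8)));
        assert_eq!(spans.len(), 3); // "roll", "2d6", "now"
    }
}

parse_line returns (type, content, start, end) tuples sorted by start offset, which is what the assertions above rely on.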