| author | 2024-02-28 11:37:54 +0800 |
|---|---|
| committer | 2024-02-28 11:37:54 +0800 |
| commit | 672e0d03d3ffdba740d166f2c4a7243f142cec5f (patch) |
| tree | 46cdb4ea015877e7828710a47c945aa2e834beb0 /nivis-python/lexer.py |
| parent | fc6fc64ca3ff7df62dd270fc021f674e56b2dedf (diff) |
| download | TRPGNivis-672e0d03d3ffdba740d166f2c4a7243f142cec5f.tar.gz TRPGNivis-672e0d03d3ffdba740d166f2c4a7243f142cec5f.zip |
refactor!: built in rust
Diffstat (limited to 'nivis-python/lexer.py')
| -rw-r--r-- | nivis-python/lexer.py | 276 |
|---|---|---|

1 file changed, 276 insertions, 0 deletions
```
diff --git a/nivis-python/lexer.py b/nivis-python/lexer.py
new file mode 100644
index 0000000..7ba94e3
--- /dev/null
+++ b/nivis-python/lexer.py
@@ -0,0 +1,276 @@
```

````python
"""
Token and Lexer Documentation
=============================

This module provides the `Token` and `Lexer` classes for tokenizing input strings.

Token Class
-----------

The `Token` class represents a token with a type, value, and position in the
input string. It is a subclass of the built-in `dict` class.

Attributes:
- `type` (str): The type of the token.
- `value` (str or int): The value of the token.
- `position` (int): The position of the token in the input string.

Methods:
- `__getattr__(self, name)`: Retrieves the value of an attribute by name.
  Raises an `AttributeError` if the attribute does not exist.

Lexer Class
-----------

The `Lexer` class tokenizes an input string using a set of rules.

Attributes:
- `input` (str): The input string to tokenize.
- `position` (int): The current position in the input string.
- `tokens` (list): The list of tokens generated by the lexer.

Methods:
- `get_next_token(self)`: Retrieves the next token from the input string.
- `__iter__(self)`: Returns an iterator over the tokens.
- `__getitem__(self, index)`: Retrieves a token by index.
- `__len__(self)`: Returns the number of tokens.

Usage Example
-------------

```python
lexer = Lexer('''
@newMessage: {
    ? message == 1: reply: hi
    ! reply: no
}
''')

token = lexer.get_next_token()
while token['type'] != 'EOF':
    print(f'Type: {token["type"]}, Value: {token["value"]}, Position: {token["position"]}')
    token = lexer.get_next_token()

print("\nAll tokens:")
print([t['type'] for t in lexer])
```
"""
from .exception import ValueError  # project-local ValueError; shadows the builtin

__all__ = ["Token", "Lexer"]


class Token(dict):
    """
    A class representing a token in the lexer.

    Args:
        type: The type of the token.
        value: The value of the token.
        position: The position of the token.

    Example:
        ```python
        token = Token("identifier", "x", (1, 5))
        ```
    """

    def __init__(self, type, value, position):
        """
        Initializes a Token object.

        Args:
            type: The type of the token.
            value: The value of the token.
            position: The position of the token.
        """
        super().__init__(type=type, value=value, position=position)

    def __getattr__(self, name):
        """
        Retrieves the value of an attribute from the Token object.

        Args:
            name: The name of the attribute.

        Returns:
            The value of the attribute.

        Raises:
            AttributeError: Raised when the attribute does not exist.
        """
        try:
            return self[name]
        except KeyError as e:
            raise AttributeError(f"'Token' object has no attribute '{name}'") from e


class Lexer:
    """
    A class representing a lexer for Psi code.

    Args:
        input: The input code to be lexed.

    Example:
        ```python
        lexer = Lexer("x = 10")
        for token in lexer:
            print(token)
        ```
    """

    def __init__(self, input):
        """
        Initializes a Lexer object.

        Args:
            input: The input code to be lexed.
        """
        self.input = input
        self.position = 0
        self.tokens = []

    def get_next_token(self):
        """
        Retrieves the next token from the input code.

        Returns:
            The next token.

        Raises:
            ValueError: Raised when an unknown character is encountered.
        """
        while self.position < len(self.input):
            current_char = self.input[self.position]

            # Skip whitespace.
            if current_char.isspace():
                self.position += 1
                continue

            # Skip "#" line comments up to (but not including) the newline.
            if current_char == "#":
                self.position += 1
                while (
                    self.position < len(self.input)
                    and self.input[self.position] != "\n"
                ):
                    self.position += 1
                continue

            # Skip "/* ... */" block comments.
            if (
                current_char == "/"
                and self.position + 1 < len(self.input)
                and self.input[self.position + 1] == "*"
            ):
                self.position += 2
                while self.position < len(self.input) - 1 and (
                    self.input[self.position] != "*"
                    or self.input[self.position + 1] != "/"
                ):
                    self.position += 1
                if self.position < len(self.input) - 1:
                    self.position += 2
                continue

            # Identifiers: a letter followed by a run of alphanumerics.
            if current_char.isalpha():
                start_position = self.position
                while (
                    self.position < len(self.input)
                    and self.input[self.position].isalnum()
                ):
                    self.position += 1
                token = Token(
                    "IDENTIFIER",
                    self.input[start_position : self.position],
                    start_position,
                )
                self.tokens.append(token)
                return token

            # Integer literals.
            if current_char.isdigit():
                start_position = self.position
                while (
                    self.position < len(self.input)
                    and self.input[self.position].isdigit()
                ):
                    self.position += 1
                token = Token(
                    "INTEGER",
                    int(self.input[start_position : self.position]),
                    start_position,
                )
                self.tokens.append(token)
                return token

            # Operators, with a one-character lookahead for two-character
            # forms (e.g. "==", "&&", "||"). Note this branch runs before the
            # CONTROL branch below, so "!" and "|" always lex as OPERATOR.
            if current_char in {"<", ">", "=", "!", "&", "|", "@"}:
                if self.position + 1 < len(self.input) and self.input[
                    self.position + 1
                ] in {"=", "&", "|"}:
                    token = Token(
                        "OPERATOR",
                        current_char + self.input[self.position + 1],
                        self.position,
                    )
                    self.position += 2
                else:
                    token = Token("OPERATOR", current_char, self.position)
                    self.position += 1
                self.tokens.append(token)
                return token

            if current_char in {"{", "}", "(", ")", "[", "]", ";", ",", ".", ":"}:
                return self._single_char_token("SEPARATOR", current_char)
            if current_char in {"?", "!", "|"}:
                return self._single_char_token("CONTROL", current_char)

            # Unknown character: step past it so lexing can resume, then raise.
            self.position += 1
            raise ValueError(f"Unknown character: {current_char}")

        token = Token("EOF", None, self.position)
        self.tokens.append(token)
        return token

    def _single_char_token(self, type, value):
        """Emits and records a single-character token of the given type."""
        token = Token(type, value, self.position)
        self.position += 1
        self.tokens.append(token)
        return token

    def __iter__(self):
        """
        Returns an iterator over the tokens.

        Returns:
            An iterator over the tokens.
        """
        return iter(self.tokens)

    def __getitem__(self, index):
        """
        Retrieves the token at the specified index.

        Args:
            index: The index of the token.

        Returns:
            The token at the specified index.
        """
        return self.tokens[index]

    def __len__(self):
        """
        Returns the number of tokens.

        Returns:
            The number of tokens.
        """
        return len(self.tokens)
````
