aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/nivis-python/lexer.py
diff options
context:
space:
mode:
Diffstat (limited to 'nivis-python/lexer.py')
-rw-r--r--nivis-python/lexer.py276
1 files changed, 0 insertions, 276 deletions
diff --git a/nivis-python/lexer.py b/nivis-python/lexer.py
deleted file mode 100644
index 7ba94e3..0000000
--- a/nivis-python/lexer.py
+++ /dev/null
@@ -1,276 +0,0 @@
-"""
-Token and Lexer Documentation
-=============================
-
-This module provides the `Token` and `Lexer` classes for tokenizing input strings.
-
-Token Class
------------
-
-The `Token` class represents a token with a type, value, and position in the input string. It is a subclass of the built-in `dict` class.
-
-Attributes:
-- `type` (str): The type of the token.
-- `value` (str or int): The value of the token.
-- `position` (int): The position of the token in the input string.
-
-Methods:
-- `__getattr__(self, name)`: Retrieves the value of an attribute by name. Raises an `AttributeError` if the attribute does not exist.
-
-Lexer Class
------------
-
-The `Lexer` class tokenizes an input string using a set of rules.
-
-Attributes:
-- `input` (str): The input string to tokenize.
-- `position` (int): The current position in the input string.
-- `tokens` (list): The list of tokens generated by the lexer.
-
-Methods:
-- `get_next_token(self)`: Retrieves the next token from the input string.
-- `__iter__(self)`: Returns an iterator over the tokens.
-- `__getitem__(self, index)`: Retrieves a token by index.
-- `__len__(self)`: Returns the number of tokens.
-
-Usage Example
--------------
-
-```python
-lexer = Lexer('''
-@newMessage: {
- ? message == 1: reply: hi
- ! reply: no
-}
-''')
-
-token = lexer.get_next_token()
-while token['type'] != 'EOF':
- print(f'Type: {token["type"]}, Value: {token["value"]}, Position: {token["position"]}')
- token = lexer.get_next_token()
-
-print("\nAll tokens:")
-print([t['type'] for t in lexer])
-"""
# NOTE(review): this import shadows the builtin ValueError -- confirm that
# .exception defines a compatible exception (callers catching the builtin
# would otherwise miss the raise in Lexer.get_next_token).
from .exception import ValueError

# Public API of this module.
__all__ = ["Token", "Lexer"]
-
-
class Token(dict):
    """A single lexical token.

    The token is stored as a plain ``dict`` with the keys ``type``,
    ``value`` and ``position``, so it supports both item access
    (``token["type"]``) and attribute access (``token.type``).

    Args:
        type: Category of the token (e.g. "IDENTIFIER", "OPERATOR").
        value: The token's payload (a string, or an int for integers).
        position: Index in the input string where the token starts.

    Example:
        ```python
        token = Token("identifier", "x", (1, 5))
        ```
    """

    def __init__(self, type, value, position):
        """Record the three token fields as dictionary entries.

        Args:
            type: Category of the token.
            value: The token's payload.
            position: Index where the token starts.
        """
        super().__init__(type=type, value=value, position=position)

    def __getattr__(self, name):
        """Expose stored dictionary entries as attributes.

        Args:
            name: The attribute (dictionary key) being looked up.

        Returns:
            The stored value for ``name``.

        Raises:
            AttributeError: If no entry with that name exists.
        """
        # __getattr__ is only invoked after normal attribute lookup fails,
        # so this never masks real attributes.
        if name in self:
            return self[name]
        raise AttributeError(f"'Token' object has no attribute '{name}'")
-
-
class Lexer:
    """A lexer for Psi code.

    Tokens are produced one at a time by :meth:`get_next_token` and are
    also accumulated in ``self.tokens``, so the lexer can afterwards be
    iterated, indexed and measured like a sequence of the tokens seen
    so far.

    Args:
        input: The input code to be lexed.

    Example:
        ```python
        lexer = Lexer("x = 10")
        for token in lexer:
            print(token)
        ```
    """

    def __init__(self, input):
        """Initialize the lexer over an input string.

        Args:
            input: The input code to be lexed.
        """
        self.input = input    # source text being scanned
        self.position = 0     # current scan index into self.input
        self.tokens = []      # every token produced so far, in order

    def get_next_token(self):
        """Scan forward and return the next token.

        Whitespace, ``#`` line comments and ``/* ... */`` block comments
        are skipped. Each produced token is appended to ``self.tokens``
        before being returned; once the input is exhausted an ``EOF``
        token (value ``None``) is produced.

        Returns:
            The next Token.

        Raises:
            ValueError: If an unrecognised character is encountered.
        """
        while self.position < len(self.input):
            current_char = self.input[self.position]

            if current_char.isspace():
                self.position += 1
                continue

            # '#' starts a comment running to the end of the line.
            if current_char == "#":
                self.position += 1
                while (
                    self.position < len(self.input)
                    and self.input[self.position] != "\n"
                ):
                    self.position += 1
                continue

            # '/*' starts a block comment terminated by '*/'.
            # NOTE(review): an unterminated block comment stops scanning one
            # character before end of input, so that final character leaks
            # back into tokenization -- confirm whether that is intended.
            if (
                current_char == "/"
                and self.position + 1 < len(self.input)
                and self.input[self.position + 1] == "*"
            ):
                self.position += 2
                while self.position < len(self.input) - 1 and (
                    self.input[self.position] != "*"
                    or self.input[self.position + 1] != "/"
                ):
                    self.position += 1
                if self.position < len(self.input) - 1:
                    self.position += 2
                continue

            # Identifiers: a letter followed by letters/digits.
            if current_char.isalpha():
                start_position = self.position
                while (
                    self.position < len(self.input)
                    and self.input[self.position].isalnum()
                ):
                    self.position += 1
                token = Token(
                    "IDENTIFIER",
                    self.input[start_position : self.position],
                    start_position,
                )
                self.tokens.append(token)
                return token

            # Integer literals: a run of digits, stored as int.
            if current_char.isdigit():
                start_position = self.position
                while (
                    self.position < len(self.input)
                    and self.input[self.position].isdigit()
                ):
                    self.position += 1
                token = Token(
                    "INTEGER",
                    int(self.input[start_position : self.position]),
                    start_position,
                )
                self.tokens.append(token)
                return token

            # Operators, possibly two characters wide (e.g. '==', '&&').
            if current_char in {"<", ">", "=", "!", "&", "|", "@"}:
                if self.position + 1 < len(self.input) and self.input[
                    self.position + 1
                ] in {"=", "&", "|"}:
                    token = Token(
                        "OPERATOR",
                        current_char + self.input[self.position + 1],
                        self.position,
                    )
                    self.position += 2
                else:
                    token = Token("OPERATOR", current_char, self.position)
                    self.position += 1
                self.tokens.append(token)
                return token

            if current_char in {"{", "}", "(", ")", "[", "]", ";", ",", ".", ":"}:
                return self._append_single_char_token("SEPARATOR", current_char)
            # NOTE(review): '!' and '|' can never reach this branch -- the
            # OPERATOR set above already claims both, so only '?' is ever
            # emitted as CONTROL. Confirm which classification is intended.
            if current_char in {"?", "!", "|"}:
                return self._append_single_char_token("CONTROL", current_char)
            self.position += 1
            raise ValueError(f"Unknown character: {current_char}")

        token = Token("EOF", None, self.position)
        self.tokens.append(token)
        return token

    def _append_single_char_token(self, token_type, current_char):
        """Create a one-character token, record it and advance.

        Args:
            token_type: Token category ("SEPARATOR" or "CONTROL").
            current_char: The character the token consists of.

        Returns:
            The newly created Token.
        """
        token = Token(token_type, current_char, self.position)
        self.position += 1
        self.tokens.append(token)
        return token

    def __iter__(self):
        """Return an iterator over the tokens produced so far.

        Returns:
            An iterator over ``self.tokens``.
        """
        return iter(self.tokens)

    def __getitem__(self, index):
        """Retrieve an already-produced token by index.

        Args:
            index: The index of the token.

        Returns:
            The token at the specified index.
        """
        return self.tokens[index]

    def __len__(self):
        """Return the number of tokens produced so far.

        Returns:
            The number of tokens.
        """
        return len(self.tokens)