author    简律纯 <i@jyunko.cn>  2023-09-27 17:31:16 +0800
committer 简律纯 <i@jyunko.cn>  2023-09-27 17:31:16 +0800
commit    ba4129933cdb6d91e695b2de900b8753652ec385 (patch)
tree      c520d508bf50cd22ea3123840f4aff77f148256b /psi/lexer.py
parent    3ad303968524f6dc57b7d5900e33963c77342552 (diff)
feat(pyproject): optimize the Python package management structure
Diffstat (limited to 'psi/lexer.py')
-rw-r--r--  psi/lexer.py  153
1 file changed, 0 insertions(+), 153 deletions(-)
diff --git a/psi/lexer.py b/psi/lexer.py
deleted file mode 100644
index 2fce0eb..0000000
--- a/psi/lexer.py
+++ /dev/null
@@ -1,153 +0,0 @@
-"""
-Token and Lexer Documentation
-=============================
-
-This module provides the `Token` and `Lexer` classes for tokenizing input strings.
-
-Token Class
------------
-
-The `Token` class represents a token with a type, value, and position in the input string. It is a subclass of the built-in `dict` class.
-
-Attributes:
-- `type` (str): The type of the token.
-- `value` (str or int): The value of the token.
-- `position` (int): The position of the token in the input string.
-
-Methods:
-- `__getattr__(self, name)`: Retrieves the value of an attribute by name. Raises an `AttributeError` if the attribute does not exist.
-
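-A minimal illustration of attribute and key access (both read the same
-underlying dict entries):
-
-```python
-t = Token('INTEGER', 42, 0)
-print(t.type)      # 'INTEGER' -- attribute access via __getattr__
-print(t['value'])  # 42 -- ordinary dict-style access
-```
-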
-Lexer Class
------------
-
-The `Lexer` class tokenizes an input string with a fixed set of rules:
-whitespace and comments are skipped, while identifiers, integers, operators,
-separators, and control characters become tokens.
-
-Attributes:
-- `input` (str): The input string to tokenize.
-- `position` (int): The current position in the input string.
-- `tokens` (list): The list of tokens generated by the lexer.
-
-Methods:
-- `get_next_token(self)`: Retrieves the next token from the input string,
-  appending it to `tokens`; returns an `EOF` token once the input is exhausted.
-- `__iter__(self)`: Returns an iterator over the tokens.
-- `__getitem__(self, index)`: Retrieves a token by index.
-- `__len__(self)`: Returns the number of tokens.
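-
-For instance, tokens already produced can be revisited by index or counted
-(a minimal sketch; `tokens` fills up as `get_next_token` is called):
-
-```python
-lexer = Lexer('reply: hi')
-while lexer.get_next_token().type != 'EOF':
-    pass
-print(len(lexer))      # 4 tokens, including the final EOF
-print(lexer[0].value)  # 'reply'
-```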
-
-Usage Example
--------------
-
-```python
-lexer = Lexer('''
-@newMessage: {
- ? message == 1: reply: hi
- ! reply: no
-}
-''')
-
-token = lexer.get_next_token()
-while token['type'] != 'EOF':
- print(f'Type: {token["type"]}, Value: {token["value"]}, Position: {token["position"]}')
- token = lexer.get_next_token()
-
-print("\nAll tokens:")
-print([t['type'] for t in lexer])
-"""
-
-__all__ = ['Token', 'Lexer']
-
-class Token(dict):
- def __init__(self, type, value, position):
- super().__init__(type=type, value=value, position=position)
-
- def __getattr__(self, name):
- try:
- return self[name]
- except KeyError:
- raise AttributeError(f"'Token' object has no attribute '{name}'")
-
-class Lexer:
- def __init__(self, input):
- self.input = input
- self.position = 0
- self.tokens = []
-
- def get_next_token(self):
- while self.position < len(self.input):
- current_char = self.input[self.position]
-
- if current_char.isspace():
- self.position += 1
- continue
-
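-            # A '#' starts a line comment; skip everything up to the newline.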
- if current_char == '#':
- self.position += 1
- while (self.position < len(self.input) and
- self.input[self.position] != '\n'):
- self.position += 1
- continue
-
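-            # A '/*' starts a block comment; skip past the closing '*/'.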
-            if current_char == '/' and self.position + 1 < len(self.input) and self.input[self.position + 1] == '*':
-                self.position += 2
-                while (self.position < len(self.input) - 1 and
-                       (self.input[self.position] != '*' or self.input[self.position + 1] != '/')):
-                    self.position += 1
-                # Skip the closing '*/'; an unterminated comment runs to EOF.
-                self.position = min(self.position + 2, len(self.input))
-                continue
-
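-            # Identifiers: a letter followed by any run of letters and digits.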
- if current_char.isalpha():
- start_position = self.position
- while (self.position < len(self.input) and
- self.input[self.position].isalnum()):
- self.position += 1
- token = Token('IDENTIFIER', self.input[start_position:self.position], start_position)
- self.tokens.append(token)
- return token
-
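-            # Integer literals: a run of decimal digits.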
- if current_char.isdigit():
- start_position = self.position
- while (self.position < len(self.input) and
- self.input[self.position].isdigit()):
- self.position += 1
- token = Token('INTEGER', int(self.input[start_position:self.position]), start_position)
- self.tokens.append(token)
- return token
-
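-            # Two-character operators ('==', '!=', '&&', '||', '<=', ...) are
-            # matched first; a lone '!' or '|' falls through to the CONTROL
-            # branch instead of being emitted as an OPERATOR.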
-            if current_char in {'<', '>', '=', '!', '&', '|', '@'}:
-                if (self.position + 1 < len(self.input) and
-                    self.input[self.position + 1] in {'=', '&', '|'}):
-                    token = Token('OPERATOR', current_char + self.input[self.position + 1], self.position)
-                    self.position += 2
-                    self.tokens.append(token)
-                    return token
-                if current_char not in {'!', '|'}:
-                    token = Token('OPERATOR', current_char, self.position)
-                    self.position += 1
-                    self.tokens.append(token)
-                    return token
-
-            if current_char in {'{', '}', '(', ')', '[', ']', ';', ',', '.', ':'}:
-                token = Token('SEPARATOR', current_char, self.position)
-                self.position += 1
-                self.tokens.append(token)
-                return token
-
-            # Reached by '?', and by '!' or '|' when not part of a two-char operator.
-            if current_char in {'?', '!', '|'}:
-                token = Token('CONTROL', current_char, self.position)
-                self.position += 1
-                self.tokens.append(token)
-                return token
-
-            # No lexing rule matched the current character.
-            raise Exception(f'Unknown character: {current_char!r} at position {self.position}')
-
- token = Token('EOF', None, self.position)
- self.tokens.append(token)
- return token
-
- def __iter__(self):
- return iter(self.tokens)
-
- def __getitem__(self, index):
- return self.tokens[index]
-
- def __len__(self):
-        return len(self.tokens)
\ No newline at end of file