""" Token and Lexer Documentation ============================= This module provides the `Token` and `Lexer` classes for tokenizing input strings. Token Class ----------- The `Token` class represents a token with a type, value, and position in the input string. It is a subclass of the built-in `dict` class. Attributes: - `type` (str): The type of the token. - `value` (str or int): The value of the token. - `position` (int): The position of the token in the input string. Methods: - `__getattr__(self, name)`: Retrieves the value of an attribute by name. Raises an `AttributeError` if the attribute does not exist. Lexer Class ----------- The `Lexer` class tokenizes an input string using a set of rules. Attributes: - `input` (str): The input string to tokenize. - `position` (int): The current position in the input string. - `tokens` (list): The list of tokens generated by the lexer. Methods: - `get_next_token(self)`: Retrieves the next token from the input string. - `__iter__(self)`: Returns an iterator over the tokens. - `__getitem__(self, index)`: Retrieves a token by index. - `__len__(self)`: Returns the number of tokens. Usage Example ------------- ```python lexer = Lexer(''' @newMessage: { ? message == 1: reply: hi ! reply: no } ''') token = lexer.get_next_token() while token['type'] != 'EOF': print(f'Type: {token["type"]}, Value: {token["value"]}, Position: {token["position"]}') token = lexer.get_next_token() print("\nAll tokens:") print([t['type'] for t in lexer]) """ __all__ = ['Token', 'Lexer'] class Token(dict): def __init__(self, type, value, position): super().__init__(type=type, value=value, position=position) def __getattr__(self, name): try: return self[name] except KeyError: raise AttributeError(f"'Token' object has no attribute '{name}'") class Lexer: def __init__(self, input): self.input = input self.position = 0 self.tokens = [] def get_next_token(self): while self.position < len(self.input): current_char = self.input[self.position] if current_char.isspace(): self.position += 1 continue if current_char == '#': self.position += 1 while (self.position < len(self.input) and self.input[self.position] != '\n'): self.position += 1 continue if current_char == '/' and self.position + 1 < len(self.input) and self.input[self.position + 1] == '*': self.position += 2 while (self.position < len(self.input) - 1 and (self.input[self.position] != '*' or self.input[self.position + 1] != '/')): self.position += 1 if self.position < len(self.input) - 1: self.position += 2 continue if current_char.isalpha(): start_position = self.position while (self.position < len(self.input) and self.input[self.position].isalnum()): self.position += 1 token = Token('IDENTIFIER', self.input[start_position:self.position], start_position) self.tokens.append(token) return token if current_char.isdigit(): start_position = self.position while (self.position < len(self.input) and self.input[self.position].isdigit()): self.position += 1 token = Token('INTEGER', int(self.input[start_position:self.position]), start_position) self.tokens.append(token) return token if current_char in {'<', '>', '=', '!', '&', '|', '@'}: if (self.position + 1 < len(self.input) and self.input[self.position + 1] in {'=', '&', '|'}): token = Token('OPERATOR', current_char + self.input[self.position + 1], self.position) self.position += 2 else: token = Token('OPERATOR', current_char, self.position) self.position += 1 self.tokens.append(token) return token if current_char in {'{', '}', '(', ')', '[', ']', ';', ',', '.', ':'}: token = Token('SEPARATOR', current_char, self.position) self.position += 1 self.tokens.append(token) return token if current_char in {'?', '!', '|'}: token = Token('CONTROL', current_char, self.position) self.position += 1 self.tokens.append(token) return token self.position += 1 raise Exception(f'Unknown character: {current_char}') token = Token('EOF', None, self.position) self.tokens.append(token) return token def __iter__(self): return iter(self.tokens) def __getitem__(self, index): return self.tokens[index] def __len__(self): return len(self.tokens)