CrossGL · Shamantak12 · Sep 21, 2024 · Sep 21, 2024 · Sep 21, 2024 · Sep 21, 2024
@@ -133,4 +133,4 @@ def tokenize(self):
                     f"Illegal character '{unmatched_char}' at position {pos}\n{highlighted_code}"
                 )
 
-        self.tokens.append(("EOF", None))  # End of file token
+        self.tokens.append(("EOF", None))  
@@ -9,6 +9,66 @@ def tokenize_code(code: str) -> List:
     return lexer.tokens
 
 
+class Lexer:
+    """Regex-driven tokenizer that scans its input eagerly on construction.
+
+    After ``__init__`` returns, ``tokens`` holds ``(tag, text)`` tuples in
+    source order, terminated by ``("EOF", None)``.
+    """
+
+    def __init__(self, input_code):
+        """Store ``input_code`` and tokenize it immediately.
+
+        Args:
+            input_code: The source text to scan.
+
+        Raises:
+            SyntaxError: Propagated from ``tokenize``.
+        """
+        self.input_code = input_code
+        self.tokens = []
+        self.tokenize()
+
+    def tokenize(self):
+        """Scan ``self.input_code`` and append the tokens to ``self.tokens``.
+
+        At each position the entries of the module-level
+        ``token_specification`` are tried in list order and the first one
+        that matches wins. Every match is recorded as ``(tag, text)``,
+        nothing is filtered out, and ``("EOF", None)`` is appended last.
+
+        Raises:
+            SyntaxError: If no entry matches at the current position. The
+                message shows the offending character bracketed in the input.
+        """
+        # Entries are (tag, pattern). Compile each pattern once per call
+        # instead of once per pattern per input position.
+        compiled = [
+            (tag, re.compile(pattern)) for tag, pattern in token_specification
+        ]
+        source = self.input_code
+        pos = 0
+        while pos < len(source):
+            for tag, regex in compiled:
+                match = regex.match(source, pos)
+                # A zero-width match would never advance ``pos`` and would
+                # loop forever, so treat it as "no match" and keep looking.
+                if match and match.end() > pos:
+                    self.tokens.append((tag, match.group(0)))
+                    pos = match.end()
+                    break
+            else:
+                # No entry consumed any input at ``pos``.
+                unmatched_char = source[pos]
+                highlighted_code = (
+                    source[:pos] + "[" + unmatched_char + "]" + source[pos + 1 :]
+                )
+                raise SyntaxError(
+                    f"Illegal character '{unmatched_char}' at position {pos}\n{highlighted_code}"
+                )
+        self.tokens.append(("EOF", None))
+
+
+# Ordered table of (TAG, regex) pairs for the example tokenizer. Order is
+# significant: two-character operators such as "<<" and "<=" are listed
+# before the single-character "<" / ">" they start with.
+token_specification = [
+    ("WHITESPACE", r"\s+"),
+    # Keywords: the tag is the upper-cased word, matched on word boundaries.
+    *((word.upper(), rf"\b{word}\b") for word in ("if", "else", "for", "return")),
+    # Shifts and comparisons.
+    ("BITWISE_SHIFT_LEFT", r"<<"),
+    ("BITWISE_SHIFT_RIGHT", r">>"),
+    ("LESS_EQUAL", r"<="),
+    ("GREATER_EQUAL", r">="),
+    ("GREATER_THAN", r">"),
+    ("LESS_THAN", r"<"),
+    # Increment / decrement and equality tests.
+    ("INCREMENT", r"\+\+"),
+    ("DECREMENT", r"--"),
+    ("EQUAL", r"=="),
+    ("NOT_EQUAL", r"!="),
+    # Compound bitwise assignments and logical connectives.
+    ("ASSIGN_AND", r"&="),
+    ("ASSIGN_OR", r"\|="),
+    ("ASSIGN_XOR", r"\^="),
+    ("LOGICAL_AND", r"&&"),
+    ("LOGICAL_OR", r"\|\|"),
+    # Add other token definitions here
+]
+
+
 def test_input_output_tokenization():
     code = """
     input vec3 position;