-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
556 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,3 @@ | ||
/target | ||
notes.txt | ||
.gitignore | ||
pkg/ | ||
web/node_modules | ||
web/.next | ||
.gitignore |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
# [name] | ||
|
||
A simple bytecode compiler written in Rust with a stack-based VM. | ||
|
||
## Overview | ||
|
||
|
||
|
||
|
||
|
||
## Getting Started | ||
|
||
Try the language in the [playground](https://[name].vercel.app). | ||
|
||
### Example | ||
|
||
```bash | ||
cargo install [name] | ||
echo "let a = 10; print(a);" > example.rs | ||
[name] run example.rs | ||
``` | ||
|
||
Try more examples in the [examples](./examples) directory. | ||
|
||
## Features | ||
|
||
- [x] Variables | ||
- [x] Arithmetic operations | ||
- [x] Logical operations | ||
- [x] Control flow (if, else) | ||
- [x] Loops (while) | ||
- [ ] Functions | ||
- [ ] Closures | ||
- [ ] Autograd | ||
- [ ] Tensor Support | ||
|
||
## Implementation | ||
|
||
The source code goes through the following stages: | ||
|
||
1. [Tokenization](#tokenization) | ||
2. [Parsing](#parsing) | ||
3. [Compilation](#compilation) | ||
4. [Execution](#execution) | ||
|
||
### Tokenization | ||
|
||
[scanner.rs](./src/scanner.rs) | ||
|
||
Here the scanner/lexer takes the raw source code and converts it into a list of [predefined tokens](./src/scanner.rs). | ||
|
||
Here I am using the Rust [logos](https://github.com/maciejhirsz/logos) crate to create the lexer, which converts the source code into a vector of tokens. | ||
|
||
For example, the source code: | ||
|
||
```rust | ||
let a = 10; | ||
print(a); | ||
``` | ||
|
||
Gets converted into: | ||
|
||
```rust | ||
[LET, Identifier, EQUAL, Number(10.0), SEMICOLON, PRINT, LeftParen, Identifier, RightParen, SEMICOLON] | ||
``` | ||
|
||
### Parsing | ||
|
||
[parser.rs](./src/ast.rs) | ||
|
||
The parser takes the list of tokens and converts it into an Abstract Syntax Tree (AST). Here we also handle the precedence of the operators, syntax errors, and syntactic sugar like `+=`, `-=`, etc. | ||
|
||
For example, the tokens: | ||
|
||
```rust | ||
[LET, Identifier, EQUAL, Number(10.0), SEMICOLON, PRINT, LeftParen, Identifier, RightParen, SEMICOLON] | ||
``` | ||
|
||
Gets converted into: | ||
|
||
```rust | ||
Let("a", [Number(10.0)]) | ||
Print([Identifier("a")]) | ||
``` | ||
|
||
I am using a combination of a recursive descent parser and a [Pratt parser](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html) to parse the tokens. | ||
|
||
Pratt parser is used to parse the expressions, and recursive descent parser for everything else. | ||
|
||
### Compilation | ||
|
||
[compiler.rs](./src/compiler.rs) | ||
|
||
The compiler takes the AST and converts it into a list of bytecode instructions. | ||
|
||
For example, the AST: | ||
|
||
```rust | ||
Let("a", [Number(10.0)]) | ||
Print([Identifier("a")]) | ||
``` | ||
|
||
Gets converted into: | ||
|
||
``` | ||
0 OP_CONSTANT cons->[1] | tnsr->10 | ||
2 OP_DEFINE_GLOBAL cons->[0] | intr->a | ||
4 OP_GET_GLOBAL cons->[2] | intr->a | ||
6 OP_PRINT | ||
7 OP_RETURN | ||
``` | ||
|
||
[Chunk](./src/chunk.rs) - It stores the bytecode instructions along with any constants like strings, numbers, etc. | ||
|
||
### Execution | ||
|
||
[vm.rs](./src/vm.rs) | ||
|
||
The VM takes the list of bytecode instructions and executes them. It is simply a loop that reads the instructions and executes them. | ||
|
||
The VM has a stack-based architecture, which means that the instructions are executed by pushing and popping values from the stack. | ||
|
||
For example, the bytecode: | ||
|
||
``` | ||
0 OP_CONSTANT cons->[1] | tnsr->10 | ||
2 OP_DEFINE_GLOBAL cons->[0] | intr->a | ||
4 OP_GET_GLOBAL cons->[2] | intr->a | ||
6 OP_PRINT | ||
7 OP_RETURN | ||
``` | ||
|
||
Gets executed and prints: | ||
|
||
``` | ||
10 | ||
``` | ||
|
||
## References | ||
|
||
- [Crafting Interpreters](https://craftinginterpreters.com/) | ||
- [Pratt Parsing](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html) | ||
- [Logos](https://github.com/maciejhirsz/logos) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/* tslint:disable */ | ||
/* eslint-disable */ | ||
/** | ||
* Passes `src` to the WebAssembly module's `run_source` export and returns the string it produces. | ||
* | ||
* @param {string} src | ||
* @returns {string} | ||
*/ | ||
export function run_source(src: string): string; | ||
|
||
// Inputs accepted by the default-exported init function: a location to fetch (RequestInfo or URL), | ||
// a Response, raw bytes (BufferSource), or an already compiled WebAssembly.Module. | ||
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module; | ||
|
||
// Shape of the raw WebAssembly exports returned by the init functions. | ||
export interface InitOutput { | ||
// The module's exported memory. | ||
readonly memory: WebAssembly.Memory; | ||
// Raw `run_source` export; the JS glue calls it as (return-area pointer, string pointer, string byte length). | ||
readonly run_source: (a: number, b: number, c: number) => void; | ||
// Called by the JS glue with -16 before and +16 after invoking `run_source`; the returned number is used as the return-area pointer. | ||
readonly __wbindgen_add_to_stack_pointer: (a: number) => number; | ||
// Allocation exports the JS glue uses when copying a string into wasm memory. | ||
readonly __wbindgen_malloc: (a: number, b: number) => number; | ||
readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number; | ||
// Called by the JS glue with (pointer, length, 1) to release the string returned by `run_source`. | ||
readonly __wbindgen_free: (a: number, b: number, c: number) => void; | ||
} | ||
|
||
// Inputs accepted by `initSync`: raw bytes (BufferSource) or an already compiled WebAssembly.Module. | ||
export type SyncInitInput = BufferSource | WebAssembly.Module; | ||
/** | ||
* Synchronously instantiates the given `module`, which can either be bytes or | ||
* a precompiled `WebAssembly.Module`. | ||
* | ||
* If the wasm module has already been initialized, the existing exports are | ||
* returned and `module` is not used. | ||
* | ||
* @param {SyncInitInput} module | ||
* | ||
* @returns {InitOutput} | ||
*/ | ||
export function initSync(module: SyncInitInput): InitOutput; | ||
|
||
/** | ||
* If `module_or_path` is {RequestInfo} or {URL}, makes a request and | ||
* for everything else, calls `WebAssembly.instantiate` directly. | ||
* | ||
* When `module_or_path` is omitted, `ai_bg.wasm` resolved relative to the | ||
* glue module's own URL is fetched. If the wasm module has already been | ||
* initialized, the existing exports are returned without loading anything. | ||
* | ||
* @param {InitInput | Promise<InitInput>} module_or_path | ||
* | ||
* @returns {Promise<InitOutput>} | ||
*/ | ||
export default function __wbg_init (module_or_path?: InitInput | Promise<InitInput>): Promise<InitOutput>; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
// Exports object of the instantiated WebAssembly module; undefined until __wbg_finalize_init assigns it. | ||
let wasm; | ||
|
||
// Byte length of the string most recently written into wasm memory by passStringToWasm0. | ||
let WASM_VECTOR_LEN = 0; | ||
|
||
// Cached Uint8Array view over wasm.memory.buffer; null until first use and reset to null by __wbg_finalize_init. | ||
let cachedUint8Memory0 = null; | ||
|
||
// Returns the cached byte view of wasm memory, creating a fresh one when there is no cached view | ||
// or when the cached view's byteLength is 0. | ||
// NOTE(review): a zero byteLength presumably means the old buffer was detached after the memory grew - confirm. | ||
function getUint8Memory0() { | ||
if (cachedUint8Memory0 === null || cachedUint8Memory0.byteLength === 0) { | ||
cachedUint8Memory0 = new Uint8Array(wasm.memory.buffer); | ||
} | ||
return cachedUint8Memory0; | ||
} | ||
|
||
// Shared TextEncoder instance; when TextEncoder is not defined, a stub whose encode() throws is used instead. | ||
const cachedTextEncoder = (typeof TextEncoder !== 'undefined' ? new TextEncoder('utf-8') : { encode: () => { throw Error('TextEncoder not available') } } ); | ||
|
||
// Encodes the string `arg` into the Uint8Array `view` and returns an object with `read` and `written` counts. | ||
// Uses cachedTextEncoder.encodeInto when it exists; otherwise encodes into a temporary buffer, | ||
// copies that buffer into `view`, and reports read = arg.length and written = buf.length. | ||
const encodeString = (typeof cachedTextEncoder.encodeInto === 'function' | ||
? function (arg, view) { | ||
return cachedTextEncoder.encodeInto(arg, view); | ||
} | ||
: function (arg, view) { | ||
const buf = cachedTextEncoder.encode(arg); | ||
view.set(buf); | ||
return { | ||
read: arg.length, | ||
written: buf.length | ||
}; | ||
}); | ||
|
||
// Copies the string `arg` into wasm memory and returns the (unsigned) pointer to it. | ||
// The number of bytes written is left in WASM_VECTOR_LEN for the caller to read. | ||
// `malloc` is called as malloc(size, 1) and `realloc` as realloc(ptr, oldSize, newSize, 1); | ||
// run_source passes wasm.__wbindgen_malloc and wasm.__wbindgen_realloc for them. | ||
function passStringToWasm0(arg, malloc, realloc) { | ||
|
||
// Without a realloc function: encode the whole string first, then allocate exactly that many bytes. | ||
if (realloc === undefined) { | ||
const buf = cachedTextEncoder.encode(arg); | ||
const ptr = malloc(buf.length, 1) >>> 0; | ||
getUint8Memory0().subarray(ptr, ptr + buf.length).set(buf); | ||
WASM_VECTOR_LEN = buf.length; | ||
return ptr; | ||
} | ||
|
||
// With realloc: start by allocating one byte per UTF-16 code unit of the string. | ||
let len = arg.length; | ||
let ptr = malloc(len, 1) >>> 0; | ||
|
||
||
const mem = getUint8Memory0(); | ||
|
||
||
let offset = 0; | ||
|
||
// Copy code units straight into memory until the first one above 0x7F (non-ASCII) is found. | ||
for (; offset < len; offset++) { | ||
const code = arg.charCodeAt(offset); | ||
if (code > 0x7F) break; | ||
mem[ptr + offset] = code; | ||
} | ||
|
||
// The loop stopped early: the rest of the string still has to be encoded with the text encoder. | ||
if (offset !== len) { | ||
if (offset !== 0) { | ||
arg = arg.slice(offset); | ||
} | ||
// Grow the allocation to offset + 3 bytes per remaining code unit. Arguments are evaluated left to right, | ||
// so the old `len` is passed as the second argument before `len` is reassigned in the third. | ||
ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0; | ||
const view = getUint8Memory0().subarray(ptr + offset, ptr + len); | ||
const ret = encodeString(arg, view); | ||
|
||
// Shrink the allocation down to the number of bytes actually written. | ||
offset += ret.written; | ||
ptr = realloc(ptr, len, offset, 1) >>> 0; | ||
} | ||
|
||
||
WASM_VECTOR_LEN = offset; | ||
return ptr; | ||
} | ||
|
||
// Cached Int32Array view over wasm.memory.buffer; null until first use and reset to null by __wbg_finalize_init. | ||
let cachedInt32Memory0 = null; | ||
|
||
// Returns the cached 32-bit integer view of wasm memory, creating a fresh one when there is no cached view | ||
// or when the cached view's byteLength is 0. | ||
function getInt32Memory0() { | ||
if (cachedInt32Memory0 === null || cachedInt32Memory0.byteLength === 0) { | ||
cachedInt32Memory0 = new Int32Array(wasm.memory.buffer); | ||
} | ||
return cachedInt32Memory0; | ||
} | ||
|
||
// Shared UTF-8 TextDecoder created with ignoreBOM and fatal enabled; | ||
// when TextDecoder is not defined, a stub whose decode() throws is used instead. | ||
const cachedTextDecoder = (typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }) : { decode: () => { throw Error('TextDecoder not available') } } ); | ||
|
||
// Calls decode() once with no input when a real TextDecoder exists. | ||
// NOTE(review): presumably a warm-up call emitted by the generator - confirm. | ||
if (typeof TextDecoder !== 'undefined') { cachedTextDecoder.decode(); }; | ||
|
||
// Decodes the `len` bytes of wasm memory starting at `ptr` and returns them as a string. | ||
function getStringFromWasm0(ptr, len) { | ||
// Coerce the pointer to an unsigned 32-bit integer before using it as an index. | ||
ptr = ptr >>> 0; | ||
return cachedTextDecoder.decode(getUint8Memory0().subarray(ptr, ptr + len)); | ||
} | ||
/** | ||
* Copies `src` into wasm memory, calls the module's `run_source` export on it, | ||
* and returns the string that export produces. | ||
* | ||
* @param {string} src | ||
* @returns {string} | ||
*/ | ||
export function run_source(src) { | ||
// Pointer and length of the returned string, kept so the finally block can pass them to __wbindgen_free. | ||
let deferred2_0; | ||
let deferred2_1; | ||
try { | ||
// Move the wasm stack pointer by -16; the returned address is handed to run_source as its return area. | ||
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
const ptr0 = passStringToWasm0(src, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); | ||
const len0 = WASM_VECTOR_LEN; | ||
wasm.run_source(retptr, ptr0, len0); | ||
// Read the two 32-bit values at retptr: they are used as the pointer and length of the result string. | ||
var r0 = getInt32Memory0()[retptr / 4 + 0]; | ||
var r1 = getInt32Memory0()[retptr / 4 + 1]; | ||
deferred2_0 = r0; | ||
deferred2_1 = r1; | ||
return getStringFromWasm0(r0, r1); | ||
} finally { | ||
// Always undo the stack pointer adjustment and free the returned string's buffer. | ||
wasm.__wbindgen_add_to_stack_pointer(16); | ||
wasm.__wbindgen_free(deferred2_0, deferred2_1, 1); | ||
} | ||
} | ||
|
||
// Instantiates `module` with `imports` and resolves to an object holding `instance` and `module`. | ||
// - When `module` is a Response: tries WebAssembly.instantiateStreaming if it exists, and otherwise | ||
//   (or after a tolerated streaming failure) instantiates from the response's bytes. | ||
// - For anything else: passes `module` directly to WebAssembly.instantiate. | ||
async function __wbg_load(module, imports) { | ||
if (typeof Response === 'function' && module instanceof Response) { | ||
if (typeof WebAssembly.instantiateStreaming === 'function') { | ||
try { | ||
return await WebAssembly.instantiateStreaming(module, imports); | ||
|
||
||
} catch (e) { | ||
// A streaming failure is only tolerated when the Content-Type header is not 'application/wasm': | ||
// warn and fall through to the byte-based path below. Any other failure is rethrown. | ||
if (module.headers.get('Content-Type') != 'application/wasm') { | ||
console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e); | ||
|
||
||
} else { | ||
throw e; | ||
} | ||
} | ||
} | ||
|
||
// Fallback: read the whole response body and instantiate from the bytes. | ||
const bytes = await module.arrayBuffer(); | ||
return await WebAssembly.instantiate(bytes, imports); | ||
|
||
||
} else { | ||
const instance = await WebAssembly.instantiate(module, imports); | ||
|
||
// If the result is a bare Instance, wrap it together with `module` so the return shape is { instance, module }. | ||
if (instance instanceof WebAssembly.Instance) { | ||
return { instance, module }; | ||
|
||
||
} else { | ||
return instance; | ||
} | ||
} | ||
} | ||
|
||
// Builds the imports object passed to WebAssembly instantiation. | ||
// It contains a single `wbg` namespace, which is left empty here. | ||
function __wbg_get_imports() { | ||
const imports = {}; | ||
imports.wbg = {}; | ||
|
||
||
return imports; | ||
} | ||
|
||
// No-op: the body is empty, so neither `imports` nor `maybe_memory` is read or modified. | ||
function __wbg_init_memory(imports, maybe_memory) { | ||
|
||
||
} | ||
|
||
// Completes initialization: stores the instance's exports in `wasm`, records the module on | ||
// __wbg_init.__wbindgen_wasm_module, clears both cached memory views, and returns the exports. | ||
function __wbg_finalize_init(instance, module) { | ||
wasm = instance.exports; | ||
__wbg_init.__wbindgen_wasm_module = module; | ||
// Drop cached views so the next accessor call builds them from this instance's memory. | ||
cachedInt32Memory0 = null; | ||
cachedUint8Memory0 = null; | ||
|
||
||
|
||
||
return wasm; | ||
} | ||
|
||
// Synchronously instantiates the wasm module and returns its exports. | ||
// `module` may be a WebAssembly.Module or anything the WebAssembly.Module constructor accepts. | ||
function initSync(module) { | ||
// Already initialized: return the existing exports without instantiating again. | ||
if (wasm !== undefined) return wasm; | ||
|
||
||
const imports = __wbg_get_imports(); | ||
|
||
||
__wbg_init_memory(imports); | ||
|
||
// Compile the input first when it is not already a WebAssembly.Module. | ||
if (!(module instanceof WebAssembly.Module)) { | ||
module = new WebAssembly.Module(module); | ||
} | ||
|
||
||
const instance = new WebAssembly.Instance(module, imports); | ||
|
||
||
return __wbg_finalize_init(instance, module); | ||
} | ||
|
||
// Asynchronously loads and instantiates the wasm module, then resolves to its exports. | ||
// `input` may be a string, Request or URL (fetched here), any other value __wbg_load accepts, | ||
// or a promise of one of those (it is awaited before loading). | ||
async function __wbg_init(input) { | ||
// Already initialized: return the existing exports without loading again. | ||
if (wasm !== undefined) return wasm; | ||
|
||
// No input given: default to 'ai_bg.wasm' resolved relative to this module's URL. | ||
if (typeof input === 'undefined') { | ||
input = new URL('ai_bg.wasm', import.meta.url); | ||
} | ||
const imports = __wbg_get_imports(); | ||
|
||
// Locations are turned into a fetch() promise; other inputs are passed through unchanged. | ||
if (typeof input === 'string' || (typeof Request === 'function' && input instanceof Request) || (typeof URL === 'function' && input instanceof URL)) { | ||
input = fetch(input); | ||
} | ||
|
||
||
__wbg_init_memory(imports); | ||
|
||
||
const { instance, module } = await __wbg_load(await input, imports); | ||
|
||
||
return __wbg_finalize_init(instance, module); | ||
} | ||
|
||
export { initSync } | ||
export default __wbg_init; |
Oops, something went wrong.