Skip to content

Commit

Permalink
Merge pull request #1723 from actonlang/revamp-stdin
Browse files Browse the repository at this point in the history
Revamp stdin_install & add env actions
  • Loading branch information
plajjan authored Mar 7, 2024
2 parents efddc00 + 9513a16 commit 7e354d3
Show file tree
Hide file tree
Showing 7 changed files with 211 additions and 7 deletions.
10 changes: 4 additions & 6 deletions base/builtin/env.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,8 @@ extern int return_val;
printf("%s", s->str);
return $R_CONT(c$cont, B_None);
}
void read_stdin(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
log_info("read_stdin: %p", stream->data);

void read_stdin(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
if (nread < 0){
if (nread == UV_EOF) {
uv_close((uv_handle_t *)stream, NULL);
Expand All @@ -50,11 +49,9 @@ void read_stdin(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
cb->$class->__asyn__(cb, to$bytesD_len(buf->base, nread));
}
}

if (buf->base)
acton_free(buf->base);
}
$R B_EnvD_stdin_installG_local (B_Env self, $Cont c$cont, $action cb) {

$R B_EnvD__on_stdin_bytesG_local (B_Env self, $Cont c$cont, $action cb) {
// This should be the only call in env that does IO stuff, so it is safe to
// pin affinity here (and not earlier)..
pin_actor_affinity();
Expand All @@ -64,6 +61,7 @@ void read_stdin(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
uv_read_start((uv_stream_t*)tty, alloc_buffer, read_stdin);
return $R_CONT(c$cont, B_None);
}

$R B_EnvD_exitG_local (B_Env self, $Cont c$cont, B_int n) {
return_val = from$int(n);
rts_shutdown();
Expand Down
88 changes: 87 additions & 1 deletion base/src/__builtin__.act
Original file line number Diff line number Diff line change
Expand Up @@ -927,6 +927,38 @@ def type(a: value) -> str:
"""
NotImplemented

actor StringDecoder(cb_out: action(str) -> None, encoding: ?str="utf-8", on_error: ?action(str, bytes) -> None):
    """Bytes to string decoder

    Decodes bytes to string using the provided encoding. If no encoding is given
    UTF-8 is used. The decoder is stateful in order to buffer incomplete multi-
    byte characters.

    cb_out is called with every non-empty chunk of decoded text. When the
    buffered bytes cannot be decoded, on_error is called with a message and the
    offending bytes; if on_error is not set a ValueError is raised instead. In
    both cases the offending bytes are dropped from the buffer so that later
    input can still be decoded.

    Raises ValueError on construction if an encoding other than UTF-8
    (spelled "utf-8" or "utf8", case-insensitive) is requested.
    """
    MAX_UNICODE_CHAR_SIZE = 4
    var buf: bytes = b""

    if encoding is not None:
        enc = encoding.lower()
        # Accept the common "utf8" spelling (e.g. from LANG=en_US.utf8) too
        if enc != "utf-8" and enc != "utf8":
            raise ValueError("Only utf-8 encoding is supported")

    def decode(input: bytes) -> None:
        buf += input
        # Attempt to decode all of buf first (i == 0). If that fails we are
        # likely in the middle of a multi-byte character, so hold back the
        # last 1..3 bytes and try again. UTF-8 has up to 4 bytes per character
        # so an incomplete trailing character is at most 3 bytes long.
        # When buf is shorter than 4 bytes the loop always ends at
        # i == len(buf), where the empty prefix decodes successfully, so
        # split never becomes negative.
        for i in range(0, MAX_UNICODE_CHAR_SIZE):
            try:
                split = len(buf) - i
                s = buf[:split].decode()
                buf = buf[split:]
                # Do not bother the receiver with empty chunks
                if s != "":
                    cb_out(s)
                return
            except ValueError:
                pass
        # Not decodable even when holding back a partial character: report the
        # bytes and reset the buffer, otherwise every later call would fail on
        # the same invalid prefix and the buffer would grow without bound.
        bad = buf
        buf = b""
        if on_error is not None:
            on_error("Invalid UTF-8", bad)
        else:
            raise ValueError("Invalid UTF-8: %s" % str(bad))

## Environment ################################################

class WorldCap():
Expand Down Expand Up @@ -968,10 +1000,64 @@ actor Env (wc: WorldCap, sc: SysCap, args: list[str]):
argv = args
nr_wthreads: int = 0

action def getenv(name: str) -> ?str:
    """Look up an environment variable and return its value as str

    The name is encoded to bytes, looked up through getenvb() and the
    resulting bytes are decoded. Returns None when getenvb() returns None.
    """
    value = getenvb(name.encode())
    if value is not None:
        return value.decode()
    return None

# Placeholder body; the native implementation appears to be
# B_EnvD_getenvbG_local in base/src/__builtin__.ext.c, which returns None when
# the variable does not exist and raises RuntimeError on other lookup errors.
action def getenvb(name: bytes) -> ?bytes:
"""Get the value of an environment variable as raw bytes"""
NotImplemented

action def setenv(n: str, v: str) -> None:
    """Set environment variable n to the value v

    Both name and value are encoded to bytes and handed to setenvb().
    """
    name_bytes = n.encode()
    value_bytes = v.encode()
    setenvb(name_bytes, value_bytes)

# Placeholder body; the native implementation appears to be
# B_EnvD_setenvbG_local in base/src/__builtin__.ext.c, which raises
# RuntimeError when the variable cannot be set.
action def setenvb(n: bytes, v: bytes) -> None:
"""Set the value of an environment variable, name and value given as bytes"""
NotImplemented

action def unsetenv(n: str) -> None:
    """Remove the environment variable n

    The name is encoded to bytes and handed to unsetenvb().
    """
    name_bytes = n.encode()
    unsetenvb(name_bytes)

# Placeholder body; the native implementation appears to be
# B_EnvD_unsetenvbG_local in base/src/__builtin__.ext.c, which raises
# RuntimeError when the variable cannot be unset.
action def unsetenvb(n: bytes) -> None:
"""Unset an environment variable, name given as bytes"""
NotImplemented

# NOTE(review): placeholder body; the actual implementation is presumably
# provided natively (base/builtin/env.c) - confirm.
action def stdout_write(s: str) -> None:
NotImplemented

action def stdin_install(cb: action(str) -> None) -> None:
action def stdin_install(on_stdin: ?action(str) -> None, encoding: ?str=None, on_error: ?action(str, bytes) -> None, on_stdin_bytes: ?action(bytes) -> None) -> None:
    """Install a handler for data arriving on stdin

    Exactly one kind of handler is installed:

    - on_stdin_bytes receives the raw bytes as they are read. encoding and
      on_error must not be set together with it. If both on_stdin and
      on_stdin_bytes are given, on_stdin_bytes takes precedence and on_stdin
      is not installed.
    - on_stdin receives decoded text. Bytes are run through a StringDecoder
      using encoding; on_error is passed on to the decoder for reporting
      undecodable input.

    When encoding is None the codeset part of the LANG environment variable
    is used, falling back to "utf-8" if LANG is unset or has no codeset.

    Raises ValueError if neither handler is given, or if encoding / on_error
    is combined with on_stdin_bytes.
    """
    if on_stdin is None and on_stdin_bytes is None:
        raise ValueError("At least one of on_stdin or on_stdin_bytes must be set")
    elif on_stdin_bytes is not None:
        if encoding is not None:
            raise ValueError("encoding must not be set when on_stdin_bytes is set, it is only used for decoding stdin bytes to string")
        if on_error is not None:
            raise ValueError("on_error must not be set when on_stdin_bytes is set, it is only used for decoding error when decoding stdin bytes to string")
        _on_stdin_bytes(on_stdin_bytes)
    elif on_stdin is not None:
        if encoding is None:
            # If no encoding is given, attempt to discover the encoding used
            # Default to utf-8 if we're unable to discover the encoding
            encoding = "utf-8"
            # Read encoding from the LANG environment variable, which has the
            # form language[_territory][.codeset][@modifier]
            lang_env = getenv("LANG")
            if lang_env is not None:
                parts = lang_env.split(".")
                if len(parts) > 1:
                    # Strip an optional @modifier, e.g. "UTF-8@euro"
                    codeset = parts[1].split("@")[0].lower()
                    if codeset != "":
                        encoding = codeset
        sd = StringDecoder(on_stdin, encoding, on_error)
        _on_stdin_bytes(sd.decode)

# Install cb as the receiver of raw bytes read from stdin.
# Placeholder body; the native implementation appears to be
# B_EnvD__on_stdin_bytesG_local in base/builtin/env.c, which starts a libuv
# read on the tty with read_stdin as the read callback.
action def _on_stdin_bytes(cb: action(bytes) -> None) -> None:
NotImplemented

action def exit(n: int):
Expand Down
51 changes: 51 additions & 0 deletions base/src/__builtin__.ext.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,54 @@ B_str B_BaseExceptionD__name (B_BaseException self) {
// Return the type name of a value as a B_str: the $GCINFO name stored on the
// value's class is passed through unmangle_name() and wrapped with to$str().
B_str B_type(B_value a) {
return to$str(unmangle_name(a->$class->$GCINFO));
}

// Look up the environment variable `name` and continue with its value as
// bytes, or with None when the variable does not exist. Raises RuntimeError
// for any other libuv error.
$R B_EnvD_getenvbG_local (B_Env self, $Cont C_cont, B_bytes name) {
    // uv_os_getenv is not threadsafe but our Env actor forces serial execution

    // Try to use a small fixed size buffer first
    char smallval[256];
    size_t len = sizeof(smallval);
    char *value = smallval;

    const char* env_var = fromB_bytes(name);

    // First attempt with the stack buffer. If the value does not fit, libuv
    // returns UV_ENOBUFS and stores the required size in len.
    int r = uv_os_getenv(env_var, value, &len);
    if (r == UV_ENOENT) {
        // The environment variable does not exist
        return $R_CONT(C_cont, B_None);
    } else if (r == UV_ENOBUFS) {
        // Allocate a large enough buffer and actually get the value
        // NOTE(review): this buffer is not released here; confirm whether
        // acton_malloc memory is reclaimed automatically.
        value = (char*)acton_malloc(len);
        r = uv_os_getenv(env_var, value, &len);
    }
    if (r < 0) {
        // asprintf leaves s undefined on failure, so fall back to a static
        // message rather than passing garbage to to$str
        char *s = NULL;
        if (asprintf(&s, "Failed to read the environment variable %s: %s", env_var, uv_strerror(r)) < 0)
            s = "Failed to read the environment variable";
        $RAISE((B_BaseException)B_RuntimeErrorG_new(to$str(s)));
    }
    return $R_CONT(C_cont, to$bytes(value));
}

// Set the environment variable `name` to `value` via uv_os_setenv and continue
// with None. Raises RuntimeError if libuv reports an error.
$R B_EnvD_setenvbG_local (B_Env self, $Cont C_cont, B_bytes name, B_bytes value) {
    const char* env_var = fromB_bytes(name);
    const char* env_val = fromB_bytes(value);
    int r = uv_os_setenv(env_var, env_val);
    if (r < 0) {
        // asprintf leaves s undefined on failure, so fall back to a static
        // message rather than passing garbage to to$str
        char *s = NULL;
        if (asprintf(&s, "Failed to set the environment variable %s: %s", env_var, uv_strerror(r)) < 0)
            s = "Failed to set the environment variable";
        $RAISE((B_BaseException)B_RuntimeErrorG_new(to$str(s)));
    }
    return $R_CONT(C_cont, B_None);
}

// Remove the environment variable `name` via uv_os_unsetenv and continue with
// None. Raises RuntimeError if libuv reports an error.
$R B_EnvD_unsetenvbG_local (B_Env self, $Cont C_cont, B_bytes name) {
    const char* env_var = fromB_bytes(name);
    int r = uv_os_unsetenv(env_var);
    if (r < 0) {
        // asprintf leaves s undefined on failure, so fall back to a static
        // message rather than passing garbage to to$str
        char *s = NULL;
        if (asprintf(&s, "Failed to unset the environment variable %s: %s", env_var, uv_strerror(r)) < 0)
            s = "Failed to unset the environment variable";
        $RAISE((B_BaseException)B_RuntimeErrorG_new(to$str(s)));
    }
    return $R_CONT(C_cont, B_None);
}
3 changes: 3 additions & 0 deletions docs/acton-by-example/src/SUMMARY.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
- [Explicit types](types/explicit.md)
- [Security](security.md)
- [Capabilities](security/capabilities.md)
- [Environment](environment.md)
- [Environment variables](environment/variables.md)
- [Reading stdin input](environment/stdin.md)
- [Standard library](stdlib.md)
- [Regular Expression](stdlib/re.md)

Expand Down
4 changes: 4 additions & 0 deletions docs/acton-by-example/src/environment.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Environment

The environment of an Acton application is the outside world. Any useful application typically needs to interact with the environment in some way, like reading arguments or taking input from stdin and printing output.

39 changes: 39 additions & 0 deletions docs/acton-by-example/src/environment/stdin.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Reading stdin input

Read input from stdin by installing a handler for stdin data. The handler receives the data as `str`.
```python
actor main(env):
def interact(input):
print("Got some input:", input)

env.stdin_install(interact)
```


It is possible to specify the encoding and an `on_error()` callback, which is invoked if there is a problem decoding the data. When the encoding is not specified (default `None`), an attempt is made to discover the encoding by reading the `LANG` environment variable. If no encoding is discovered, the default is to use `utf-8`.

```python
actor main(env):
def interact(input):
print("Got some input:", input)

def on_stdin_error(err, data):
print("Some error with decoding the input data:", err)
print("Raw bytes data:", data)

env.stdin_install(on_stdin=interact, encoding="utf-8", on_error=on_stdin_error)
```

You can read the raw data in `bytes` form by installing a bytes handler instead:

```python
actor main(env):
def interact(bytes_input):
# Note how the input might contain parts (some bytes) of a multi-byte
# Unicode character in which case decoding will fail
print("Got some input:", bytes_input.decode())

env.stdin_install(on_stdin_bytes=interact)
```

This allows reading binary data and more explicit control over how to decode the data.
23 changes: 23 additions & 0 deletions docs/acton-by-example/src/environment/variables.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Environment variables

It is possible to read, set and unset environment variables. The standard functions `env.getenv`, `env.setenv` and `env.unsetenv` all assume `str` input and output, which is a convenience based on the assumption that all data is encoded using UTF-8. POSIX systems really use binary encoding for both environment variable names and values. To access the environment as bytes and handle decoding explicitly, use `env.getenvb`, `env.setenvb` and `env.unsetenvb`.

Source:
```python
actor main(env):
env_user = env.getenv("USER")
if env_user is not None:
print("User:", env_user)
env.setenv("FOO", "bar")
env.unsetenv("LANG")
foo_env = env.getenv("FOO")
if foo_env is not None:
print("FOO:", foo_env)
env.exit(0)
```

Output:
```sh
User: myuser
FOO: bar
```

0 comments on commit 7e354d3

Please sign in to comment.