Skip to content

Commit

Permalink
Add module loading and writing
Browse files Browse the repository at this point in the history
This involves compiling namespaces to .cpp files within the class path
and then loading them up again. Namespaces are written to their own
file, with a special `__init` file also written. The `__init` file needs
to be loaded prior to the namespace file being loaded, since it contains
all of the dependencies. Any functions created within the namespace are
nested modules, using the same `foo.bar$spam` syntax as the JVM. They're
all written to individual files, same as with Clojure, and they're
loaded using the `__init` module.

Having this allows us to load `clojure.core` from pre-compiled .cpp
files, rather than from jank source. The startup time of the compiler,
when loading `clojure.core` this way, drops from 8.7s to 3.7s, which
means it's more than twice as fast now. We can drop this further by
compiling the .cpp files to LLVM IR files, or, even further, to object
files or pre-compiled modules (PCMs). The framework for this is present,
but Cling doesn't actually have an interface to load either of those, so
some more intricate work will be needed. I'm going to stick with the
.cpp files for now and flesh out the rest of the module loading. This
bout of work is not primarily focused on performance gains.
  • Loading branch information
jeaye committed Oct 10, 2023
1 parent dd890ab commit e17744e
Show file tree
Hide file tree
Showing 19 changed files with 681 additions and 138 deletions.
18 changes: 16 additions & 2 deletions include/cpp/jank/codegen/processor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ namespace jank::codegen
native_string unboxed_name;
};

/* Identifies the kind of unit a codegen processor is generating code for. */
enum class compilation_target
{
  /* A whole namespace module. */
  ns,
  /* A single function, emitted as its own nested module. */
  function,
  /* Presumably an expression evaluated at the REPL -- confirm against the codegen source. */
  repl,
};

/* Codegen processors render a single function expression to a C++ functor. REPL expressions
* are wrapped in a nullary functor. These functors nest arbitrarily: if an expression has more
* fn values of its own, each one is rendered with its own codegen processor. */
Expand All @@ -52,12 +59,16 @@ namespace jank::codegen
processor
(
runtime::context &rt_ctx,
analyze::expression_ptr const &expr
analyze::expression_ptr const &expr,
native_string_view const &module,
compilation_target target
);
processor
(
runtime::context &rt_ctx,
analyze::expr::function<analyze::expression> const &expr
analyze::expr::function<analyze::expression> const &expr,
native_string_view const &module,
compilation_target target
);
processor(processor const &) = delete;
processor(processor &&) noexcept = default;
Expand Down Expand Up @@ -159,6 +170,7 @@ namespace jank::codegen
void build_footer();
native_string expression_str(bool box_needed, bool const auto_call);

native_string module_init_str(native_string_view const &module);

void format_elided_var
(
Expand Down Expand Up @@ -191,7 +203,9 @@ namespace jank::codegen
/* This is stored just to keep the expression alive. */
analyze::expression_ptr root_expr{};
analyze::expr::function<analyze::expression> const &root_fn;
native_string module;

compilation_target target{};
runtime::obj::symbol struct_name;
fmt::memory_buffer header_buffer;
fmt::memory_buffer body_buffer;
Expand Down
5 changes: 5 additions & 0 deletions include/cpp/jank/detail/to_runtime_data.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include <boost/filesystem/path.hpp>

#include <jank/runtime/seq.hpp>

namespace jank::detail
Expand All @@ -26,6 +28,9 @@ namespace jank::detail
inline runtime::object_ptr to_runtime_data(runtime::obj::symbol const &d)
{ return make_box<runtime::obj::symbol>(d); }

/* Converts a filesystem path to runtime data by boxing its string representation. */
inline runtime::object_ptr to_runtime_data(boost::filesystem::path const &p)
{ return make_box(p.string()); }

template <typename K, typename V>
runtime::object_ptr to_runtime_data(native_unordered_map<K, V> const &m)
{
Expand Down
5 changes: 5 additions & 0 deletions include/cpp/jank/evaluate.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
namespace jank::evaluate
{
analyze::expr::function<analyze::expression> wrap_expression(analyze::expression_ptr const expr);
analyze::expr::function<analyze::expression> wrap_expressions
(
native_vector<analyze::expression_ptr> const &exprs,
analyze::processor const &an_prc
);

runtime::object_ptr eval
(
Expand Down
9 changes: 5 additions & 4 deletions include/cpp/jank/jit/processor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,18 @@
#include <cling/Interpreter/Interpreter.h>

#include <jank/result.hpp>
#include <jank/runtime/context.hpp>
#include <jank/codegen/processor.hpp>

namespace jank::runtime
{ struct context; }

namespace jank::jit
{
struct processor
{
processor();
processor(runtime::context &rt_ctx);

result<option<runtime::object_ptr>, native_string> eval
(runtime::context &rt_ctx, codegen::processor &cg_prc) const;
result<option<runtime::object_ptr>, native_string> eval(codegen::processor &cg_prc) const;
void eval_string(native_string const &s) const;

std::unique_ptr<cling::Interpreter> interpreter;
Expand Down
2 changes: 2 additions & 0 deletions include/cpp/jank/result.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ namespace jank
std::cout << "error: expected ok result, but found: " << err << std::endl;
throw err;
}
/* Delegates to assert_ok(); provided under the expect_* naming used by the other accessors. */
void expect_ok() const
{ assert_ok(); }

E const& expect_err() const
{ return boost::get<E>(data); }
Expand Down
31 changes: 25 additions & 6 deletions include/cpp/jank/runtime/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

#include <jank/result.hpp>
#include <jank/analyze/processor.hpp>
#include <jank/runtime/module/loader.hpp>
#include <jank/runtime/ns.hpp>
#include <jank/runtime/var.hpp>
#include <jank/runtime/obj/keyword.hpp>
#include <jank/jit/processor.hpp>

namespace jank::jit
{ struct processor; }
Expand All @@ -18,7 +20,7 @@ namespace jank::runtime
{
struct context
{
context();
context(option<native_string_view> const &class_path = none);
context(context const&);
context(context &&) = delete;

Expand All @@ -41,10 +43,19 @@ namespace jank::runtime
static object_ptr print(object_ptr o, object_ptr more);
static object_ptr println(object_ptr more);

void eval_prelude(jit::processor const &);
object_ptr eval_file(native_string_view const &path, jit::processor const &);
object_ptr eval_string(native_string_view const &code, jit::processor const &);
native_vector<analyze::expression_ptr> analyze_string(native_string_view const &code, jit::processor const &jit_prc, native_bool const eval = true);
void eval_prelude();
object_ptr eval_file(native_string_view const &path);
object_ptr eval_string(native_string_view const &code);
native_vector<analyze::expression_ptr> analyze_string(native_string_view const &code, native_bool const eval = true);

/* Finds the specified module on the class path and loads it. If
* the module is already loaded, nothing is done. */
result<void, native_string> load_module(native_string_view const &module);

/* Does all the same work as load_module, but also writes compiled files to the file system. */
result<void, native_string> compile_module(native_string_view const &module);

void write_module(native_string_view const &module, native_string_view const &contents) const;

/* Generates a unique name for use with anything from codegen structs,
* lifted vars, to shadowed locals. */
Expand Down Expand Up @@ -74,6 +85,14 @@ namespace jank::runtime
/* The analyze processor is reused across evaluations so we can keep the semantic information
* of previous code. This is essential for REPL use. */
/* TODO: This needs to be synchronized. */
jank::analyze::processor an_prc{ *this };
analyze::processor an_prc{ *this };
jit::processor jit_prc;
/* TODO: This needs to be a dynamic var. */
bool compiling{};
/* TODO: This needs to be a dynamic var. */
native_string_view current_module;
native_unordered_map<native_string, native_vector<native_string>> module_dependencies;
native_string output_dir{ "classes" };
module::loader module_loader;
};
}
33 changes: 28 additions & 5 deletions include/cpp/jank/runtime/module/loader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

#include <boost/filesystem/path.hpp>

namespace jank::runtime
{ struct context; }

namespace jank::jit
{ struct processor; }

namespace jank::runtime::module
{
struct file_entry
Expand All @@ -12,9 +18,16 @@ namespace jank::runtime::module
option<native_string> archive_path;
/* If there's an archive path, this path is within the archive. Otherwise, it's the
* filesystem path. */
boost::filesystem::path path;
native_string path;
};

/* Helpers for converting between filesystem paths, module names, and native (C++)
 * namespace names, plus helpers for building and detecting nested modules.
 * NOTE(review): nested modules presumably use the JVM-style `foo.bar$spam` naming --
 * confirm against the definitions. */
native_string path_to_module(boost::filesystem::path const &path);
boost::filesystem::path module_to_path(native_string_view const &module);
native_string module_to_native_ns(native_string_view const &module);
/* Combines a parent module name with the name of a sub-module nested within it. */
native_string nest_module(native_string const &module, native_string const &sub);
native_string nest_native_ns(native_string const &native_ns, native_string const &end);
native_bool is_nested_module(native_string const &module);

struct loader
{
/* A module entry represents one or more files on the classpath which provide that module.
Expand All @@ -29,10 +42,10 @@ namespace jank::runtime::module
* subsequent matches are ignored. */
struct entry
{
option<file_entry> pcm;
option<file_entry> cpp;
option<file_entry> jank;
option<file_entry> cljc;
option<file_entry> cpp;
option<file_entry> pcm;
};

/* These separators match what the JVM does on each system. */
Expand All @@ -42,13 +55,23 @@ namespace jank::runtime::module
static constexpr char module_separator{ ':' };
#endif

loader() = default;
loader(native_string_view const &paths);
loader(context &rt_ctx, native_string_view const &paths);

native_bool is_loaded(native_string_view const &) const;
result<void, native_string> load_ns(native_string_view const &module);
result<void, native_string> load(native_string_view const &module);
result<void, native_string> load_pcm(file_entry const &entry);
result<void, native_string> load_cpp(file_entry const &entry);
result<void, native_string> load_jank(file_entry const &entry);
result<void, native_string> load_cljc(file_entry const &entry);

object_ptr to_runtime_data() const;

context &rt_ctx;
native_string paths;
/* This maps module strings to entries. Module strings are like fully qualified Java
* class names. */
native_unordered_map<native_string, entry> entries;
native_set<native_string> loaded;
};
}
2 changes: 0 additions & 2 deletions include/cpp/jank/runtime/obj/jit_function.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ namespace jank::runtime
static_object() = default;
static_object(static_object &&) = default;
static_object(static_object const &) = default;
static_object(object &&base);
static_object(object_ptr const fn, object_ptr const start);

/* behavior::objectable */
Expand All @@ -31,7 +30,6 @@ namespace jank::runtime
/* behavior::metadatable */
object_ptr with_meta(object_ptr m);

/* TODO: Doesn't have an offset of 0. */
object base{ object_type::jit_function };
behavior::callable_ptr data{};
option<object_ptr> meta;
Expand Down
3 changes: 3 additions & 0 deletions include/cpp/jank/type.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <map>
#include <set>
#include <string_view>

#include <folly/FBVector.h>
Expand All @@ -27,6 +28,8 @@ namespace jank
using native_vector = folly::fbvector<T, native_allocator<T>>;
template <typename K, typename V>
using native_map = std::map<K, V, native_allocator<std::pair<K const, V>>>;
/* An ordered set (compared with std::less) which allocates through native_allocator. */
template <typename T>
using native_set = std::set<T, std::less<T>, native_allocator<T>>;

/* TODO: Try out unordered_flat_map once vcpkg has boost 1.81.0. */
template
Expand Down
41 changes: 34 additions & 7 deletions src/cpp/jank/analyze/processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ namespace jank::analyze
}
else
{ name = runtime::context::unique_string("fn"); }
name = runtime::munge(name);

native_vector<expr::function_arity<expression>> arities;

Expand Down Expand Up @@ -421,15 +422,41 @@ namespace jank::analyze
}
}

return make_box<expression>
auto ret
(
expr::function<expression>
{
expression_base{ {}, expr_type, current_frame },
name,
std::move(arities)
}
make_box<expression>
(
expr::function<expression>
{
expression_base{ {}, expr_type, current_frame },
name,
std::move(arities)
}
)
);

if(rt_ctx.compiling)
{
/* Register this module as a dependency of the current module so we can generate
* code to load it. */
auto const &ns_sym(make_box<runtime::obj::symbol>("clojure.core/*ns*"));
auto const &ns_var(rt_ctx.find_var(ns_sym).unwrap());
auto const module
(
runtime::module::nest_module
(
runtime::detail::to_string(ns_var->get_root()),
runtime::munge(name)
)
);
rt_ctx.module_dependencies[rt_ctx.current_module].emplace_back(module);
fmt::println("module dep {} -> {}", rt_ctx.current_module, module);

codegen::processor cg_prc{ rt_ctx, ret, module, codegen::compilation_target::function };
rt_ctx.write_module(module, cg_prc.declaration_str());
}

return ret;
}

processor::expression_result processor::analyze_recur
Expand Down
Loading

0 comments on commit e17744e

Please sign in to comment.