From b4de69839e8669126143cda004e08154134fdee3 Mon Sep 17 00:00:00 2001 From: Luis Michaelis Date: Thu, 5 Oct 2023 21:00:37 +0200 Subject: [PATCH] feat(Daedalus): initial AST parser and initial type checker --- .clang-tidy | 2 +- CMakeLists.txt | 7 + include/zenkit/daedalus/Compiler.hh | 68 +++ include/zenkit/daedalus/Module.hh | 257 ++++++++ include/zenkit/daedalus/SyntaxTree.hh | 326 +++++++++++ src/daedalus/Compiler.cc | 215 +++++++ src/daedalus/Module.cc | 327 +++++++++++ src/daedalus/SyntaxTree.cc | 809 ++++++++++++++++++++++++++ src/daedalus/Tokenizer.cc | 353 +++++++++++ src/daedalus/Tokenizer.hh | 108 ++++ src/daedalus/TypeCheck.cc | 620 ++++++++++++++++++++ src/daedalus/TypeCheck.hh | 8 + src/daedalus/TypeStore.cc | 294 ++++++++++ src/daedalus/TypeStore.hh | 40 ++ tests/TestDaedalusCompiler.cc | 39 ++ 15 files changed, 3472 insertions(+), 1 deletion(-) create mode 100644 include/zenkit/daedalus/Compiler.hh create mode 100644 include/zenkit/daedalus/Module.hh create mode 100644 include/zenkit/daedalus/SyntaxTree.hh create mode 100644 src/daedalus/Compiler.cc create mode 100644 src/daedalus/Module.cc create mode 100644 src/daedalus/SyntaxTree.cc create mode 100644 src/daedalus/Tokenizer.cc create mode 100644 src/daedalus/Tokenizer.hh create mode 100644 src/daedalus/TypeCheck.cc create mode 100644 src/daedalus/TypeCheck.hh create mode 100644 src/daedalus/TypeStore.cc create mode 100644 src/daedalus/TypeStore.hh create mode 100644 tests/TestDaedalusCompiler.cc diff --git a/.clang-tidy b/.clang-tidy index 9ce9b01e..13679b52 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,4 +1,4 @@ Checks: '-*,bugprone-*,cppcoreguidelines-*,concurrency-*,clang-analyzer-*,hicpp-*,misc-*,modernize-*,performance-*,' 'portability-*,readability-*,-modernize-use-trailing-return-type,-cppcoreguidelines-pro-bounds-pointer-arithmetic,' - '-cppcoreguidelines-prefer-member-initializer' + '-cppcoreguidelines-prefer-member-initializer,-*-no-recursion' WarningsAsErrors: '*' diff --git 
a/CMakeLists.txt b/CMakeLists.txt index b85bfea5..db1b219a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,6 +43,12 @@ list(APPEND _ZK_SOURCES src/Archive.cc src/Boxes.cc src/CutsceneLibrary.cc + src/daedalus/Compiler.cc + src/daedalus/Module.cc + src/daedalus/SyntaxTree.cc + src/daedalus/Tokenizer.cc + src/daedalus/TypeCheck.cc + src/daedalus/TypeStore.cc src/DaedalusScript.cc src/Date.cc src/DaedalusVm.cc @@ -72,6 +78,7 @@ list(APPEND _ZK_TESTS tests/TestArchive.cc tests/TestCutsceneLibrary.cc tests/TestDaedalusScript.cc + tests/TestDaedalusCompiler.cc tests/TestFont.cc tests/TestMaterial.cc tests/TestModel.cc diff --git a/include/zenkit/daedalus/Compiler.hh b/include/zenkit/daedalus/Compiler.hh new file mode 100644 index 00000000..fbb2b12b --- /dev/null +++ b/include/zenkit/daedalus/Compiler.hh @@ -0,0 +1,68 @@ +// Copyright © 2023 GothicKit Contributors. +// SPDX-License-Identifier: MIT +#pragma once +#include "zenkit/Error.hh" +#include "zenkit/Library.hh" +#include "zenkit/Stream.hh" + +#include +#include +#include +#include + +namespace zenkit::daedalus { + struct CompilationSource; + + struct SourceLocation { + std::shared_ptr source; + std::size_t line {0}, column {0}; + + [[nodiscard]] std::string format() const; + }; + + class CompilerError : public Error { + public: + CompilerError(std::string type, SourceLocation loc, std::string message); + + [[nodiscard]] std::string format() const; + + public: + SourceLocation location; + std::string type; + }; + + class SyntaxError : public CompilerError { + public: + SyntaxError(SourceLocation loc, std::string message); + }; + + class TypeError : public CompilerError { + public: + TypeError(SourceLocation loc, std::string message); + }; + + struct CompilationSource { + explicit CompilationSource(std::filesystem::path const& path); + explicit CompilationSource(std::vector source); + explicit CompilationSource(std::unique_ptr source); + + [[nodiscard]] std::string describe() const; + + std::filesystem::path 
const path; + std::unique_ptr read; + }; + + bool operator==(CompilationSource const& a, CompilationSource const& b); + + class Compiler { + public: + ZKAPI void add(std::filesystem::path const& source); + ZKAPI void add_raw(std::string_view source); + ZKAPI void add_src(std::filesystem::path const& source); + + ZKAPI void compile(); + + private: + std::vector> _m_sources; + }; +} // namespace zenkit::daedalus diff --git a/include/zenkit/daedalus/Module.hh b/include/zenkit/daedalus/Module.hh new file mode 100644 index 00000000..29f5be99 --- /dev/null +++ b/include/zenkit/daedalus/Module.hh @@ -0,0 +1,257 @@ +// Copyright © 2023 GothicKit Contributors. +// SPDX-License-Identifier: MIT +#pragma once +#include "zenkit/daedalus/SyntaxTree.hh" + +#include +#include +#include +#include +#include +#include + +namespace zenkit::daedalus { + enum class SymbolKind { + VAR, + CONST, + CLASS, + PROTOTYPE, + INSTANCE, + FUNC, + EXTERN, + }; + + enum class PrimitiveType { + VOID = 0, + INT, + INT_ARRAY, + FLOAT, + FLOAT_ARRAY, + STRING, + STRING_ARRAY, + FUNC, + FUNC_ARRAY, + }; + + class ClassSymbol; + + class Type { + public: + explicit Type(PrimitiveType t); + explicit Type(ClassSymbol* t); + + [[nodiscard]] bool is_convertible(Type other) const noexcept; + + [[nodiscard]] bool is_primitive() const noexcept; + [[nodiscard]] bool is_void() const noexcept; + [[nodiscard]] bool is_array() const noexcept; + [[nodiscard]] Type to_array() const noexcept; + [[nodiscard]] Type to_elemental() const noexcept; + + [[nodiscard]] PrimitiveType get_primitive_type() const; + [[nodiscard]] ClassSymbol* get_complex_type() const; + + [[nodiscard]] std::string const& describe() const; + + static Type VOID; + static Type INT; + static Type FLOAT; + static Type STRING; + static Type FUNC; + + private: + friend bool operator==(Type a, Type b) noexcept; + + std::variant _m_type; + }; + + bool operator==(Type a, Type b) noexcept; + bool operator!=(Type a, Type b) noexcept; + + class Symbol { + 
public: + Symbol(SymbolKind type, std::string name); + + Symbol(Symbol&&) = default; + virtual ~Symbol() noexcept = default; + + [[nodiscard]] SymbolKind get_kind() const noexcept; + [[nodiscard]] std::string const& get_name() const noexcept; + + protected: + SymbolKind _m_type; + std::string const _m_name; + }; + + class VarSymbol : public Symbol { + public: + VarSymbol(std::string name, Type type); + VarSymbol(std::string name, Type type, AstVariable ast); + + [[nodiscard]] Type get_type() const; + + void set_size(int size); + [[nodiscard]] int get_size() const; + + [[nodiscard]] inline std::optional get_ast() noexcept { + if (_m_ast) return &*_m_ast; + return std::nullopt; + } + + private: + Type _m_value_type; + int _m_size = 0; + std::optional _m_ast; + }; + + class ConstSymbol : public Symbol { + public: + ConstSymbol(std::string name, Type type); + ConstSymbol(std::string name, Type type, AstConstant ast); + + [[nodiscard]] Type get_type() const; + + void set_size(int size); + [[nodiscard]] int get_size() const; + + [[nodiscard]] inline std::optional get_ast() noexcept { + if (_m_ast) return &*_m_ast; + return std::nullopt; + } + + private: + Type _m_value_type; + int _m_size = 0; + std::optional _m_ast; + }; + + class Scope { + public: + std::vector& get_local_vars(); + + VarSymbol* add_local_var(std::string name, Type type, AstVariable ast); + + private: + // TODO: For extra fine-grained control, properly scope these. 
+ std::vector _m_local_vars; + }; + + class ClassSymbol : public Symbol { + public: + explicit ClassSymbol(std::string name); + ClassSymbol(std::string name, AstClass ast); + + VarSymbol* get_member(std::string const& name); + VarSymbol* get_member(AstIdentifier const& name); + + void add_member(std::string name, Type type); + void add_member(std::string name, Type type, AstVariable ast); + + std::vector& get_members(); + + [[nodiscard]] inline std::optional get_ast() noexcept { + if (_m_ast) return &*_m_ast; + return std::nullopt; + } + + private: + std::vector _m_members; + std::optional _m_ast; + }; + + class PrototypeSymbol : public Symbol { + public: + PrototypeSymbol(std::string name, ClassSymbol* base); + PrototypeSymbol(std::string name, ClassSymbol* base, AstPrototype ast); + + [[nodiscard]] ClassSymbol* get_base() const noexcept; + + Scope& get_scope(); + + [[nodiscard]] inline std::optional get_ast() noexcept { + if (_m_ast) return &*_m_ast; + return std::nullopt; + } + + private: + ClassSymbol* _m_base; + std::optional _m_ast; + Scope _m_scope; + }; + + class InstanceSymbol : public Symbol { + public: + InstanceSymbol(std::string name, Symbol* base); + InstanceSymbol(std::string name, Symbol* base, AstInstance ast); + + [[nodiscard]] Symbol* get_base() const noexcept; + [[nodiscard]] ClassSymbol* get_base_class() const noexcept; + + Scope& get_scope(); + + [[nodiscard]] inline std::optional get_ast() noexcept { + if (_m_ast) return &*_m_ast; + return std::nullopt; + } + + private: + Symbol* _m_base; + std::optional _m_ast; + Scope _m_scope; + }; + + class FunctionSymbol : public Symbol { + public: + FunctionSymbol(std::string name, Type rtype); + FunctionSymbol(std::string name, Type rtype, AstFunction ast); + + [[nodiscard]] Type get_return_type() const; + + VarSymbol* get_argument(std::string const& name); + VarSymbol* get_argument(AstIdentifier const& name); + + void add_argument(std::string name, Type type); + void add_argument(std::string name, 
Type type, AstVariable ast); + + std::vector& get_arguments(); + + Scope& get_scope(); + + [[nodiscard]] inline std::optional get_ast() noexcept { + if (_m_ast) return &*_m_ast; + return std::nullopt; + } + + private: + Type _m_return_type; + std::vector _m_args; + std::optional _m_ast; + Scope _m_scope; + }; + + class ExternSymbol : public Symbol { + public: + ExternSymbol(std::string name, Type rtype); + ExternSymbol(std::string name, Type rtype, AstExternal ast); + + [[nodiscard]] Type get_return_type() const; + + VarSymbol* get_argument(std::string const& name); + VarSymbol* get_argument(AstIdentifier const& name); + + void add_argument(std::string name, Type type); + void add_argument(std::string name, Type type, AstVariable ast); + + std::vector& get_arguments(); + + [[nodiscard]] inline std::optional get_ast() noexcept { + if (_m_ast) return &*_m_ast; + return std::nullopt; + } + + private: + Type _m_return_type; + std::vector _m_args; + std::optional _m_ast; + }; +} // namespace zenkit::daedalus diff --git a/include/zenkit/daedalus/SyntaxTree.hh b/include/zenkit/daedalus/SyntaxTree.hh new file mode 100644 index 00000000..fccc92f7 --- /dev/null +++ b/include/zenkit/daedalus/SyntaxTree.hh @@ -0,0 +1,326 @@ +// Copyright © 2023 GothicKit Contributors. 
+// SPDX-License-Identifier: MIT +#pragma once +#include "zenkit/Stream.hh" +#include "zenkit/daedalus/Compiler.hh" + +#include +#include +#include +#include +#include +#include + +#define ZK_AST_NODE(T, k) \ +public: \ + using ptr = std::unique_ptr; \ + \ + [[nodiscard]] inline AstNodeKind kind() const noexcept override { \ + return AstNodeKind::k; \ + } + +namespace zenkit::daedalus { + enum class AstNodeKind { + SCRIPT, + IDENTIFIER, + IDENTIFIER_QUALIFIED, + CONSTANT, + CLASS, + VARIABLE, + INSTANCE, + PROTOTYPE, + FUNCTION, + EXTERNAL, + + LITERAL_INT, + LITERAL_FLOAT, + LITERAL_STRING, + LITERAL_ARRAY, + + EXPR_REFERENCE, + EXPR_CALL, + EXPR_BINARY, + EXPR_UNARY, + + STMT_BLOCK, + STMT_CONDITION, + STMT_EXPRESSION, + STMT_RETURN, + }; + + enum class AstBinaryOp { + ASSIGN = 0, + ASSIGN_ADD, + ASSIGN_SUBTRACT, + ASSIGN_MULTIPLY, + ASSIGN_DIVIDE, + LOGICAL_AND, + LOGICAL_OR, + BITWISE_AND, + BITWISE_OR, + EQUAL, + NOT_EQUAL, + LESS_THAN, + LESS_THAN_OR_EQUAL, + GREATER_THAN, + GREATER_THAN_OR_EQUAL, + RIGHT_SHIFT, + LEFT_SHIFT, + ADD, + SUBTRACT, + MULTIPLY, + DIVIDE, + MODULO, + }; + + enum class AstUnaryOp { + UNARY_NOT, + UNARY_PLUS, + UNARY_MINUS, + UNARY_COMPLEMENT, + }; + + class AstNode { + public: + using ptr = std::unique_ptr; + + virtual ~AstNode() noexcept = default; + [[nodiscard]] virtual AstNodeKind kind() const noexcept = 0; + + public: + SourceLocation location {}; + }; + + class AstIdentifier final : public AstNode { + ZK_AST_NODE(AstIdentifier, IDENTIFIER); + + public: + std::string value; + }; + + class AstQualifiedIdentifier final : public AstNode { + ZK_AST_NODE(AstQualifiedIdentifier, IDENTIFIER_QUALIFIED); + + public: + AstIdentifier base; + std::optional element; + }; + + class AstExpression : public AstNode { + public: + using ptr = std::unique_ptr; + }; + + class AstLiteralInteger final : public AstExpression { + ZK_AST_NODE(AstLiteralInteger, LITERAL_INT); + + public: + int value {0}; + }; + + class AstLiteralFloat final : public 
AstExpression { + ZK_AST_NODE(AstLiteralFloat, LITERAL_FLOAT); + + public: + float value {0}; + }; + + class AstLiteralString final : public AstExpression { + ZK_AST_NODE(AstLiteralString, LITERAL_STRING); + + public: + std::string value; + }; + + class AstLiteralArray final : public AstExpression { + ZK_AST_NODE(AstLiteralArray, LITERAL_ARRAY); + + public: + using AstExpression::AstExpression; + + public: + std::vector value; + }; + + class AstExpressionReference final : public AstExpression { + ZK_AST_NODE(AstExpressionReference, EXPR_REFERENCE); + + public: + using AstExpression::AstExpression; + + public: + AstQualifiedIdentifier name; + std::optional index; + }; + + class AstExpressionBinary final : public AstExpression { + ZK_AST_NODE(AstExpressionBinary, EXPR_BINARY) + + public: + public: + AstBinaryOp type; + AstExpression::ptr lhs; + AstExpression::ptr rhs; + }; + + class AstExpressionUnary final : public AstExpression { + ZK_AST_NODE(AstExpressionUnary, EXPR_UNARY); + + public: + AstUnaryOp type; + AstExpression::ptr rhs; + }; + + class AstExpressionCall final : public AstExpression { + ZK_AST_NODE(AstExpressionCall, EXPR_CALL); + + public: + using AstExpression::AstExpression; + + public: + AstQualifiedIdentifier target; + std::vector args; + }; + + class AstVariable : public AstNode { + ZK_AST_NODE(AstVariable, VARIABLE); + + public: + using AstNode::AstNode; + + public: + AstIdentifier name; + AstIdentifier type; + std::optional size; + }; + + class AstConstant final : public AstVariable { + ZK_AST_NODE(AstConstant, CONSTANT); + + public: + using AstVariable::AstVariable; + + public: + AstExpression::ptr value; + }; + + class AstStatement : public AstNode { + public: + using ptr = std::unique_ptr; + using AstNode::AstNode; + }; + + class AstStatementBlock final : public AstNode { + ZK_AST_NODE(AstStatementBlock, STMT_BLOCK); + + public: + std::vector locals {}; + std::vector statements {}; + }; + + class AstStatementExpression final : public 
AstStatement { + ZK_AST_NODE(AstStatementExpression, STMT_EXPRESSION); + + public: + AstExpression::ptr expr; + }; + + class AstStatementReturn final : public AstStatement { + ZK_AST_NODE(AstStatementReturn, STMT_RETURN); + + public: + using AstStatement::AstStatement; + + public: + std::optional value; + }; + + class AstStatementCondition final : public AstStatement { + ZK_AST_NODE(AstStatementCondition, STMT_CONDITION); + + public: + using AstStatement::AstStatement; + + public: + AstExpression::ptr condition; + AstStatementBlock body; + + std::vector else_if; + std::optional else_body; + }; + + class AstClass final : public AstNode { + ZK_AST_NODE(AstClass, CLASS); + + public: + using AstNode::AstNode; + + public: + AstIdentifier name; + std::vector members; + }; + + class AstInstance final : public AstNode { + ZK_AST_NODE(AstInstance, INSTANCE); + + public: + using AstNode::AstNode; + + public: + AstIdentifier name; + AstIdentifier base; + std::optional body; + }; + + class AstPrototype final : public AstNode { + ZK_AST_NODE(AstPrototype, PROTOTYPE); + + public: + using AstNode::AstNode; + + public: + AstIdentifier name; + AstIdentifier base; + AstStatementBlock body; + }; + + class AstFunction final : public AstNode { + ZK_AST_NODE(AstFunction, FUNCTION); + + public: + AstIdentifier name {}; + AstIdentifier rtype {}; + std::vector args {}; + AstStatementBlock body {}; + }; + + class AstExternal final : public AstNode { + ZK_AST_NODE(AstExternal, EXTERNAL); + + public: + AstIdentifier name {}; + AstIdentifier rtype {}; + std::vector args {}; + }; + + class AstScript final : public AstNode { + ZK_AST_NODE(AstScript, SCRIPT); + + public: + AstScript() = default; + + public: + std::vector classes; + std::vector prototypes; + std::vector instances; + std::vector variables; + std::vector constants; + std::vector functions; + std::vector externals; + }; + + ZKAPI void parse_script(AstScript* ast, std::filesystem::path const& source); + ZKAPI void 
parse_script(AstScript* ast, std::unique_ptr source); + ZKAPI void parse_script(AstScript* ast, CompilationSource source); + ZKAPI void parse_script(AstScript* ast, std::shared_ptr source); +} // namespace zenkit::daedalus diff --git a/src/daedalus/Compiler.cc b/src/daedalus/Compiler.cc new file mode 100644 index 00000000..23e077b0 --- /dev/null +++ b/src/daedalus/Compiler.cc @@ -0,0 +1,215 @@ +// Copyright © 2023 GothicKit Contributors. +// SPDX-License-Identifier: MIT + +#include "zenkit/daedalus/Compiler.hh" +#include "zenkit/Misc.hh" +#include "zenkit/daedalus/SyntaxTree.hh" + +#include "../Internal.hh" + +#include "TypeCheck.hh" +#include "TypeStore.hh" + +#include + +namespace zenkit::daedalus { + static std::vector find_children(std::filesystem::path const& parent, + std::string const& child) { + if (!std::filesystem::is_directory(parent)) { + return {}; + } + + std::string prefix = child.substr(child.find_first_not_of(' '), child.find_last_not_of(' ') + 1), suffix; + std::transform(prefix.begin(), prefix.end(), prefix.begin(), ::toupper); + + auto wildcard = prefix.find('*'); + if (wildcard != std::string::npos) { + suffix = prefix.substr(wildcard + 1); + prefix = prefix.substr(0, wildcard); + } + + std::vector paths; + for (auto const& node : std::filesystem::directory_iterator(parent)) { + std::string name = node.path().filename(); + std::transform(name.begin(), name.end(), name.begin(), ::toupper); + + auto suffix_loc = wildcard == std::string::npos ? 
prefix.size() : name.rfind(suffix); + + if (name.find(prefix) == 0 && suffix_loc == (name.size() - suffix.size())) { + paths.push_back(node); + } + } + + return paths; + } + + CompilerError::CompilerError(std::string type, SourceLocation location, std::string message) + : Error(std::move(message)), location(std::move(location)), type(std::move(type)) {} + + SyntaxError::SyntaxError(SourceLocation location, std::string message) + : CompilerError("SyntaxError", std::move(location), std::move(message)) {} + + TypeError::TypeError(SourceLocation location, std::string message) + : CompilerError("TypeError", std::move(location), std::move(message)) {} + + std::string CompilerError::format() const { + std::string repr = this->type + ": " + this->message + "\n"; + + std::string filename = this->location.source->path.empty() ? "" : this->location.source->path; + repr += " --> " + filename + ":" + std::to_string(this->location.line) + ":" + + std::to_string(this->location.column); + repr += "\n"; + + return repr + "\n" + this->location.format(); + } + + std::string SourceLocation::format() const { + std::string filename = this->source->path.empty() ? 
"" : this->source->path.filename(); + + auto line_start = this->line - 5, line_end = this->line; + long line_chars = (long) fmax(::floor(::log10((double) line_end)) + 1, 3); + long line_length = 0; + + if (line_start > line_end) { + line_start = 0; + } + + std::string last_line; + std::string code_lines; + + auto off = this->source->read->tell(); + auto* r = this->source->read.get(); + r->seek(0, Whence::BEG); + + for (auto i = 0u; i < line_end; ++i) { + last_line = r->read_line(false); + if (i < line_start) continue; + + line_length = (long) ::fmax((double) line_length, (double) last_line.length()); + + auto lineno_string = std::to_string(i + 1); + code_lines += std::string(line_chars - lineno_string.length(), ' '); + code_lines += lineno_string; + code_lines += " | "; + code_lines += last_line; + code_lines += "\n"; + } + + r->seek(static_cast(off), Whence::BEG); + + code_lines += std::string(line_chars, ' ') + " | "; + long col = (long) fmin((double) this->column - 2, (double) last_line.size()); + for (auto i = 0; i < col; ++i) { + code_lines += std::isspace(last_line[i]) ? last_line[i] : ' '; + } + code_lines += "^~ Here."; + + long separator_length; + if ((long) filename.length() > line_length) { + separator_length = 10; + } else { + separator_length = line_length - (long) filename.length(); + } + + std::string repr = std::string(line_chars - 3, ' '); + repr += "----|--("; + repr += filename; + repr += ")"; + repr += std::string((long) ::fmax(10., (double) separator_length), '-'); + return repr + "\n" + code_lines; + } + + CompilationSource::CompilationSource(std::filesystem::path const& path) : path(path), read(Read::from(path)) {} + + CompilationSource::CompilationSource(std::vector source) + : path(""), read(Read::from(std::move(source))) {} + + CompilationSource::CompilationSource(std::unique_ptr source) : path(""), read(std::move(source)) {} + + std::string CompilationSource::describe() const { + return this->path.empty() ? 
"" : this->path.filename(); + } + + bool operator==(CompilationSource const& a, CompilationSource const& b) { + return !a.path.empty() && a.path == b.path; + } + + void Compiler::add(std::filesystem::path const& source) { + for (auto& src : _m_sources) { + if (src->path == source) { + return; + } + } + + _m_sources.push_back(std::make_shared(source)); + } + + void Compiler::add_raw(std::string_view source) { + _m_sources.push_back(std::make_shared( + std::vector(reinterpret_cast(source.data()), + reinterpret_cast(source.data() + source.size())))); + } + + void Compiler::add_src(std::filesystem::path const& source) { + auto dir = source.parent_path(); + auto rd = Read::from(source); + + while (!rd->eof()) { + std::string line = rd->read_line(true); + std::replace(line.begin(), line.end(), '\\', '/'); + + std::filesystem::path path = line; + path.make_preferred(); + + bool found = true; + std::filesystem::path root = dir; + std::vector files; + + for (auto& node : path) { + auto res = find_children(root, node); + + // No matching child was found. 
+ if (res.empty()) { + found = false; + break; + } + + if (res.size() == 1 && std::filesystem::is_directory(res[0])) { + root = res[0]; + continue; + } + + files.insert(files.end(), res.begin(), res.end()); + } + + if (found) { + for (auto& file : files) { + this->add(file); + } + } else { + ZKLOGW("Daedalus.Compiler", ".src reference \"%s\" not found", line.c_str()); + } + } + } + + void Compiler::compile() { + ZKLOGI("Daedalus.Compiler", "Compiling ..."); + + try { + AstScript root {}; + for (auto& source : _m_sources) { + ZKLOGD("Daedalus.Compiler", "Parsing module %s ...", source->describe().c_str()); + parse_script(&root, source); + } + + ZKLOGD("Daedalus.Compiler", "Checking types ..."); + TypeStore types; + types.add_script(std::move(root)); + check_types(&types); + + } catch (CompilerError& err) { + ZKLOGE("Daedalus.Compiler", "CompilerError:\n\n%s\n", err.format().c_str()); + throw err; + } + } +} // namespace zenkit::daedalus diff --git a/src/daedalus/Module.cc b/src/daedalus/Module.cc new file mode 100644 index 00000000..51f037ea --- /dev/null +++ b/src/daedalus/Module.cc @@ -0,0 +1,327 @@ +// Copyright © 2023 GothicKit Contributors. 
+// SPDX-License-Identifier: MIT +#include "zenkit/daedalus/Module.hh" +#include "zenkit/daedalus/SyntaxTree.hh" + +namespace zenkit::daedalus { + std::string const PRIMITIVE_TYPE_NAMES[] = { + "void", + "int", + "int[]", + "float", + "float[]", + "string", + "string[]", + "func", + "func[]", + }; + + Type Type::VOID {PrimitiveType::VOID}; + Type Type::INT {PrimitiveType::INT}; + Type Type::FLOAT {PrimitiveType::FLOAT}; + Type Type::STRING {PrimitiveType::STRING}; + Type Type::FUNC {PrimitiveType::FUNC}; + + Type::Type(PrimitiveType t) : _m_type(t) {} + + Type::Type(ClassSymbol* t) : _m_type(t) {} + + bool Type::is_convertible(Type other) const noexcept { + if (*this == other) { + return true; + } + + if (this->is_primitive() && other.is_primitive()) { + return (this->get_primitive_type() == PrimitiveType::INT && + other.get_primitive_type() == PrimitiveType::FLOAT); + } else if (!this->is_primitive() && other.is_primitive()) { + // Instances are convertible to int. + return other.get_primitive_type() == PrimitiveType::INT + +#ifndef ZK_DAEDALUS_STRICT + || other.get_primitive_type() == PrimitiveType::FUNC +#endif + ; + } + + return false; + } + + bool Type::is_primitive() const noexcept { + return std::holds_alternative(_m_type); + } + + bool Type::is_array() const noexcept { + return is_primitive() && + (get_primitive_type() == PrimitiveType::INT_ARRAY || get_primitive_type() == PrimitiveType::FLOAT_ARRAY || + get_primitive_type() == PrimitiveType::STRING_ARRAY || get_primitive_type() == PrimitiveType::FUNC_ARRAY); + } + + Type Type::to_array() const noexcept { + if (is_primitive()) { + switch (this->get_primitive_type()) { + case PrimitiveType::INT: + return Type {PrimitiveType::INT_ARRAY}; + case PrimitiveType::FLOAT: + return Type {PrimitiveType::FLOAT_ARRAY}; + case PrimitiveType::STRING: + return Type {PrimitiveType::STRING_ARRAY}; + case PrimitiveType::FUNC: + return Type {PrimitiveType::FUNC_ARRAY}; + default: + break; + } + } + + return *this; + } + + 
Type Type::to_elemental() const noexcept { + if (is_primitive()) { + switch (this->get_primitive_type()) { + case PrimitiveType::INT_ARRAY: + return Type {PrimitiveType::INT}; + case PrimitiveType::FLOAT_ARRAY: + return Type {PrimitiveType::FLOAT}; + case PrimitiveType::STRING_ARRAY: + return Type {PrimitiveType::STRING}; + case PrimitiveType::FUNC_ARRAY: + return Type {PrimitiveType::FUNC}; + default: + break; + } + } + + return *this; + } + + PrimitiveType Type::get_primitive_type() const { + return std::get(_m_type); + } + + ClassSymbol* Type::get_complex_type() const { + return std::get(_m_type); + } + + std::string const& Type::describe() const { + if (this->is_primitive()) { + return PRIMITIVE_TYPE_NAMES[(int) this->get_primitive_type()]; + } + + return get_complex_type()->get_name(); + } + + bool Type::is_void() const noexcept { + return is_primitive() && get_primitive_type() == PrimitiveType::VOID; + } + + bool operator==(Type a, Type b) noexcept { + if (a.is_primitive() && b.is_primitive()) { + return a.get_primitive_type() == b.get_primitive_type(); + } + + if (!a.is_primitive() && !b.is_primitive()) { + return a.get_complex_type() == b.get_complex_type(); + } + + return false; + } + + bool operator!=(Type a, Type b) noexcept { + return !(a == b); + } + + std::vector& Scope::get_local_vars() { + return _m_local_vars; + } + + VarSymbol* Scope::add_local_var(std::string name, Type type, AstVariable ast) { + return &_m_local_vars.emplace_back(std::move(name), type, std::move(ast)); + } + + Symbol::Symbol(SymbolKind type, std::string name) : _m_type(type), _m_name(std::move(name)) {} + + SymbolKind Symbol::get_kind() const noexcept { + return _m_type; + } + + std::string const& Symbol::get_name() const noexcept { + return _m_name; + } + + VarSymbol::VarSymbol(std::string name, Type type) : Symbol(SymbolKind::VAR, std::move(name)), _m_value_type(type) {} + + VarSymbol::VarSymbol(std::string name, Type type, AstVariable ast) + : Symbol(SymbolKind::VAR, 
std::move(name)), _m_value_type(type), _m_ast(std::move(ast)) {} + + Type VarSymbol::get_type() const { + return _m_value_type; + } + + void VarSymbol::set_size(int size) { + _m_size = size; + } + + int VarSymbol::get_size() const { + return _m_size; + } + + ClassSymbol::ClassSymbol(std::string name) : Symbol(SymbolKind::CLASS, std::move(name)) {} + + ClassSymbol::ClassSymbol(std::string name, AstClass ast) + : Symbol(SymbolKind::CLASS, std::move(name)), _m_ast(std::move(ast)) {} + + VarSymbol* ClassSymbol::get_member(std::string const& name) { + for (auto& member : _m_members) { + if (member.get_name() == name) { + return &member; + } + } + return nullptr; + } + + VarSymbol* ClassSymbol::get_member(AstIdentifier const& name) { + return get_member(name.value); + } + + void ClassSymbol::add_member(std::string name, Type type) { + _m_members.emplace_back(std::move(name), type); + } + + void ClassSymbol::add_member(std::string name, Type type, AstVariable ast) { + _m_members.emplace_back(std::move(name), type, std::move(ast)); + } + + std::vector& ClassSymbol::get_members() { + return _m_members; + } + + PrototypeSymbol::PrototypeSymbol(std::string name, ClassSymbol* base) + : Symbol(SymbolKind::PROTOTYPE, std::move(name)), _m_base(base) {} + + PrototypeSymbol::PrototypeSymbol(std::string name, ClassSymbol* base, AstPrototype ast) + : Symbol(SymbolKind::PROTOTYPE, std::move(name)), _m_base(base), _m_ast(std::move(ast)) {} + + ClassSymbol* PrototypeSymbol::get_base() const noexcept { + return _m_base; + } + + Scope& PrototypeSymbol::get_scope() { + return _m_scope; + } + + InstanceSymbol::InstanceSymbol(std::string name, Symbol* base) + : Symbol(SymbolKind::INSTANCE, std::move(name)), _m_base(base) {} + + InstanceSymbol::InstanceSymbol(std::string name, Symbol* base, AstInstance ast) + : Symbol(SymbolKind::INSTANCE, std::move(name)), _m_base(base), _m_ast(std::move(ast)) {} + + Symbol* InstanceSymbol::get_base() const noexcept { + return _m_base; + } + + Scope& 
InstanceSymbol::get_scope() { + return _m_scope; + } + + ClassSymbol* InstanceSymbol::get_base_class() const noexcept { + if (_m_base->get_kind() == SymbolKind::CLASS) { + return (ClassSymbol*) _m_base; + } + return ((PrototypeSymbol*) _m_base)->get_base(); + } + + FunctionSymbol::FunctionSymbol(std::string name, Type rtype) + : Symbol(SymbolKind::FUNC, std::move(name)), _m_return_type(rtype) {} + + FunctionSymbol::FunctionSymbol(std::string name, Type rtype, AstFunction ast) + : Symbol(SymbolKind::FUNC, std::move(name)), _m_return_type(rtype), _m_ast(std::move(ast)) {} + + VarSymbol* FunctionSymbol::get_argument(std::string const& name) { + for (auto& arg : _m_args) { + if (arg.get_name() == name) { + return &arg; + } + } + + return nullptr; + } + + VarSymbol* FunctionSymbol::get_argument(AstIdentifier const& name) { + return this->get_argument(name.value); + } + + void FunctionSymbol::add_argument(std::string name, Type type) { + _m_args.emplace_back(std::move(name), type); + } + + void FunctionSymbol::add_argument(std::string name, Type type, AstVariable ast) { + _m_args.emplace_back(std::move(name), type, std::move(ast)); + } + + Type FunctionSymbol::get_return_type() const { + return _m_return_type; + } + + std::vector& FunctionSymbol::get_arguments() { + return _m_args; + } + + Scope& FunctionSymbol::get_scope() { + return _m_scope; + } + + ConstSymbol::ConstSymbol(std::string name, Type type) + : Symbol(SymbolKind::CONST, std::move(name)), _m_value_type(type) {} + + ConstSymbol::ConstSymbol(std::string name, Type type, AstConstant ast) + : Symbol(SymbolKind::CONST, std::move(name)), _m_value_type(type), _m_ast(std::move(ast)) {} + + Type ConstSymbol::get_type() const { + return _m_value_type; + } + + void ConstSymbol::set_size(int size) { + _m_size = size; + } + + int ConstSymbol::get_size() const { + return _m_size; + } + + ExternSymbol::ExternSymbol(std::string name, Type rtype) + : Symbol(SymbolKind::EXTERN, std::move(name)), _m_return_type(rtype) {} + + 
ExternSymbol::ExternSymbol(std::string name, Type rtype, AstExternal ast) + : Symbol(SymbolKind::EXTERN, std::move(name)), _m_return_type(rtype), _m_ast(std::move(ast)) {} + + VarSymbol* ExternSymbol::get_argument(std::string const& name) { + for (auto& arg : _m_args) { + if (arg.get_name() == name) { + return &arg; + } + } + + return nullptr; + } + + VarSymbol* ExternSymbol::get_argument(AstIdentifier const& name) { + return this->get_argument(name.value); + } + + void ExternSymbol::add_argument(std::string name, Type type) { + _m_args.emplace_back(std::move(name), type); + } + + void ExternSymbol::add_argument(std::string name, Type type, AstVariable ast) { + _m_args.emplace_back(std::move(name), type, std::move(ast)); + } + + Type ExternSymbol::get_return_type() const { + return _m_return_type; + } + + std::vector& ExternSymbol::get_arguments() { + return _m_args; + } +} // namespace zenkit::daedalus diff --git a/src/daedalus/SyntaxTree.cc b/src/daedalus/SyntaxTree.cc new file mode 100644 index 00000000..7b9af07b --- /dev/null +++ b/src/daedalus/SyntaxTree.cc @@ -0,0 +1,809 @@ +// Copyright © 2023 GothicKit Contributors. +// SPDX-License-Identifier: MIT +#include "zenkit/daedalus/SyntaxTree.hh" + +#include "../Internal.hh" +#include "Tokenizer.hh" + +#include +#include + +#define TRY(x, ...) \ + do { \ + if (!(x)) { \ + report(tk.location(), __VA_ARGS__); \ + } \ + } while (false) + +#define WRN(x, ...) 
\ + do { \ + if (!(x)) { \ + warn(tk.location(), __VA_ARGS__); \ + } \ + } while (false) + +namespace zenkit::daedalus { + static std::string KW_CONST = "const"; + static std::string KW_VAR = "var"; + static std::string KW_FUNC = "func"; + static std::string KW_CLASS = "class"; + static std::string KW_INSTANCE = "instance"; + static std::string KW_PROTOTYPE = "prototype"; + static std::string KW_IF = "if"; + static std::string KW_ELSE = "else"; + static std::string KW_RETURN = "return"; + static std::string KW_EXTERN = "extern"; + + [[noreturn]] static void report(SourceLocation loc, std::string message) { + throw SyntaxError {std::move(loc), std::move(message)}; + } + + [[maybe_unused]] static void warn(SourceLocation const& loc, char const* message) { + auto filename = loc.source->path.empty() ? "" : loc.source->path; + ZKLOGW("Daedalus.Compiler", + "\n\nSyntaxWarning: %s\n --> %s:%zu:%zu\n\n%s\n", + message, + filename.c_str(), + loc.line, + loc.column, + loc.format().c_str()); + } + + [[nodiscard]] static bool parse_stmt_block(Tokenizer&, AstStatementBlock*); + [[nodiscard]] static bool parse_expr_assign(Tokenizer&, AstExpression::ptr*); + [[nodiscard]] static bool parse_var_decl(Tokenizer&, std::vector*); + + /// ident = (ALPHA / "_") *(ALPHA / DIGIT / "_") + [[nodiscard]] static bool parse_ident(Tokenizer& tk, AstIdentifier* ast) { + if (!tk.match(Token::IDENTIFIER)) { + return false; + } + + ast->location = tk.location(); + ast->value = tk.get_value(); + return true; + } + + /// ident-qual = ident ["." 
ident] + [[nodiscard]] static bool parse_ident_qual(Tokenizer& tk, AstQualifiedIdentifier* ast) { + if (!parse_ident(tk, &ast->base)) { + return false; + } + + ast->location = tk.location(); + + if (tk.match(Token::DOT)) { + TRY(parse_ident(tk, &ast->element.emplace()), "Expected "); + } + + return true; + } + + /// expr = expr-assign + [[nodiscard]] static bool parse_expr(Tokenizer& tk, AstExpression::ptr* ast) { + return parse_expr_assign(tk, ast); + } + + /// literal-string = DQUOTE *(ALPHA / DIGIT / WSP / "!" / %x23-7E) DQUOTE + [[nodiscard]] static bool parse_literal_string(Tokenizer& tk, AstExpression::ptr* ast) { + if (!tk.match(Token::STRING)) { + return false; + } + + auto expr = new AstLiteralString; + expr->location = tk.location(); + expr->value = tk.get_value(); + ast->reset(expr); + return true; + } + + /// literal-int = 1*DIGIT + [[nodiscard]] static bool parse_literal_int(Tokenizer& tk, AstExpression::ptr* ast) { + if (!tk.match(Token::INTEGER)) { + return false; + } + + auto expr = new AstLiteralInteger; + expr->location = tk.location(); + expr->value = tk.get_value_int(); + ast->reset(expr); + return true; + } + + /// literal-float = 1*DIGIT ["." *DIGIT] + [[nodiscard]] static bool parse_literal_float(Tokenizer& tk, AstExpression::ptr* ast) { + if (!tk.match(Token::FLOAT)) { + return false; + } + + auto expr = new AstLiteralFloat; + expr->location = tk.location(); + expr->value = tk.get_value_float(); + ast->reset(expr); + return true; + } + + /// literal-array = "{" expr *("," expr) [","] "}" + [[nodiscard]] static bool parse_literal_array(Tokenizer& tk, AstExpression::ptr* ast) { + if (!tk.match(Token::LBRACE)) { + return false; + } + + auto expr = new AstLiteralArray; + expr->location = tk.location(); + ast->reset(expr); + + while (!tk.match(Token::RBRACE)) { + auto& val = expr->value.emplace_back(); + TRY(parse_expr(tk, &val), "Expected "); + + if (tk.match(Token::COMMA, Token::RBRACE)) { + // Allow trailing commas. 
+ if (tk.last() == Token::RBRACE) break; + } else { + // Reject missing commas. + report(tk.location(), "Expected ','"); + } + } + + return true; + } + + /// expr-call-args = [expr *("," expr)] + /// expr-call = ident "(" expr-call-args ")" + /// expr-ref = ident-qual [ "[" expr "]" ] + [[nodiscard]] static bool parse_expr_ref_or_call(Tokenizer& tk, AstExpression::ptr* ast) { + AstQualifiedIdentifier ident; + if (!parse_ident_qual(tk, &ident)) { + return false; + } + + if (tk.match(Token::LPAREN)) { + auto expr = new AstExpressionCall; + expr->location = ident.location; + expr->target = std::move(ident); + ast->reset(expr); + + while (!tk.match(Token::RPAREN)) { + auto& arg = expr->args.emplace_back(); + TRY(parse_expr(tk, &arg), "Expected "); + + if (tk.match(Token::COMMA, Token::RPAREN)) { + // Allow trailing commas. + if (tk.last() == Token::RPAREN) { + break; + } + } else { + // Reject missing commas. + report(tk.location(), "Expected ','"); + } + } + } else { + auto expr = new AstExpressionReference; + expr->location = ident.location; + expr->name = std::move(ident); + ast->reset(expr); + + if (tk.match(Token::LBRACKET)) { + auto& sz = expr->index.emplace(); + TRY(parse_expr(tk, &sz), "Expected "); + TRY(tk.match(Token::RBRACKET), "Expected ']'"); + } + } + + return true; + } + + /// expr-group = "(" expr ")" + [[nodiscard]] static bool parse_expr_group(Tokenizer& tk, AstExpression::ptr* ast) { + if (!tk.match(Token::LPAREN)) { + return false; + } + + TRY(parse_expr(tk, ast), "Expected "); + TRY(tk.match(Token::RPAREN), "Expected ')'"); + return true; + } + + /// expr-primary = literal-string / literal-int / literal-float / literal-array / expr-ref / expr-call / expr-group + [[nodiscard]] static bool parse_expr_primary(Tokenizer& tk, AstExpression::ptr* ast) { + return parse_literal_int(tk, ast) || parse_literal_float(tk, ast) || parse_literal_string(tk, ast) || + parse_literal_array(tk, ast) || parse_expr_ref_or_call(tk, ast) || parse_expr_group(tk, ast); + } 
+
+    /// expr-unary = (("!" / "+" / "-" / "~") expr-unary) / expr-primary
+    ///
+    /// Parses a (possibly nested) prefix operator or falls through to a primary expression.
+    /// Returns `false` if neither alternative matches; a missing operand is reported through TRY.
+    [[nodiscard]] static bool parse_expr_unary(Tokenizer& tk, AstExpression::ptr* ast) {
+        if (!tk.match(Token::NOT, Token::PLUS, Token::MINUS, Token::COMPLEMENT)) {
+            return parse_expr_primary(tk, ast);
+        }
+
+        auto expr = new AstExpressionUnary();
+        expr->location = tk.location();
+
+        // clang-format off
+        if (tk.last() == Token::NOT)        expr->type = AstUnaryOp::UNARY_NOT;
+        if (tk.last() == Token::PLUS)       expr->type = AstUnaryOp::UNARY_PLUS;
+        if (tk.last() == Token::MINUS)      expr->type = AstUnaryOp::UNARY_MINUS;
+        if (tk.last() == Token::COMPLEMENT) expr->type = AstUnaryOp::UNARY_COMPLEMENT;
+        // clang-format on
+
+        // Transfer ownership to `*ast` before parsing the operand: TRY throws on failure and a node
+        // that is only reachable through the raw `expr` pointer would be leaked while unwinding.
+        ast->reset(expr);
+        TRY(parse_expr_unary(tk, &expr->rhs), "Expected ");
+        return true;
+    }
+
+    /// Shared tail of all binary operator rules.
+    ///
+    /// Creates a binary node of kind `op` located at the operator token that was matched last, moves
+    /// the operand currently stored in `*ast` into its left-hand side, stores the new node in `*ast`
+    /// and then parses the right-hand side. The node is owned by `*ast` before the right-hand side is
+    /// parsed, so nothing is leaked when TRY throws.
+    ///
+    /// NOTE(review): the right-hand side is parsed with the full `expr` rule, as the grammar comments
+    /// on the callers state. `a * b + c` therefore groups as `a * (b + c)` -- confirm this is intended.
+    static void parse_expr_binary_rhs(Tokenizer& tk, AstExpression::ptr* ast, AstBinaryOp op) {
+        auto expr = new AstExpressionBinary();
+        expr->location = tk.location();
+        expr->type = op;
+
+        ast->swap(expr->lhs);
+        ast->reset(expr);
+
+        TRY(parse_expr(tk, &expr->rhs), "Expected ");
+    }
+
+    /// expr-factor = expr-unary [("*" / "/" / "%") expr]
+    [[nodiscard]] static bool parse_expr_factor(Tokenizer& tk, AstExpression::ptr* ast) {
+        if (!parse_expr_unary(tk, ast)) {
+            return false;
+        }
+
+        if (tk.match(Token::MULTIPLY, Token::DIVIDE, Token::MODULO)) {
+            auto op = AstBinaryOp::MULTIPLY;
+            if (tk.last() == Token::DIVIDE) op = AstBinaryOp::DIVIDE;
+            if (tk.last() == Token::MODULO) op = AstBinaryOp::MODULO;
+            parse_expr_binary_rhs(tk, ast, op);
+        }
+
+        return true;
+    }
+
+    /// expr-term = expr-factor [("+" / "-") expr]
+    [[nodiscard]] static bool parse_expr_term(Tokenizer& tk, AstExpression::ptr* ast) {
+        if (!parse_expr_factor(tk, ast)) {
+            return false;
+        }
+
+        if (tk.match(Token::PLUS, Token::MINUS)) {
+            auto op = AstBinaryOp::ADD;
+            if (tk.last() == Token::MINUS) op = AstBinaryOp::SUBTRACT;
+            parse_expr_binary_rhs(tk, ast, op);
+        }
+
+        return true;
+    }
+
+    /// expr-shift = expr-term [("<<" / ">>") expr]
+    [[nodiscard]] static bool parse_expr_shift(Tokenizer& tk, AstExpression::ptr* ast) {
+        if (!parse_expr_term(tk, ast)) {
+            return false;
+        }
+
+        if (tk.match(Token::RIGHT_SHIFT, Token::LEFT_SHIFT)) {
+            auto op = AstBinaryOp::RIGHT_SHIFT;
+            if (tk.last() == Token::LEFT_SHIFT) op = AstBinaryOp::LEFT_SHIFT;
+            parse_expr_binary_rhs(tk, ast, op);
+        }
+
+        return true;
+    }
+
+    /// expr-rel = expr-shift [("<" / "<=" / ">=" / ">") expr]
+    [[nodiscard]] static bool parse_expr_rel(Tokenizer& tk, AstExpression::ptr* ast) {
+        if (!parse_expr_shift(tk, ast)) {
+            return false;
+        }
+
+        if (tk.match(Token::LESS_THAN, Token::GREATER_THAN, Token::LESS_THAN_OR_EQUAL, Token::GREATER_THAN_OR_EQUAL)) {
+            auto op = AstBinaryOp::LESS_THAN;
+            if (tk.last() == Token::GREATER_THAN) op = AstBinaryOp::GREATER_THAN;
+            if (tk.last() == Token::LESS_THAN_OR_EQUAL) op = AstBinaryOp::LESS_THAN_OR_EQUAL;
+            if (tk.last() == Token::GREATER_THAN_OR_EQUAL) op = AstBinaryOp::GREATER_THAN_OR_EQUAL;
+            parse_expr_binary_rhs(tk, ast, op);
+        }
+
+        return true;
+    }
+
+    /// expr-equal = expr-rel [("==" / "!=") expr]
+    [[nodiscard]] static bool parse_expr_equal(Tokenizer& tk, AstExpression::ptr* ast) {
+        if (!parse_expr_rel(tk, ast)) {
+            return false;
+        }
+
+        if (tk.match(Token::EQUAL, Token::NOT_EQUAL)) {
+            auto op = AstBinaryOp::EQUAL;
+            if (tk.last() == Token::NOT_EQUAL) op = AstBinaryOp::NOT_EQUAL;
+            parse_expr_binary_rhs(tk, ast, op);
+        }
+
+        return true;
+    }
+
+    /// expr-bit = expr-equal [("&" / "|") expr]
+    [[nodiscard]] static bool parse_expr_bit(Tokenizer& tk, AstExpression::ptr* ast) {
+        if (!parse_expr_equal(tk, ast)) {
+            return false;
+        }
+
+        if (tk.match(Token::BITWISE_AND, Token::BITWISE_OR)) {
+            auto op = AstBinaryOp::BITWISE_AND;
+            if (tk.last() == Token::BITWISE_OR) op = AstBinaryOp::BITWISE_OR;
+            parse_expr_binary_rhs(tk, ast, op);
+        }
+
+        return true;
+    }
+
+    /// expr-logic = expr-bit [("&&" / "||") expr]
+    [[nodiscard]] static bool parse_expr_logic(Tokenizer& tk, AstExpression::ptr* ast) {
+        if (!parse_expr_bit(tk, ast)) {
+            return false;
+        }
+
+        if (tk.match(Token::LOGICAL_AND, Token::LOGICAL_OR)) {
+            auto op = AstBinaryOp::LOGICAL_AND;
+            if (tk.last() == Token::LOGICAL_OR) op = AstBinaryOp::LOGICAL_OR;
+            parse_expr_binary_rhs(tk, ast, op);
+        }
+
+        return true;
+    }
+
+    /// expr-assign = expr-logic [("=" / "+=" / "-=" / "*=" / "/=") expr]
+    ///
+    /// Entry point of the expression grammar (see `parse_expr`).
+    [[nodiscard]] bool parse_expr_assign(Tokenizer& tk, AstExpression::ptr* ast) {
+        if (!parse_expr_logic(tk, ast)) {
+            return false;
+        }
+
+        if (tk.match(Token::ASSIGN,
+                     Token::ASSIGN_ADD,
+                     Token::ASSIGN_SUBTRACT,
+                     Token::ASSIGN_MULTIPLY,
+                     Token::ASSIGN_DIVIDE)) {
+            auto op = AstBinaryOp::ASSIGN;
+            if (tk.last() == Token::ASSIGN_ADD) op = AstBinaryOp::ASSIGN_ADD;
+            if (tk.last() == Token::ASSIGN_SUBTRACT) op = AstBinaryOp::ASSIGN_SUBTRACT;
+            if (tk.last() == Token::ASSIGN_MULTIPLY) op = AstBinaryOp::ASSIGN_MULTIPLY;
+            if (tk.last() == Token::ASSIGN_DIVIDE) op = AstBinaryOp::ASSIGN_DIVIDE;
+            parse_expr_binary_rhs(tk, ast, op);
+        }
+
+        return true;
+    }
+
+    /// stmt-cond = "if" expr stmt-block *("else" "if" expr stmt-block) ["else" stmt-block]
+    ///
+    /// Appends the parsed condition to `root->statements`. Returns `false` if the next token is not
+    /// the `if` keyword; a malformed condition or block is reported through TRY.
+    [[nodiscard]] static bool parse_stmt_cond(Tokenizer& tk, AstStatementBlock* root) {
+        if (!tk.match_kw(KW_IF)) {
+            return false;
+        }
+
+        // The statement list takes ownership right away, so later TRY failures cannot leak the node.
+        auto ast = new AstStatementCondition;
+        root->statements.emplace_back(ast);
+        ast->location = tk.location();
+
+        TRY(parse_expr(tk, &ast->condition), "Expected ");
+        TRY(parse_stmt_block(tk, &ast->body), "Expected ");
+
+        // Accept multiple `else if`-blocks or a single `else`-block
+        while (tk.match_kw(KW_ELSE)) {
+            // Accept a single `else`-block; it always terminates the chain.
+            if (!tk.match_kw(KW_IF)) {
+                auto& else_ = ast->else_body.emplace();
+                TRY(parse_stmt_block(tk, &else_), "Expected ");
+                break;
+            }
+
+            // Accept an `else-if` block
+            auto& elif = ast->else_if.emplace_back();
+            elif.location = tk.location();
+
+            TRY(parse_expr(tk, &elif.condition), "Expected ");
+            TRY(parse_stmt_block(tk, &elif.body), "Expected ");
+        }
+
+        return true;
+    }
+
+    /// stmt-return = "return" [expr]
+    ///
+    /// Appends a return statement to `root->statements`; the value is only set if an expression
+    /// follows the keyword.
+    [[nodiscard]] static bool parse_stmt_return(Tokenizer& tk, AstStatementBlock* root) {
+        if (!tk.match_kw(KW_RETURN)) {
+            return false;
+        }
+
+        auto ast = new AstStatementReturn;
+        root->statements.emplace_back(ast);
+        ast->location = tk.location();
+
+        AstExpression::ptr expr;
+        if (parse_expr(tk, &expr)) {
+            ast->value.emplace(std::move(expr));
+        }
+
+        return true;
+    }
+
+    /// stmt-decl = var-decl
+    [[nodiscard]] 
static bool parse_stmt_decl(Tokenizer& tk, AstStatementBlock* root) { + return parse_var_decl(tk, &root->locals); + } + + /// stmt-expr = expr + [[nodiscard]] static bool parse_stmt_expr(Tokenizer& tk, AstStatementBlock* root) { + AstExpression::ptr expr; + + if (!parse_expr(tk, &expr)) { + return false; + } + + auto ast = new AstStatementExpression; + root->statements.emplace_back(ast); + ast->location = expr->location; + ast->expr = std::move(expr); + + return true; + } + + /// stmt = stmt-cond / stmt-return / stmt-decl / stmt-expr + [[nodiscard]] static bool parse_stmt(Tokenizer& tk, AstStatementBlock* root) { + return parse_stmt_cond(tk, root) || parse_stmt_return(tk, root) || parse_stmt_decl(tk, root) || + parse_stmt_expr(tk, root); + } + + /// stmt-block = "{" *(stmt [";"]) "}" + [[nodiscard]] static bool parse_stmt_block(Tokenizer& tk, AstStatementBlock* root) { + if (!tk.match(Token::LBRACE)) { + return false; + } + + root->location = tk.location(); + + while (parse_stmt(tk, root)) { +#ifndef ZK_DAEDALUS_STRICT + WRN(tk.match(Token::SEMICOLON), "Expected ';'"); +#else + TRY(tk.match(Token::SEMICOLON), "Expected ';' (strict-mode)"); +#endif + } + + TRY(tk.match(Token::RBRACE), "Expected '}'"); + return true; + } + + static void parse_var_decl_base(Tokenizer& tk, AstVariable* ast) { + ast->location = tk.location(); + + TRY(parse_ident(tk, &ast->type), "Expected "); + TRY(parse_ident(tk, &ast->name), "Expected "); + + if (tk.match(Token::LBRACKET)) { + auto& expr = ast->size.emplace(); + TRY(parse_expr(tk, &expr), "Expected "); + TRY(tk.match(Token::RBRACKET), "Expected ']'"); + } + } + + /// var-decl-single = "var" ident ident [ "[" expr "]" ] + [[nodiscard]] static bool parse_var_decl(Tokenizer& tk, AstVariable* ast) { + if (!tk.match_kw(KW_VAR)) { + return false; + } + + parse_var_decl_base(tk, ast); + return true; + } + + /// var-decl = var-decl-single / var-decl-multi + [[nodiscard]] static bool parse_var_decl(Tokenizer& tk, std::vector* root) { + if 
(!tk.match_kw(KW_VAR)) { + return false; + } + + auto location = tk.location(); + std::vector names; + AstIdentifier type; + + TRY(parse_ident(tk, &type), "Expected "); + + do { + auto& name = names.emplace_back(); + TRY(parse_ident(tk, &name), "Expected "); + } while (tk.match(Token::COMMA)); + + if (names.size() == 1) { + auto& ast = root->emplace_back(); + ast.location = location; + ast.name = std::move(names[0]); + ast.type = type; + + if (tk.match(Token::LBRACKET)) { + auto& expr = ast.size.emplace(); + TRY(parse_expr(tk, &expr), "Expected "); + TRY(tk.match(Token::RBRACKET), "Expected ']'"); + } + + return true; + } + + for (auto& name : names) { + auto& ast = root->emplace_back(); + ast.location = location; + ast.name = std::move(name); + ast.type = type; + } + + return true; + } + + /// var-decl-multi = "var" ident ident *("," ident) + [[nodiscard]] static bool parse_var_decl(Tokenizer& tk, AstScript* root) { + return parse_var_decl(tk, &root->variables); + } + + static void parse_const_decl_base(Tokenizer& tk, AstConstant* ast) { + ast->location = tk.location(); + + TRY(parse_ident(tk, &ast->type), "Expected "); + TRY(parse_ident(tk, &ast->name), "Expected "); + + if (tk.match(Token::LBRACKET)) { + auto& expr = ast->size.emplace(); + TRY(parse_expr(tk, &expr), "Expected "); + TRY(tk.match(Token::RBRACKET), "Expected ']'"); + } + + TRY(tk.match(Token::ASSIGN), "Expected '='"); + TRY(parse_expr(tk, &ast->value), "Expected "); + } + + /// const-decl = "const" ident ident [ "[" expr "]" ] "=" expr + [[nodiscard]] static bool parse_const_decl(Tokenizer& tk, AstScript* root) { + if (!tk.match_kw(KW_CONST)) { + return false; + } + + auto& ast = root->constants.emplace_back(); + parse_const_decl_base(tk, &ast); + return true; + } + + /// class-decl = "class" ident "{" *(var-decl ";") "}" + [[nodiscard]] static bool parse_class_decl(Tokenizer& tk, AstScript* root) { + if (!tk.match_kw(KW_CLASS)) { + return false; + } + + auto& ast = root->classes.emplace_back(); + 
ast.location = tk.location(); + + TRY(parse_ident(tk, &ast.name), "Expected "); + TRY(tk.match(Token::LBRACE), "Expected '{'"); + + while (!tk.match(Token::RBRACE)) { + TRY(parse_var_decl(tk, &ast.members), "Expected "); + TRY(tk.match(Token::SEMICOLON), "Expected ';'"); + } + + return true; + } + + /// inst-decl = "instance" ident *("," ident) "(" ident ")" [stmt-block] + [[nodiscard]] static bool parse_inst_decl(Tokenizer& tk, AstScript* root) { + if (!tk.match_kw(KW_INSTANCE)) { + return false; + } + + auto location = tk.location(); + + std::vector names; + AstIdentifier base; + + do { + auto& name = names.emplace_back(); + TRY(parse_ident(tk, &name), "Expected "); + } while (tk.match(Token::COMMA)); + + TRY(tk.match(Token::LPAREN), "Expected '('"); + TRY(parse_ident(tk, &base), "Expected "); + TRY(tk.match(Token::RPAREN), "Expected ')'"); + + AstStatementBlock body; + bool has_body = parse_stmt_block(tk, &body); + + // When declaring multiple instances at once, instance bodies are not supported. 
+ if (has_body && names.size() > 1) { + report(body.location, "Unexpected "); + } + + for (auto& name : names) { + auto& ast = root->instances.emplace_back(); + ast.location = location; + ast.name = std::move(name); + ast.base = base; + + if (has_body) { + ast.body = std::move(body); + has_body = false; + } + } + + return true; + } + + /// proto-decl = "prototype" ident "(" ident ")" stmt-block + [[nodiscard]] static bool parse_proto_decl(Tokenizer& tk, AstScript* root) { + if (!tk.match_kw(KW_PROTOTYPE)) { + return false; + } + + auto& ast = root->prototypes.emplace_back(); + ast.location = tk.location(); + + TRY(parse_ident(tk, &ast.name), "Expected "); + TRY(tk.match(Token::LPAREN), "Expected '('"); + TRY(parse_ident(tk, &ast.base), "Expected "); + TRY(tk.match(Token::RPAREN), "Expected ')'"); + TRY(parse_stmt_block(tk, &ast.body), "Expected "); + return true; + } + + /// func-decl-args = "(" [ var-decl *("," var-decl) [","] ] ")" + [[nodiscard]] static bool parse_func_decl_args(Tokenizer& tk, std::vector* root) { + if (!tk.match(Token::LPAREN)) { + return false; + } + + while (!tk.match(Token::RPAREN)) { + auto& var = root->emplace_back(); + TRY(parse_var_decl(tk, &var), "Expected in "); + + if (tk.match(Token::COMMA, Token::RPAREN)) { + // Allow trailing commas. + if (tk.last() == Token::RPAREN) break; + } else { + // Reject missing commas. 
+ report(tk.location(), "Expected ','"); + } + } + + return true; + } + + /// func-decl = "func" ident ident func-decl-args stmt-block + [[nodiscard]] static bool parse_func_decl(Tokenizer& tk, AstScript* root) { + if (!tk.match_kw(KW_FUNC)) { + return false; + } + + auto& ast = root->functions.emplace_back(); + ast.location = tk.location(); + + TRY(parse_ident(tk, &ast.rtype), "Expected "); + TRY(parse_ident(tk, &ast.name), "Expected "); + + TRY(parse_func_decl_args(tk, &ast.args), "Expected "); + TRY(parse_stmt_block(tk, &ast.body), "Expected "); + + return true; + } + + /// extern-decl = "extern" "func" ident ident func-decl-args + [[nodiscard]] static bool parse_extern_decl(Tokenizer& tk, AstScript* root) { + if (!tk.match_kw(KW_EXTERN)) { + return false; + } + + TRY(tk.match_kw(KW_FUNC), "Expected 'func'"); + + auto& ast = root->externals.emplace_back(); + ast.location = tk.location(); + + TRY(parse_ident(tk, &ast.rtype), "Expected "); + TRY(parse_ident(tk, &ast.name), "Expected "); + + TRY(parse_func_decl_args(tk, &ast.args), "Expected "); + return true; + } + + /// script-decl-node = const-decl / var-decl / class-decl / proto-decl / inst-decl / func-decl / extern-decl + [[nodiscard]] static bool parse_script_decl_node(Tokenizer& tk, AstScript* ast) { + return parse_const_decl(tk, ast) || parse_var_decl(tk, ast) || parse_class_decl(tk, ast) || + parse_proto_decl(tk, ast) || parse_inst_decl(tk, ast) || parse_func_decl(tk, ast) || + parse_extern_decl(tk, ast); + } + + /// script-decl = *(script-decl-node ";") + void parse_script_decl(Tokenizer& tk, AstScript* ast) { + do { + if (tk.match(Token::END_OF_FILE)) { + break; + } + + TRY(parse_script_decl_node(tk, ast), "Expected "); + TRY(tk.match(Token::SEMICOLON), "Expected ';'"); + } while (true); + } + + void parse_script(AstScript* ast, std::filesystem::path const& source) { + Tokenizer tk {std::make_shared(source)}; + parse_script_decl(tk, ast); + } + + void parse_script(AstScript* ast, std::unique_ptr source) 
{
+        Tokenizer tk {std::make_shared(std::move(source))};
+        parse_script_decl(tk, ast);
+    }
+
+    /// Parses the given compilation source into `ast`.
+    void parse_script(AstScript* ast, CompilationSource source) {
+        Tokenizer tk {std::make_shared(std::move(source))};
+        parse_script_decl(tk, ast);
+    }
+
+    /// Parses the given shared compilation source into `ast`.
+    void parse_script(AstScript* ast, std::shared_ptr source) {
+        Tokenizer tk {std::move(source)};
+        parse_script_decl(tk, ast);
+    }
+} // namespace zenkit::daedalus
diff --git a/src/daedalus/Tokenizer.cc b/src/daedalus/Tokenizer.cc
new file mode 100644
index 00000000..85c8cd4d
--- /dev/null
+++ b/src/daedalus/Tokenizer.cc
@@ -0,0 +1,353 @@
+// Copyright © 2023 GothicKit Contributors.
+// SPDX-License-Identifier: MIT
+#include "Tokenizer.hh"
+
+#include "zenkit/Misc.hh"
+
+#include
+
+namespace zenkit::daedalus {
+    // The <cctype> classification functions have undefined behaviour for negative arguments other
+    // than EOF, which is what a plain `char` holding a non-ASCII byte converts to on platforms where
+    // `char` is signed. These wrappers convert through `unsigned char` first.
+    static bool is_space(char c) {
+        return std::isspace(static_cast<unsigned char>(c)) != 0;
+    }
+
+    static bool is_digit(char c) {
+        return std::isdigit(static_cast<unsigned char>(c)) != 0;
+    }
+
+    static bool is_alpha(char c) {
+        return std::isalpha(static_cast<unsigned char>(c)) != 0;
+    }
+
+    static bool is_punct(char c) {
+        return std::ispunct(static_cast<unsigned char>(c)) != 0;
+    }
+
+    /// Upper-case spellings of the identifiers reported as `Token::KEYWORD`.
+    ///
+    /// NOTE(review): "FUNC" is not listed even though the parser matches a `func` keyword. That
+    /// still works because `match_kw` accepts plain identifiers too; presumably the omission is
+    /// deliberate so that `func` can be parsed as an identifier -- confirm.
+    static std::unordered_set KEYWORDS = {
+        "CONST",
+        "VAR",
+        "CLASS",
+        "INSTANCE",
+        "PROTOTYPE",
+        "IF",
+        "ELSE",
+        "RETURN",
+        "EXTERN",
+    };
+
+    Tokenizer::Tokenizer(std::shared_ptr src) : _m_src(std::move(src)) {}
+
+    /// Returns the kind of the token produced by the most recent call to `next`.
+    Token Tokenizer::last() const noexcept {
+        return _m_token;
+    }
+
+    /// Returns the text collected for the most recent token. Identifiers and keywords are stored
+    /// upper-cased, strings without their quotes.
+    std::string const& Tokenizer::get_value() const noexcept {
+        return _m_value;
+    }
+
+    /// Converts the current token text with `std::stoi`.
+    int Tokenizer::get_value_int() const {
+        return std::stoi(_m_value);
+    }
+
+    /// Converts the current token text with `std::stof`.
+    float Tokenizer::get_value_float() const {
+        return std::stof(_m_value);
+    }
+
+    /// Scans the next token, remembers its kind and counts it.
+    Token Tokenizer::next() {
+        _m_value.clear();
+
+        auto tt = this->next_internal();
+        _m_token = tt;
+        _m_tokens_parsed += 1;
+
+        return tt;
+    }
+
+    /// Scans one token from the source, skipping whitespace and comments.
+    ///
+    /// The start of every scan attempt is recorded with `mark` so that `backtrack` and `location`
+    /// refer to the beginning of the returned token. Returns `Token::END_OF_FILE` once the source
+    /// is exhausted and `Token::UNKNOWN` for characters that start no known token.
+    Token Tokenizer::next_internal() {
+        // Reads the character following a one-character operator. Returns `pair` if it is `expect`;
+        // otherwise un-reads it (whitespace is simply dropped) and returns `single`.
+        auto const pair_or_single = [this](char expect, Token pair, Token single) -> Token {
+            char const follow = this->get();
+            if (follow == expect) {
+                return pair;
+            }
+
+            if (!is_space(follow)) {
+                this->unget();
+            }
+            return single;
+        };
+
+        while (!_m_src->read->eof()) {
+            this->mark();
+            auto ntk = this->get();
+
+            // Skip whitespace.
+            if (is_space(ntk)) {
+                continue;
+            }
+
+            // Skip comments.
+            if (ntk == '/') {
+                ntk = this->get();
+
+                // :: Line comments.
+                if (ntk == '/') {
+                    while (ntk != '\n' && !_m_src->read->eof()) {
+                        ntk = this->get();
+                    }
+
+                    continue;
+                }
+
+                // :: Block comments.
+                if (ntk == '*') {
+                    // `check_eoc` is set while the previously read character was a '*'.
+                    bool check_eoc = false;
+                    while (!_m_src->read->eof()) {
+                        ntk = this->get();
+
+                        if (ntk == '*') {
+                            check_eoc = true;
+                        } else if (ntk == '/' && check_eoc) {
+                            break;
+                        } else {
+                            check_eoc = false;
+                        }
+                    }
+
+                    continue;
+                }
+
+                // :: Compound assignment.
+                if (ntk == '=') {
+                    return Token::ASSIGN_DIVIDE;
+                } else if (!is_space(ntk)) {
+                    this->unget();
+                }
+
+                return Token::DIVIDE;
+            }
+
+            // Report strings. The quotes themselves are not part of the value.
+            if (ntk == '"') {
+                while (!_m_src->read->eof()) {
+                    ntk = this->get();
+
+                    if (ntk == '"') {
+                        break;
+                    }
+
+                    _m_value.push_back(ntk);
+                }
+
+                return Token::STRING;
+            }
+
+            // Report numbers.
+            if (is_digit(ntk)) {
+                auto tt = Token::INTEGER;
+
+                do {
+                    _m_value.push_back(ntk);
+
+                    ntk = this->get();
+                    if (tt == Token::INTEGER && ntk == '.') {
+                        tt = Token::FLOAT;
+
+                        // Keep the decimal point in the value. Without it "1.5" is collected as
+                        // "15" and `get_value_float` yields 15 instead of 1.5.
+                        _m_value.push_back(ntk);
+                        ntk = this->get();
+                    }
+                } while (is_digit(ntk) && !_m_src->read->eof());
+
+                // :: Backtrack one if the next char is not a space.
+                if (!is_space(ntk)) {
+                    this->unget();
+                }
+
+                return tt;
+            }
+
+            // Report identifiers.
+            if (is_alpha(ntk) || ntk == '_') {
+                auto tt = Token::IDENTIFIER;
+
+                do {
+                    _m_value.push_back(ntk);
+                    ntk = this->get();
+                } while (!is_space(ntk) && (!is_punct(ntk) || ntk == '_') && !_m_src->read->eof());
+
+                // :: Backtrack one if the next char is not a space.
+                if (!is_space(ntk)) {
+                    this->unget();
+                }
+
+                // Identifiers are normalized to upper case before the keyword lookup.
+                std::transform(_m_value.begin(), _m_value.end(), _m_value.begin(), [](char c) {
+                    return static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
+                });
+
+                auto it = KEYWORDS.find(_m_value);
+                if (it != KEYWORDS.end()) {
+                    return Token::KEYWORD;
+                }
+
+                return tt;
+            }
+
+            // Report 2-char tokens.
+            if (ntk == '<') {
+                ntk = this->get();
+
+                if (ntk == '<') {
+                    return Token::LEFT_SHIFT;
+                } else if (ntk == '=') {
+                    return Token::LESS_THAN_OR_EQUAL;
+                } else if (!is_space(ntk)) {
+                    this->unget();
+                }
+                return Token::LESS_THAN;
+            }
+
+            if (ntk == '>') {
+                ntk = this->get();
+
+                if (ntk == '>') {
+                    return Token::RIGHT_SHIFT;
+                } else if (ntk == '=') {
+                    return Token::GREATER_THAN_OR_EQUAL;
+                } else if (!is_space(ntk)) {
+                    this->unget();
+                }
+                return Token::GREATER_THAN;
+            }
+
+            // clang-format off
+            if (ntk == '=') return pair_or_single('=', Token::EQUAL, Token::ASSIGN);
+            if (ntk == '+') return pair_or_single('=', Token::ASSIGN_ADD, Token::PLUS);
+            if (ntk == '-') return pair_or_single('=', Token::ASSIGN_SUBTRACT, Token::MINUS);
+            if (ntk == '*') return pair_or_single('=', Token::ASSIGN_MULTIPLY, Token::MULTIPLY);
+            if (ntk == '|') return pair_or_single('|', Token::LOGICAL_OR, Token::BITWISE_OR);
+            if (ntk == '&') return pair_or_single('&', Token::LOGICAL_AND, Token::BITWISE_AND);
+            if (ntk == '!') return pair_or_single('=', Token::NOT_EQUAL, Token::NOT);
+            // clang-format on
+
+            switch (ntk) {
+            case '%':
+                return Token::MODULO;
+            case '~':
+                return Token::COMPLEMENT;
+            case ';':
+                return Token::SEMICOLON;
+            case ',':
+                return Token::COMMA;
+            case '(':
+                return Token::LPAREN;
+            case ')':
+                return Token::RPAREN;
+            case '{':
+                return Token::LBRACE;
+            case '}':
+                return Token::RBRACE;
+            case '[':
+                return Token::LBRACKET;
+            case ']':
+                return Token::RBRACKET;
+            case '.':
+                return Token::DOT;
+            default:
+                return Token::UNKNOWN;
+            }
+        }
+
+        return Token::END_OF_FILE;
+    }
+
+    /// Reads one character and advances the line/column bookkeeping.
+    char Tokenizer::get() {
+        auto c = _m_src->read->read_char();
+
+        if (c == '\n') {
+            _m_line += 1;
+            _m_column = 1;
+        } else {
+            _m_column += 1;
+        }
+
+        return c;
+    }
+
+    /// Records the current stream position, line and column as the backtracking point.
+    void Tokenizer::mark() {
+        _m_bt_pos = _m_src->read->tell();
+        _m_bt_column = _m_column;
+        _m_bt_line = _m_line;
+    }
+
+    /// Rewinds the stream to the last `mark` and takes the most recent token out of the count.
+    void Tokenizer::backtrack() {
+        _m_src->read->seek(static_cast(_m_bt_pos), Whence::BEG);
+        _m_column = _m_bt_column;
+        _m_line = _m_bt_line;
+        _m_tokens_parsed -= 1;
+    }
+
+    /// Steps one character back. Callers only use it for non-whitespace characters, so the line
+    /// counter never has to be adjusted.
+    void Tokenizer::unget() {
+        _m_src->read->seek(-1, Whence::CUR);
+        _m_column -= 1;
+    }
+
+    /// Consumes the next token if it is an identifier or keyword spelled like `kw` (compared with
+    /// `zenkit::iequals`). Otherwise the tokenizer is rewound and `false` is returned.
+    bool Tokenizer::match_kw(std::string_view kw) {
+        auto tt = next();
+
+        if (tt != Token::IDENTIFIER && tt != Token::KEYWORD) {
+            this->backtrack();
+            return false;
+        }
+
+        if (!zenkit::iequals(kw, this->get_value())) {
+            this->backtrack();
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Returns the source location recorded by the last `mark`, with the column shifted by one.
+    SourceLocation Tokenizer::location() const noexcept {
+        return SourceLocation {_m_src, _m_bt_line, _m_bt_column + 1};
+    }
+
+    /// Returns the number of tokens consumed so far (tokens undone by `backtrack` are not counted).
+    std::size_t Tokenizer::get_tokens_parsed() const {
+        return _m_tokens_parsed;
+    }
+} // namespace zenkit::daedalus
diff --git a/src/daedalus/Tokenizer.hh b/src/daedalus/Tokenizer.hh
new file mode 100644
index 00000000..b3f80eac
--- /dev/null
+++ b/src/daedalus/Tokenizer.hh
@@ -0,0 +1,108 @@
+// Copyright © 2023 GothicKit Contributors.
+// SPDX-License-Identifier: MIT +#pragma once +#include "zenkit/Stream.hh" +#include "zenkit/daedalus/Compiler.hh" +#include "zenkit/daedalus/SyntaxTree.hh" + +#include +#include + +namespace zenkit::daedalus { + enum class Token { + IDENTIFIER = 0, + KEYWORD, + INTEGER, + FLOAT, + STRING, + + DOT, + PLUS, + MINUS, + MULTIPLY, + DIVIDE, + MODULO, + NOT, + COMPLEMENT, + LESS_THAN, + GREATER_THAN, + LESS_THAN_OR_EQUAL, + GREATER_THAN_OR_EQUAL, + EQUAL, + NOT_EQUAL, + LOGICAL_OR, + LOGICAL_AND, + BITWISE_OR, + BITWISE_AND, + LEFT_SHIFT, + RIGHT_SHIFT, + ASSIGN, + ASSIGN_ADD, + ASSIGN_SUBTRACT, + ASSIGN_MULTIPLY, + ASSIGN_DIVIDE, + + SEMICOLON, + COMMA, + LPAREN, + RPAREN, + LBRACE, + RBRACE, + LBRACKET, + RBRACKET, + + UNKNOWN, + END_OF_FILE + }; + + class ZKINT Tokenizer { + public: + explicit Tokenizer(std::shared_ptr src); + + [[nodiscard]] Token last() const noexcept; + + template + bool match(T... tts) { + auto tt = next(); + + bool any = ((tt == tts) || ...); + if (!any) { + this->backtrack(); + return false; + } + + return true; + } + + bool match_kw(std::string_view kw); + + [[nodiscard]] std::string const& get_value() const noexcept; + [[nodiscard]] int get_value_int() const; + [[nodiscard]] float get_value_float() const; + [[nodiscard]] std::size_t get_tokens_parsed() const; + + [[nodiscard]] SourceLocation location() const noexcept; + + private: + Token next(); + Token next_internal(); + + void backtrack(); + + char get(); + void unget(); + void mark(); + + private: + std::shared_ptr _m_src; + + std::string _m_value; + Token _m_token {Token::UNKNOWN}; + std::size_t _m_tokens_parsed {0}; + + std::size_t _m_bt_pos {0}; + std::size_t _m_bt_line {0}, _m_bt_column {0}; + + std::size_t _m_line = 1, _m_column = 1; + }; +} // namespace zenkit::daedalus diff --git a/src/daedalus/TypeCheck.cc b/src/daedalus/TypeCheck.cc new file mode 100644 index 00000000..124c6e74 --- /dev/null +++ b/src/daedalus/TypeCheck.cc @@ -0,0 +1,620 @@ +// Copyright © 2023 GothicKit 
Contributors.
+// SPDX-License-Identifier: MIT
+#include "TypeCheck.hh"
+
+namespace zenkit::daedalus {
+    /// Throws a `TypeError` at the location of `ast` which names the expected and the actual type.
+    [[noreturn]] static void fail_mismatch(AstNode const* ast, Type expected, Type actual) {
+        throw TypeError {ast->location,
+                         "Type mismatch: expected " + expected.describe() + ", got " + actual.describe()};
+    }
+
+    /// Throws a `TypeError` for a name that could not be resolved. Qualified names are printed as
+    /// `base.element`.
+    [[noreturn]] static void fail_unresolved(AstQualifiedIdentifier const& new_) {
+        if (new_.element.has_value()) {
+            throw TypeError {new_.location, "Unresolved reference: " + new_.base.value + "." + new_.element->value};
+        } else {
+            throw TypeError {new_.location, "Unresolved reference: " + new_.base.value};
+        }
+    }
+
+    /// Throws a generic "Invalid reference" `TypeError` at the given location.
+    [[noreturn]] static void fail_invalid(SourceLocation const& new_) {
+        // TODO(lmichaelis): Better error messages
+        throw TypeError {new_, "Invalid reference"};
+    }
+
+    /// Throws an "Invalid reference" `TypeError` which names the offending identifier.
+    [[noreturn]] static void fail_invalid(AstQualifiedIdentifier const& new_) {
+        if (new_.element.has_value()) {
+            throw TypeError {new_.location, "Invalid reference: " + new_.base.value + "." + new_.element->value};
+        } else {
+            throw TypeError {new_.location, "Invalid reference: " + new_.base.value};
+        }
+    }
+
+    /// Throws a `TypeError` for an expression that is not a constant expression.
+    [[noreturn]] static void fail_expr(AstExpression const& new_) {
+        throw TypeError {new_.location, "Not a constant expression"};
+    }
+
+    /// Stack of variable lists used to resolve names during type checking. Lists are pushed in
+    /// nesting order, so the one pushed last is the innermost scope. Null entries are allowed and
+    /// are skipped during lookup.
+    class Locals {
+    public:
+        /// Adds `vars` as the new innermost scope. The list is referenced, not copied.
+        void push(std::vector* vars) {
+            _m_vars.push_back(vars);
+        }
+
+        /// Resolves `name` against the pushed scopes.
+        ///
+        /// The base identifier is looked up from the innermost scope outwards and the first scope
+        /// that declares it wins, so inner declarations shadow outer ones. If `name` has no element
+        /// part the base symbol is returned. Otherwise the element is looked up as a member of the
+        /// base symbol's complex type.
+        ///
+        /// Returns `nullptr` if the base name is unknown, or if an element is requested from a
+        /// constant or from a variable of primitive type.
+        Symbol* get_var(AstQualifiedIdentifier const& name) {
+            Symbol* base = nullptr;
+
+            // Stop at the first scope with a match. Continuing the walk would let a declaration of
+            // an outer scope replace the one found in an inner scope.
+            for (auto scope = _m_vars.rbegin(); scope != _m_vars.rend() && base == nullptr; ++scope) {
+                if (*scope == nullptr) {
+                    continue;
+                }
+
+                for (auto& var : **scope) {
+                    if (var.get_name() == name.base.value) {
+                        base = &var;
+                    }
+                }
+            }
+
+            if (!name.element.has_value() || base == nullptr) {
+                return base;
+            }
+
+            if (base->get_kind() == SymbolKind::CONST) {
+                return nullptr;
+            }
+
+            auto type = static_cast<VarSymbol*>(base)->get_type();
+            if (type.is_primitive()) {
+                return nullptr;
+            }
+
+            return type.get_complex_type()->get_member(*name.element);
+        }
+
+    private:
+        std::vector*> _m_vars;
+    };
+
+    
[[nodiscard]] static Type check_expr(TypeStore* db, AstExpression* ast, Locals&); + [[nodiscard]] static std::optional eval_expr_int(TypeStore* db, AstExpression* ast); + + std::optional eval_expr_ref_int(TypeStore* db, AstExpressionReference* ast) { + auto def = db->get(ast->name); + if (def->get_kind() != SymbolKind::CONST) { + return std::nullopt; + } + + auto const_ = (ConstSymbol*) def; + + if (ast->index) { + auto index_value = eval_expr_int(db, ast->index->get()); + if (!index_value) { + return std::nullopt; + } + + if (const_->get_size() == 0) { + // TODO: Eval size + } + + if (index_value >= const_->get_size()) { + // TODO: better error messages + fail_invalid(ast->location); + } + + return eval_expr_int( + db, + ((AstLiteralArray*) const_->get_ast().value()->value.get())->value[*index_value].get()); + } + + return eval_expr_int(db, const_->get_ast().value()->value.get()); + } + + std::optional eval_expr_binary_int(TypeStore* db, AstExpressionBinary* ast) { + auto lhs = eval_expr_int(db, ast->lhs.get()); + auto rhs = eval_expr_int(db, ast->rhs.get()); + + if (!lhs || !rhs) { + return std::nullopt; + } + + switch (ast->type) { + case AstBinaryOp::LOGICAL_AND: + return *lhs && *rhs; + case AstBinaryOp::LOGICAL_OR: + return *lhs || *rhs; + case AstBinaryOp::BITWISE_AND: + return *lhs & *rhs; + case AstBinaryOp::BITWISE_OR: + return *lhs | *rhs; + case AstBinaryOp::EQUAL: + return *lhs == *rhs; + case AstBinaryOp::NOT_EQUAL: + return *lhs != *rhs; + case AstBinaryOp::LESS_THAN: + return *lhs < *rhs; + case AstBinaryOp::LESS_THAN_OR_EQUAL: + return *lhs <= *rhs; + case AstBinaryOp::GREATER_THAN: + return *lhs > *rhs; + case AstBinaryOp::GREATER_THAN_OR_EQUAL: + return *lhs >= *rhs; + case AstBinaryOp::RIGHT_SHIFT: + return *lhs >> *rhs; + case AstBinaryOp::LEFT_SHIFT: + return *lhs << *rhs; + case AstBinaryOp::ADD: + return *lhs + *rhs; + case AstBinaryOp::SUBTRACT: + return *lhs - *rhs; + case AstBinaryOp::MULTIPLY: + return *lhs * *rhs; + case 
AstBinaryOp::DIVIDE: + return *lhs / *rhs; + case AstBinaryOp::MODULO: + return *lhs % *rhs; + default: + return std::nullopt; + } + } + + std::optional eval_expr_unary_int(TypeStore* db, AstExpressionUnary* ast) { + auto rhs = eval_expr_int(db, ast->rhs.get()); + + if (!rhs) { + return std::nullopt; + } + + switch (ast->type) { + case AstUnaryOp::UNARY_NOT: + return !*rhs; + case AstUnaryOp::UNARY_PLUS: + return +*rhs; + case AstUnaryOp::UNARY_MINUS: + return -*rhs; + case AstUnaryOp::UNARY_COMPLEMENT: + return ~*rhs; + } + } + + std::optional eval_expr_int(TypeStore* db, AstExpression* ast) { + switch (ast->kind()) { + case AstNodeKind::LITERAL_INT: + return ((AstLiteralInteger*) ast)->value; + case AstNodeKind::EXPR_REFERENCE: + return eval_expr_ref_int(db, (AstExpressionReference*) ast); + case AstNodeKind::EXPR_BINARY: + return eval_expr_binary_int(db, (AstExpressionBinary*) ast); + case AstNodeKind::EXPR_UNARY: + return eval_expr_unary_int(db, (AstExpressionUnary*) ast); + default: + return std::nullopt; + } + } + + [[nodiscard]] static Type check_literal_array(TypeStore* db, AstLiteralArray* ast, Locals& locals) { + auto type = check_expr(db, ast->value[0].get(), locals); + + // Check that every other type is convertible to the initial one. + for (auto i = 1u; i < ast->value.size(); ++i) { + auto next = check_expr(db, ast->value[i].get(), locals); + if (!next.is_convertible(type)) { + fail_mismatch(ast->value[i].get(), type, next); + } + } + + return type.to_array(); + } + + [[nodiscard]] static Type check_expr_ref(TypeStore* db, AstExpressionReference* ast, Locals& locals) { + auto def = locals.get_var(ast->name); + + if (def == nullptr) def = db->get(ast->name); + if (def == nullptr) fail_unresolved(ast->name); + + // Check the index type. 
+ if (ast->index) { + auto type_index = check_expr(db, ast->index->get(), locals); + if (!type_index.is_convertible(Type::INT)) { + fail_mismatch(ast->index->get(), Type::INT, type_index); + } + } + + if (def->get_kind() == SymbolKind::VAR) { + auto* var = (VarSymbol*) def; + auto var_type = var->get_type(); + + if (ast->index.has_value() && !var_type.is_array()) { + // TODO(lmichaelis): better error messages + fail_invalid(ast->name); + } + + // Check if the index fits (if it's a const expr) + if (ast->index) { + auto array_size = eval_expr_int(db, (*var->get_ast())->size->get()); + auto array_idx = eval_expr_int(db, ast->index->get()); + + if (!array_size.has_value()) { + fail_expr(**(*var->get_ast())->size); + } + + if (array_idx.has_value() && *array_idx >= *array_size) { + // TODO: better error messages + fail_invalid((*ast->index)->location); + } + } + + // TODO(lmichaelis): Do we have to allow raw arrays here? + return var_type.to_elemental(); + } + + if (def->get_kind() == SymbolKind::CONST) { + auto* var = (ConstSymbol*) def; + auto var_type = var->get_type(); + + // Check that the types match. + if (var_type.is_array() != ast->index.has_value()) { + // TODO(lmichaelis): better error messages + fail_invalid(ast->name); + } + + // Check if the index fits (if it's a const expr) + if (ast->index) { + auto array_size = eval_expr_int(db, (*var->get_ast())->size->get()); + auto array_idx = eval_expr_int(db, ast->index->get()); + + if (!array_size.has_value()) { + fail_expr(**(*var->get_ast())->size); + } + + if (array_idx.has_value() && *array_idx >= *array_size) { + // TODO: better error messages + fail_invalid((*ast->index)->location); + } + } + + // TODO(lmichaelis): Do we have to allow raw arrays here? + return var_type.to_elemental(); + } + + // Instances and funcs cannot be indexed. 
+ if (ast->index) { + // TODO(lmichaelis): better error messages + fail_invalid(ast->name); + } + +#ifndef ZK_DAEDALUS_STRICT + if (def->get_kind() == SymbolKind::PROTOTYPE) { + return Type {((PrototypeSymbol*) def)->get_base()}; + } +#endif + + if (def->get_kind() == SymbolKind::INSTANCE) { + return Type {((InstanceSymbol*) def)->get_base_class()}; + } + + if (def->get_kind() == SymbolKind::FUNC) { + return Type::FUNC; + } + + // TODO(lmichaelis): better error messages + fail_invalid(ast->name); + } + + [[nodiscard]] static Type check_expr_call(TypeStore* db, AstExpressionCall* ast, Locals& locals) { + auto def = db->get(ast->target); + if (def == nullptr) { + fail_unresolved(ast->target); + } + + auto rtype = Type::VOID; + std::vector* args; + + if (def->get_kind() == SymbolKind::FUNC) { + auto* func = (FunctionSymbol*) def; + rtype = func->get_return_type(); + args = &func->get_arguments(); + } else if (def->get_kind() == SymbolKind::EXTERN) { + auto* extern_ = (ExternSymbol*) def; + rtype = extern_->get_return_type(); + args = &extern_->get_arguments(); + } else { + // TODO(lmichaelis): better error messages + fail_invalid(ast->target); + } + + // Check that we have the correct number of args. 
+ if (args->size() != ast->args.size()) { + // TODO(lmichaelis): better error messages + fail_invalid(ast->target); + } + + // Check each argument's type + for (auto i = 0u; i < args->size(); ++i) { + auto* arg = ast->args[i].get(); + auto arg_type = check_expr(db, arg, locals); + auto var_type = args->at(i).get_type(); + + if (!arg_type.is_convertible(var_type)) { + fail_mismatch(arg, var_type, arg_type); + } + } + + return rtype; + } + + [[nodiscard]] static Type check_expr_binary(TypeStore* db, AstExpressionBinary* ast, Locals& locals) { + auto lhs = check_expr(db, ast->lhs.get(), locals); + auto rhs = check_expr(db, ast->rhs.get(), locals); + + switch (ast->type) { + case AstBinaryOp::ASSIGN: + if (ast->lhs->kind() != AstNodeKind::EXPR_REFERENCE) { + // TODO(lmichaelis): better error messages + fail_invalid(ast->lhs->location); + } + + if (!rhs.is_convertible(lhs)) { + fail_mismatch(ast->rhs.get(), lhs, rhs); + } + + return Type::VOID; + case AstBinaryOp::ASSIGN_ADD: + case AstBinaryOp::ASSIGN_SUBTRACT: + case AstBinaryOp::ASSIGN_MULTIPLY: + case AstBinaryOp::ASSIGN_DIVIDE: + if (ast->lhs->kind() != AstNodeKind::EXPR_REFERENCE) { + // TODO(lmichaelis): better error messages + fail_invalid(ast->lhs->location); + } + + if (rhs != Type::INT) { + fail_mismatch(ast->rhs.get(), Type::INT, rhs); + } + + return Type::VOID; + case AstBinaryOp::EQUAL: + case AstBinaryOp::NOT_EQUAL: + case AstBinaryOp::LESS_THAN: + case AstBinaryOp::GREATER_THAN: + case AstBinaryOp::GREATER_THAN_OR_EQUAL: + case AstBinaryOp::LESS_THAN_OR_EQUAL: + if (!rhs.is_convertible(Type::INT)) { + fail_mismatch(ast->rhs.get(), Type::INT, rhs); + } + + if (!lhs.is_convertible(Type::INT)) { + fail_mismatch(ast->lhs.get(), Type::INT, lhs); + } + + return Type::INT; + default: + if (lhs != Type::INT) { + fail_mismatch(ast->lhs.get(), Type::INT, lhs); + } + + if (rhs != Type::INT) { + fail_mismatch(ast->rhs.get(), Type::INT, rhs); + } + + return Type::INT; + } + } + + [[nodiscard]] static Type 
check_expr_unary(TypeStore* db, AstExpressionUnary* ast, Locals& locals) { + auto rhs = check_expr(db, ast->rhs.get(), locals); + + // TODO: ~ and ! are not supported for FLOATs + if (rhs != Type::INT && rhs != Type::FLOAT) { + fail_mismatch(ast->rhs.get(), Type::INT, rhs); + } + + return Type::INT; + } + + [[nodiscard]] static Type check_expr(TypeStore* db, AstExpression* ast, Locals& locals) { + switch (ast->kind()) { + case AstNodeKind::LITERAL_INT: + return Type::INT; + case AstNodeKind::LITERAL_FLOAT: + return Type::FLOAT; + case AstNodeKind::LITERAL_STRING: + return Type::STRING; + case AstNodeKind::LITERAL_ARRAY: + return check_literal_array(db, (AstLiteralArray*) ast, locals); + case AstNodeKind::EXPR_REFERENCE: + return check_expr_ref(db, (AstExpressionReference*) ast, locals); + case AstNodeKind::EXPR_CALL: + return check_expr_call(db, (AstExpressionCall*) ast, locals); + case AstNodeKind::EXPR_BINARY: + return check_expr_binary(db, (AstExpressionBinary*) ast, locals); + case AstNodeKind::EXPR_UNARY: + return check_expr_unary(db, (AstExpressionUnary*) ast, locals); + default: + break; + } + + throw Error {"Internal compiler error: non-expr passed to `check_expr` during type checking!"}; + } + + [[nodiscard]] static Type check_expr(TypeStore* db, AstExpression* ast) { + Locals locals; + return check_expr(db, ast, locals); + } + + static void check_var(TypeStore* db, VarSymbol* def) { + auto* ast = def->get_ast().value_or(nullptr); + if (ast == nullptr) return; + + // Check the array size. + if (ast->size) { + // Check the array type. + auto type = check_expr(db, ast->size->get()); + if (!type.is_convertible(Type::INT)) { + fail_mismatch(ast->size->get(), Type::INT, type); + } + + // Calculate the array size. 
+ auto size = eval_expr_int(db, ast->size->get()); + if (!size.has_value()) { + fail_expr(**ast->size); + } + + def->set_size(*size); + } + } + + static void check_const(TypeStore* db, ConstSymbol* def) { + auto* ast = def->get_ast().value_or(nullptr); + if (ast == nullptr) return; + + // Check the value type. + auto type = check_expr(db, ast->value.get()); + if (!type.is_convertible(def->get_type())) { + fail_mismatch(ast->value.get(), def->get_type(), type); + } + + // Check the array size. + if (ast->size) { + // Check the array type. + type = check_expr(db, ast->size->get()); + if (!type.is_convertible(Type::INT)) { + fail_mismatch(ast->size->get(), Type::INT, type); + } + + // Calculate the array size. + auto size = eval_expr_int(db, ast->size->get()); + if (!size.has_value()) { + fail_expr(**ast->size); + } + + def->set_size(*size); + } + + // TODO(lmichaelis): Evaluate value expression + } + + static void check_class(TypeStore* db, ClassSymbol* def) { + // Check each class member. 
+ for (auto& member : def->get_members()) { + check_var(db, &member); + } + } + + static void check_stmt_block(TypeStore* db, AstStatementBlock* ast, Locals& locals, Type rtype) { + for (auto& stmt : ast->statements) { + switch (stmt->kind()) { + case AstNodeKind::STMT_CONDITION: { + auto if_ = (AstStatementCondition*) stmt.get(); + auto if_cond = check_expr(db, if_->condition.get(), locals); + + if (!if_cond.is_convertible(Type::INT)) { + fail_mismatch(if_->condition.get(), Type::INT, if_cond); + } + + check_stmt_block(db, &if_->body, locals, rtype); + + for (auto& elif : if_->else_if) { + if_cond = check_expr(db, elif.condition.get(), locals); + if (!if_cond.is_convertible(Type::INT)) { + fail_mismatch(elif.condition.get(), Type::INT, if_cond); + } + + check_stmt_block(db, &elif.body, locals, rtype); + } + + if (if_->else_body.has_value()) { + check_stmt_block(db, &*if_->else_body, locals, rtype); + } + + break; + } + case AstNodeKind::STMT_EXPRESSION: { + auto type = check_expr(db, ((AstStatementExpression*) stmt.get())->expr.get(), locals); + if (type != Type::VOID) { + // TODO: WARN + } + break; + } + case AstNodeKind::STMT_RETURN: { + auto ret = (AstStatementReturn*) stmt.get(); + auto expr = Type::VOID; + + if (ret->value) { + expr = check_expr(db, ret->value->get(), locals); + } + + if (!expr.is_convertible(rtype)) { + fail_mismatch(ret, rtype, expr); + } + + break; + } + default: + break; + } + } + } + + static void check_prototype(TypeStore* db, PrototypeSymbol* def) { + auto* ast = def->get_ast().value_or(nullptr); + if (ast == nullptr) return; + + auto* base = def->get_base(); + + Locals locals; + locals.push(&base->get_members()); + + check_stmt_block(db, &ast->body, locals, Type::VOID); + } + + static void check_instance(TypeStore* db, InstanceSymbol* def) { + auto* ast = def->get_ast().value_or(nullptr); + if (ast == nullptr) return; + if (!ast->body) return; + + auto* base = def->get_base_class(); + + Locals locals; + +#ifndef ZK_DAEDALUS_STRICT + 
// `slf.` is permitted + std::vector orig_fixes; + orig_fixes.emplace_back("SLF", Type {base}); + locals.push(&orig_fixes); +#endif + + locals.push(&base->get_members()); + + check_stmt_block(db, &*ast->body, locals, Type::VOID); + } + + static void check_func(TypeStore* db, FunctionSymbol* def) { + auto* ast = def->get_ast().value_or(nullptr); + if (ast == nullptr) return; + + Locals locals; + locals.push(&def->get_arguments()); + locals.push(&def->get_scope().get_local_vars()); + + check_stmt_block(db, &ast->body, locals, def->get_return_type()); + } + + void check_types(TypeStore* db) { + db->each([db](Symbol* def) { + if (def->get_kind() == SymbolKind::CONST) { + check_const(db, (ConstSymbol*) def); + } else if (def->get_kind() == SymbolKind::VAR) { + check_var(db, (VarSymbol*) def); + } else if (def->get_kind() == SymbolKind::CLASS) { + check_class(db, (ClassSymbol*) def); + } + }); + + db->each([db](Symbol* def) { + if (def->get_kind() == SymbolKind::FUNC) { + check_func(db, (FunctionSymbol*) def); + } else if (def->get_kind() == SymbolKind::PROTOTYPE) { + check_prototype(db, (PrototypeSymbol*) def); + } else if (def->get_kind() == SymbolKind::INSTANCE) { + check_instance(db, (InstanceSymbol*) def); + } + }); + } +} // namespace zenkit::daedalus diff --git a/src/daedalus/TypeCheck.hh b/src/daedalus/TypeCheck.hh new file mode 100644 index 00000000..6ad7bc09 --- /dev/null +++ b/src/daedalus/TypeCheck.hh @@ -0,0 +1,8 @@ +// Copyright © 2023 GothicKit Contributors. +// SPDX-License-Identifier: MIT +#pragma once +#include "TypeStore.hh" + +namespace zenkit::daedalus { + ZKINT void check_types(TypeStore* db); +} diff --git a/src/daedalus/TypeStore.cc b/src/daedalus/TypeStore.cc new file mode 100644 index 00000000..a8dbde14 --- /dev/null +++ b/src/daedalus/TypeStore.cc @@ -0,0 +1,294 @@ +// Copyright © 2023 GothicKit Contributors. 
+// SPDX-License-Identifier: MIT +#include "TypeStore.hh" + +#include "zenkit/Misc.hh" + +namespace zenkit::daedalus { + static std::string_view TYPE_INT = "int"; + static std::string_view TYPE_STRING = "string"; + static std::string_view TYPE_FLOAT = "float"; + static std::string_view TYPE_FUNC = "func"; + static std::string_view TYPE_VOID = "void"; + + [[noreturn]] static void fail_duplicate(SourceLocation const& loc, std::string const& name) { + throw TypeError {loc, "Duplicate definition: " + name}; + } + + [[noreturn]] static void fail_unresolved(AstIdentifier const& new_) { + throw TypeError {new_.location, "Unresolved reference: " + new_.value}; + } + + [[noreturn]] static void fail_invalid(AstIdentifier const& new_) { + throw TypeError {new_.location, "Invalid type-id: " + new_.value}; + } + + static AstNode* get_ast(Symbol* sym) { + switch (sym->get_kind()) { + case SymbolKind::VAR: + return ((VarSymbol*) sym)->get_ast().value_or(nullptr); + case SymbolKind::CONST: + return ((ConstSymbol*) sym)->get_ast().value_or(nullptr); + case SymbolKind::CLASS: + return ((ClassSymbol*) sym)->get_ast().value_or(nullptr); + case SymbolKind::PROTOTYPE: + return ((PrototypeSymbol*) sym)->get_ast().value_or(nullptr); + case SymbolKind::INSTANCE: + return ((InstanceSymbol*) sym)->get_ast().value_or(nullptr); + case SymbolKind::FUNC: + return ((FunctionSymbol*) sym)->get_ast().value_or(nullptr); + case SymbolKind::EXTERN: + return ((ExternSymbol*) sym)->get_ast().value_or(nullptr); + } + } + + static void parse_scope(TypeStore* db, Scope& scope, AstStatementBlock* ast) { + for (auto& local : ast->locals) { + auto name = local.name.value; + auto type = db->type(local.type, local.size.has_value()); + + if (type.is_void()) { + fail_invalid(local.type); + } + + scope.add_local_var(std::move(name), type, std::move(local)); + } + + for (auto& stmt : ast->statements) { + if (stmt->kind() != AstNodeKind::STMT_CONDITION) continue; + + auto if_ = (AstStatementCondition*) stmt.get(); + 
parse_scope(db, scope, &if_->body); + + for (auto& elif : if_->else_if) { + parse_scope(db, scope, &elif.body); + } + + if (if_->else_body) { + parse_scope(db, scope, &*if_->else_body); + } + } + + ast->locals.clear(); + } + + void TypeStore::add_script(AstScript ast) { + for (auto& node : ast.classes) + this->add_class(std::move(node)); + for (auto& node : ast.prototypes) + this->add_prototype(std::move(node)); + for (auto& node : ast.instances) + this->add_instance(std::move(node)); + for (auto& node : ast.functions) + this->add_function(std::move(node)); + for (auto& node : ast.externals) + this->add_external(std::move(node)); + for (auto& node : ast.constants) + this->add_constant(std::move(node)); + for (auto& node : ast.variables) + this->add_variable(std::move(node)); + } + + void TypeStore::add_class(AstClass ast) { + auto new_members = std::move(ast.members); + auto new_name = ast.name.value; + auto new_ = std::make_unique(std::move(new_name), std::move(ast)); + + for (auto& member : new_members) { + auto def = new_->get_member(member.name); + if (def != nullptr) { + fail_duplicate(member.name.location, member.name.value); + } + + auto name = member.name.value; + auto type = this->type(member.type, member.size.has_value()); + + if (type.is_void()) { + fail_invalid(member.type); + } + + new_->add_member(std::move(name), type, std::move(member)); + } + + this->add(std::move(new_)); + } + + void TypeStore::add_prototype(AstPrototype ast) { + auto* base = this->get(ast.base); + if (base == nullptr) { + fail_unresolved(ast.base); + } else if (base->get_kind() != SymbolKind::CLASS) { + fail_invalid(ast.base); + } + + auto new_name = ast.name.value; + auto new_ = std::make_unique(std::move(new_name), (ClassSymbol*) base, std::move(ast)); + + parse_scope(this, new_->get_scope(), &new_->get_ast().value()->body); + this->add(std::move(new_)); + } + + void TypeStore::add_instance(AstInstance ast) { + auto* base = this->get(ast.base); + if (base == nullptr) { + 
fail_unresolved(ast.base); + } else if (base->get_kind() != SymbolKind::CLASS && base->get_kind() != SymbolKind::PROTOTYPE) { + fail_invalid(ast.base); + } + + auto new_name = ast.name.value; + auto new_ = std::make_unique(std::move(new_name), base, std::move(ast)); + + if ((*new_->get_ast())->body) { + parse_scope(this, new_->get_scope(), &*new_->get_ast().value()->body); + } + + this->add(std::move(new_)); + } + + void TypeStore::add_function(AstFunction ast) { + auto new_args = std::move(ast.args); + auto new_type = this->type(ast.rtype, false); + auto new_name = ast.name.value; + auto new_ = std::make_unique(std::move(new_name), new_type, std::move(ast)); + + for (auto& arg : new_args) { + auto def = new_->get_argument(arg.name); + if (def != nullptr) { + fail_duplicate(arg.name.location, arg.name.value); + } + + auto name = arg.name.value; + auto type = this->type(arg.type, false); + + if (arg.size.has_value() || type.is_void()) { + fail_invalid(arg.type); + } + + new_->add_argument(std::move(name), type, std::move(arg)); + } + + parse_scope(this, new_->get_scope(), &new_->get_ast().value()->body); + this->add(std::move(new_)); + } + + void TypeStore::add_external(AstExternal ast) { + auto new_args = std::move(ast.args); + auto new_type = this->type(ast.rtype, false); + auto new_name = ast.name.value; + auto new_ = std::make_unique(std::move(new_name), new_type, std::move(ast)); + + for (auto& arg : new_args) { + auto def = new_->get_argument(arg.name); + if (def != nullptr) { + fail_duplicate(arg.name.location, arg.name.value); + } + + auto name = arg.name.value; + auto type = this->type(arg.type, false); + + if (arg.size.has_value() || type.is_void()) { + fail_invalid(arg.type); + } + + new_->add_argument(std::move(name), type, std::move(arg)); + } + + this->add(std::move(new_)); + } + + void TypeStore::add_constant(AstConstant ast) { + auto name = ast.name.value; + auto type = this->type(ast.type, ast.size.has_value()); + + if (!type.is_primitive() || 
type.is_void()) { + fail_invalid(ast.type); + } + + this->add(std::make_unique(std::move(name), type, std::move(ast))); + } + + void TypeStore::add_variable(AstVariable ast) { + auto name = ast.name.value; + auto type = this->type(ast.type, ast.size.has_value()); + + if (type.is_void()) { + fail_invalid(ast.type); + } + + this->add(std::make_unique(std::move(name), type, std::move(ast))); + } + + void TypeStore::add(std::unique_ptr sym) { + auto xst = this->get(sym->get_name()); + if (xst != nullptr) { + auto new_ast = get_ast(sym.get()); + fail_duplicate(new_ast->location, sym->get_name()); + } + + _m_symbols[sym->get_name()] = std::move(sym); + } + + Symbol* TypeStore::get(std::string const& name) { + auto it = _m_symbols.find(name); + if (it == _m_symbols.end()) { + return nullptr; + } + return it->second.get(); + } + + Symbol* TypeStore::get(AstIdentifier const& name) { + return this->get(name.value); + } + + Symbol* TypeStore::get(AstQualifiedIdentifier const& name) { + auto base = this->get(name.base); + + if (base == nullptr || !name.element.has_value()) { + return base; + } + + ClassSymbol* cls; + if (base->get_kind() == SymbolKind::VAR) { + auto* var = (VarSymbol*) base; + + // The variable must be of type class. + if (var->get_type().is_primitive()) { + return nullptr; + } + + cls = var->get_type().get_complex_type(); + } else if (base->get_kind() == SymbolKind::INSTANCE) { + auto* instance = (InstanceSymbol*) base; + cls = instance->get_base_class(); + } else { + // Only vars and instances can be accessed through a qualified identifier. + return nullptr; + } + + return cls->get_member(*name.element); + } + + Type TypeStore::type(AstIdentifier const& name, bool array) { + if (zenkit::iequals(name.value, TYPE_INT)) return array ? Type::INT.to_array() : Type::INT; + if (zenkit::iequals(name.value, TYPE_FLOAT)) return array ? Type::FLOAT.to_array() : Type::FLOAT; + if (zenkit::iequals(name.value, TYPE_STRING)) return array ? 
Type::STRING.to_array() : Type::STRING; + if (zenkit::iequals(name.value, TYPE_FUNC)) return array ? Type::FUNC.to_array() : Type::FUNC; + + if (array) fail_invalid(name); + + if (zenkit::iequals(name.value, TYPE_VOID)) return Type::VOID; + + auto def = this->get(name); + if (def == nullptr) fail_unresolved(name); + if (def->get_kind() != SymbolKind::CLASS) fail_invalid(name); + + return Type((ClassSymbol*) def); + } + + void TypeStore::each(std::function const& cb) { + for (auto const& item : _m_symbols) { + cb(item.second.get()); + } + } +} // namespace zenkit::daedalus diff --git a/src/daedalus/TypeStore.hh b/src/daedalus/TypeStore.hh new file mode 100644 index 00000000..f79286b0 --- /dev/null +++ b/src/daedalus/TypeStore.hh @@ -0,0 +1,40 @@ +// Copyright © 2023 GothicKit Contributors. +// SPDX-License-Identifier: MIT +#pragma once +#include "zenkit/daedalus/Module.hh" +#include "zenkit/daedalus/SyntaxTree.hh" + +#include +#include +#include +#include +#include + +namespace zenkit::daedalus { + class TypeStore { + public: + TypeStore() = default; + + void each(std::function const& cb); + + void add_script(AstScript ast); + void add_class(AstClass ast); + void add_prototype(AstPrototype ast); + void add_instance(AstInstance ast); + void add_function(AstFunction ast); + void add_external(AstExternal ast); + void add_constant(AstConstant ast); + void add_variable(AstVariable ast); + + void add(std::unique_ptr sym); + + Symbol* get(std::string const& name); + Symbol* get(AstIdentifier const& name); + Symbol* get(AstQualifiedIdentifier const& name); + + Type type(AstIdentifier const& name, bool array); + + private: + std::unordered_map> _m_symbols; + }; +} // namespace zenkit::daedalus diff --git a/tests/TestDaedalusCompiler.cc b/tests/TestDaedalusCompiler.cc new file mode 100644 index 00000000..9c6cfee7 --- /dev/null +++ b/tests/TestDaedalusCompiler.cc @@ -0,0 +1,39 @@ +// Copyright © 2023 GothicKit Contributors. 
+// SPDX-License-Identifier: MIT +#include + +#include "zenkit/daedalus/Compiler.hh" + +#include + +static std::string_view SOURCE = R"( +const INT MY_CONSTANT_2 = 88 - 48; +CONST int MY_CONSTANT_1 = MY_CONSTANT_2 * 2; + +Class MyClass { + Var Int field1; + var Float field2; + var String field3; + var Func field4,field5; + + var int field6_array[5]; + var int field7_array[MY_CONSTANT_1]; +}; + +Prototype MyProto(MyClass) {}; + +Instance MyInstance1(MyClass) {}; +instance MyInstance2(MyProto) {}; + +Func Void MyFunc1() {}; +func void MyFunc2(var int par1) {}; +func void MyFunc3(var string par1, var MyClass par2) {}; + +)"; + +TEST_CASE("DaedalusCompiler") { + zenkit::Logger::set_default(zenkit::LogLevel::DEBUG); + zenkit::daedalus::Compiler compiler; + compiler.add_raw(SOURCE); + compiler.compile(); +}