From 3f95947075d7dc5c2850d73e69f9e9bf94ebe2cd Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 15 Jan 2024 07:44:11 +0100 Subject: [PATCH] progress --- .vscode/settings.json | 3 + Cargo.lock | 6 + lib/edlang_ast/src/lib.rs | 189 +++++++++++++++++++++++ lib/edlang_codegen_mlir/Cargo.toml | 4 + lib/edlang_codegen_mlir/build.rs | 16 ++ lib/edlang_codegen_mlir/src/codegen.rs | 1 + lib/edlang_codegen_mlir/src/ffi.rs | 10 ++ lib/edlang_codegen_mlir/src/lib.rs | 152 ++++++++++++++++++ lib/edlang_codegen_mlir/src/linker.rs | 98 ++++++++++++ lib/edlang_codegen_mlir/src/wrappers.cpp | 23 +++ lib/edlang_driver/src/lib.rs | 84 +++++++++- lib/edlang_parser/Cargo.toml | 1 + lib/edlang_parser/build.rs | 3 + lib/edlang_parser/src/error.rs | 4 + lib/edlang_parser/src/grammar.lalrpop | 119 ++++++++++++++ lib/edlang_parser/src/lexer.rs | 47 ++++++ lib/edlang_parser/src/lib.rs | 109 +++++++++++++ lib/edlang_parser/src/tokens.rs | 128 +++++++++++++++ lib/edlang_session/Cargo.toml | 1 + lib/edlang_session/src/lib.rs | 28 ++++ 20 files changed, 1025 insertions(+), 1 deletion(-) create mode 100644 .vscode/settings.json create mode 100644 lib/edlang_codegen_mlir/build.rs create mode 100644 lib/edlang_codegen_mlir/src/codegen.rs create mode 100644 lib/edlang_codegen_mlir/src/ffi.rs create mode 100644 lib/edlang_codegen_mlir/src/linker.rs create mode 100644 lib/edlang_codegen_mlir/src/wrappers.cpp create mode 100644 lib/edlang_parser/build.rs create mode 100644 lib/edlang_parser/src/error.rs create mode 100644 lib/edlang_parser/src/grammar.lalrpop create mode 100644 lib/edlang_parser/src/lexer.rs create mode 100644 lib/edlang_parser/src/tokens.rs diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..8e619a44e --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "rust-analyzer.showUnlinkedFileNotification": false +} diff --git a/Cargo.lock b/Cargo.lock index 1d4a26116..722e4ab85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -539,12 
+539,14 @@ dependencies = [ name = "edlang_codegen_mlir" version = "0.1.0" dependencies = [ + "cc", "edlang_ast", "edlang_parser", "edlang_session", "llvm-sys", "melior", "mlir-sys", + "tracing", ] [[package]] @@ -566,6 +568,7 @@ dependencies = [ name = "edlang_parser" version = "0.1.0" dependencies = [ + "ariadne", "edlang_ast", "lalrpop", "lalrpop-util", @@ -576,6 +579,9 @@ dependencies = [ [[package]] name = "edlang_session" version = "0.1.0" +dependencies = [ + "ariadne", +] [[package]] name = "either" diff --git a/lib/edlang_ast/src/lib.rs b/lib/edlang_ast/src/lib.rs index 8b1378917..0d8ecd18c 100644 --- a/lib/edlang_ast/src/lib.rs +++ b/lib/edlang_ast/src/lib.rs @@ -1 +1,190 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default, Hash)] +pub struct Span { + pub lo: usize, + pub hi: usize, +} +impl Span { + pub fn new(lo: usize, hi: usize) -> Self { + Self { lo, hi } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Module { + pub imports: Vec, + pub contents: Vec, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum ModuleStatement { + Function(Function), + Constant(Constant), + Struct(Struct), +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Import { + pub path: PathExpr, + /// If symbols is empty then the last path ident is the symbol. 
+ pub symbols: Vec, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct PathExpr { + pub first: Ident, + pub extra: Vec, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum PathSegment { + Field(Ident), + Index { value: Expression, span: Span }, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Ident { + pub name: String, + pub span: Span, +} + +// T: A + B +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Type { + pub name: Ident, + pub generics: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct FnParam { + pub name: Ident, + pub arg_type: Type, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Block { + pub body: Vec, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Statement { + Let, + Assign, + For, + While, + If, + Return, + FnCall(FnCallExpr), +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Function { + pub name: Ident, + pub is_extern: bool, + pub is_public: bool, + pub params: Vec, + pub return_type: Option, + pub body: Block, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Constant { + pub name: Ident, + pub r#type: Type, + pub value: Expression, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Field { + pub name: Ident, + pub r#type: Type, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Struct { + pub name: Ident, + pub generics: Vec, + pub fields: Vec, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Expression { + Value(ValueExpr), + FnCall(FnCallExpr), + Unary(UnaryOp, Box), + Binary(Box, BinaryOp, Box), +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, 
Ord, Hash)] +pub enum ValueExpr { + Bool { value: bool, span: Span }, + Char { value: char, span: Span }, + Int { value: u128, span: Span }, + Float { value: String, span: Span }, + Str { value: String, span: Span }, + Path(PathExpr), +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct FnCallExpr { + pub name: Ident, + pub generic_params: Vec, + pub params: Vec, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum UnaryOp { + ArithNeg(Span), + LogicalNot(Span), + BitwiseNot(Span), +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BinaryOp { + Arith(ArithOp, Span), + Logic(LogicOp, Span), + Compare(CmpOp, Span), + Bitwise(BitwiseOp, Span), +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum ArithOp { + Add, + Sub, + Mul, + Div, + Mod, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum LogicOp { + And, + Or, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum CmpOp { + Eq, + NotEq, + Lt, + LtEq, + Gt, + GtEq, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BitwiseOp { + And, + Or, + Xor, +} diff --git a/lib/edlang_codegen_mlir/Cargo.toml b/lib/edlang_codegen_mlir/Cargo.toml index 0b88d5b66..758c9e5a2 100644 --- a/lib/edlang_codegen_mlir/Cargo.toml +++ b/lib/edlang_codegen_mlir/Cargo.toml @@ -17,3 +17,7 @@ edlang_session = { version = "0.1.0", path = "../edlang_session" } llvm-sys = "170.0.1" melior = { version = "0.15.0", features = ["ods-dialects"] } mlir-sys = "0.2.1" +tracing = { workspace = true } + +[build-dependencies] +cc = "1.0.83" diff --git a/lib/edlang_codegen_mlir/build.rs b/lib/edlang_codegen_mlir/build.rs new file mode 100644 index 000000000..d4eaaff68 --- /dev/null +++ b/lib/edlang_codegen_mlir/build.rs @@ -0,0 +1,16 @@ +use std::env::var; + +fn main() { + let mlir_path = var("MLIR_SYS_170_PREFIX").expect("MLIR path should be set."); + + 
+ cc::Build::new() + .cpp(true) + .flag("-std=c++17") + .flag_if_supported("-Wno-unused-parameter") + .flag_if_supported("-Wno-comment") + .include(&format!("{mlir_path}/include")) + .file("src/wrappers.cpp") + .compile("ffi"); + + println!("cargo:rerun-if-changed=src/wrappers.cpp"); +} diff --git a/lib/edlang_codegen_mlir/src/codegen.rs b/lib/edlang_codegen_mlir/src/codegen.rs new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/lib/edlang_codegen_mlir/src/codegen.rs @@ -0,0 +1 @@ + diff --git a/lib/edlang_codegen_mlir/src/ffi.rs b/lib/edlang_codegen_mlir/src/ffi.rs new file mode 100644 index 000000000..8f5117613 --- /dev/null +++ b/lib/edlang_codegen_mlir/src/ffi.rs @@ -0,0 +1,10 @@ +use llvm_sys::prelude::{LLVMContextRef, LLVMModuleRef}; + +extern "C" { + /// Translate operation that satisfies LLVM dialect module requirements into an LLVM IR module living in the given context. + /// This translates operations from any dialect that has a registered implementation of LLVMTranslationDialectInterface. 
+ pub fn mlirTranslateModuleToLLVMIR( + module_operation_ptr: mlir_sys::MlirOperation, + llvm_context: LLVMContextRef, + ) -> LLVMModuleRef; +} diff --git a/lib/edlang_codegen_mlir/src/lib.rs b/lib/edlang_codegen_mlir/src/lib.rs index 8b1378917..db21ca1f4 100644 --- a/lib/edlang_codegen_mlir/src/lib.rs +++ b/lib/edlang_codegen_mlir/src/lib.rs @@ -1 +1,153 @@ +#![allow(clippy::too_many_arguments)] +use std::{ + ffi::{CStr, CString}, + mem::MaybeUninit, + path::PathBuf, + ptr::{addr_of_mut, null_mut}, + sync::OnceLock, +}; + +use edlang_session::{OptLevel, Session}; +use llvm_sys::{ + core::{LLVMContextCreate, LLVMContextDispose, LLVMDisposeMessage, LLVMDisposeModule}, + target::{ + LLVM_InitializeAllAsmPrinters, LLVM_InitializeAllTargetInfos, LLVM_InitializeAllTargetMCs, + LLVM_InitializeAllTargets, + }, + target_machine::{ + LLVMCodeGenFileType, LLVMCodeGenOptLevel, LLVMCodeModel, LLVMCreateTargetMachine, + LLVMDisposeTargetMachine, LLVMGetDefaultTargetTriple, LLVMGetHostCPUFeatures, + LLVMGetHostCPUName, LLVMGetTargetFromTriple, LLVMRelocMode, LLVMTargetMachineEmitToFile, + LLVMTargetRef, + }, +}; +use melior::ir::Module; + +use crate::ffi::mlirTranslateModuleToLLVMIR; + +pub mod codegen; +mod ffi; +pub mod linker; + +/// Converts a module to an object. +/// The object will be written to the specified target path. +/// TODO: error handling +/// +/// Returns the path to the object. +pub fn compile_to_object( + session: &Session, + module: &Module, +) -> Result> { + tracing::debug!("Compiling to object file"); + if !session.target_dir.exists() { + std::fs::create_dir_all(&session.target_dir)?; + } + + let target_file = session + .file_path + .clone() + .file_stem() + .unwrap() + .to_string_lossy() + .to_string(); + let target_file = PathBuf::from(target_file).with_extension("o"); + tracing::debug!("Target file: {:?}", target_file); + + let target_path = session.target_dir.join(target_file); + + // TODO: Rework so you can specify target and host features, etc. 
+ // Right now it compiles for the native cpu feature set and arch. + static INITIALIZED: OnceLock<()> = OnceLock::new(); + + INITIALIZED.get_or_init(|| unsafe { + LLVM_InitializeAllTargets(); + LLVM_InitializeAllTargetInfos(); + LLVM_InitializeAllTargetMCs(); + LLVM_InitializeAllAsmPrinters(); + tracing::debug!("initialized llvm targets"); + }); + + unsafe { + let llvm_context = LLVMContextCreate(); + + let op = module.as_operation().to_raw(); + + let llvm_module = mlirTranslateModuleToLLVMIR(op, llvm_context); + + let mut null = null_mut(); + let mut error_buffer = addr_of_mut!(null); + + let target_triple = LLVMGetDefaultTargetTriple(); + tracing::debug!("Target triple: {:?}", CStr::from_ptr(target_triple)); + + let target_cpu = LLVMGetHostCPUName(); + tracing::debug!("Target CPU: {:?}", CStr::from_ptr(target_cpu)); + + let target_cpu_features = LLVMGetHostCPUFeatures(); + tracing::debug!( + "Target CPU Features: {:?}", + CStr::from_ptr(target_cpu_features) + ); + + let mut target: MaybeUninit = MaybeUninit::uninit(); + + if LLVMGetTargetFromTriple(target_triple, target.as_mut_ptr(), error_buffer) != 0 { + let error = CStr::from_ptr(*error_buffer); + let err = error.to_string_lossy().to_string(); + tracing::error!("error getting target triple: {}", err); + LLVMDisposeMessage(*error_buffer); + panic!("{err}") + } else if !(*error_buffer).is_null() { + LLVMDisposeMessage(*error_buffer); + error_buffer = addr_of_mut!(null); + } + + let target = target.assume_init(); + + let machine = LLVMCreateTargetMachine( + target, + target_triple.cast(), + target_cpu.cast(), + target_cpu_features.cast(), + match session.optlevel { + OptLevel::None => LLVMCodeGenOptLevel::LLVMCodeGenLevelNone, + OptLevel::Less => LLVMCodeGenOptLevel::LLVMCodeGenLevelLess, + OptLevel::Default => LLVMCodeGenOptLevel::LLVMCodeGenLevelDefault, + OptLevel::Aggressive => LLVMCodeGenOptLevel::LLVMCodeGenLevelAggressive, + }, + if session.library { + LLVMRelocMode::LLVMRelocDynamicNoPic + } else { + 
LLVMRelocMode::LLVMRelocDefault + }, + LLVMCodeModel::LLVMCodeModelDefault, + ); + + let filename = CString::new(target_path.as_os_str().to_string_lossy().as_bytes()).unwrap(); + tracing::debug!("filename to llvm: {:?}", filename); + let ok = LLVMTargetMachineEmitToFile( + machine, + llvm_module, + filename.as_ptr().cast_mut(), + LLVMCodeGenFileType::LLVMObjectFile, // object (binary) or assembly (textual) + error_buffer, + ); + + if ok != 0 { + let error = CStr::from_ptr(*error_buffer); + let err = error.to_string_lossy().to_string(); + tracing::error!("error emitting to file: {:?}", err); + LLVMDisposeMessage(*error_buffer); + panic!("{err}") + } else if !(*error_buffer).is_null() { + LLVMDisposeMessage(*error_buffer); + } + + LLVMDisposeTargetMachine(machine); + LLVMDisposeModule(llvm_module); + LLVMContextDispose(llvm_context); + + Ok(target_path) + } +} diff --git a/lib/edlang_codegen_mlir/src/linker.rs b/lib/edlang_codegen_mlir/src/linker.rs new file mode 100644 index 000000000..6fb2f784c --- /dev/null +++ b/lib/edlang_codegen_mlir/src/linker.rs @@ -0,0 +1,98 @@ +use std::path::Path; + +use tracing::instrument; + +// TODO: Implement a proper linker driver, passing only the arguments needed dynamically based on the requirements. 
+ +#[instrument(level = "debug")] +pub fn link_shared_lib(input_path: &Path, output_filename: &Path) -> Result<(), std::io::Error> { + let args: &[&str] = { + #[cfg(target_os = "macos")] + { + &[ + "-demangle", + "-no_deduplicate", + "-dynamic", + "-dylib", + "-L/usr/local/lib", + "-L/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib", + &input_path.display().to_string(), + "-o", + &output_filename.display().to_string(), + "-lSystem", + ] + } + #[cfg(target_os = "linux")] + { + &[ + "--hash-style=gnu", + "--eh-frame-hdr", + "-shared", + "-o", + &output_filename.display().to_string(), + "-L/lib/../lib64", + "-L/usr/lib/../lib64", + "-lc", + &input_path.display().to_string(), + ] + } + #[cfg(target_os = "windows")] + { + unimplemented!() + } + }; + + let mut linker = std::process::Command::new("ld"); + let proc = linker.args(args.iter()).spawn()?; + proc.wait_with_output()?; + Ok(()) +} + +#[instrument(level = "debug")] +pub fn link_binary(input_path: &Path, output_filename: &Path) -> Result<(), std::io::Error> { + let args: &[&str] = { + #[cfg(target_os = "macos")] + { + &[ + "-L/usr/local/lib", + "-L/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib", + &input_path.display().to_string(), + "-o", + &output_filename.display().to_string(), + "-lSystem", + ] + } + #[cfg(target_os = "linux")] + { + &[ + "-pie", + "--hash-style=gnu", + "--eh-frame-hdr", + "--dynamic-linker", + "/lib64/ld-linux-x86-64.so.2", + "-m", + "elf_x86_64", + "/usr/lib64/Scrt1.o", + "/usr/lib64/crti.o", + "-o", + &output_filename.display().to_string(), + "-L/lib64", + "-L/usr/lib64", + "-zrelro", + "--no-as-needed", + "-lc", + "/usr/lib64/crtn.o", + &input_path.display().to_string(), + ] + } + #[cfg(target_os = "windows")] + { + unimplemented!() + } + }; + + let mut linker = std::process::Command::new("ld"); + let proc = linker.args(args.iter()).spawn()?; + proc.wait_with_output()?; + Ok(()) +} diff --git a/lib/edlang_codegen_mlir/src/wrappers.cpp 
b/lib/edlang_codegen_mlir/src/wrappers.cpp new file mode 100644 index 000000000..004136f08 --- /dev/null +++ b/lib/edlang_codegen_mlir/src/wrappers.cpp @@ -0,0 +1,23 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern "C" LLVMModuleRef mlirTranslateModuleToLLVMIR(MlirOperation module, + LLVMContextRef context) { + mlir::Operation *moduleOp = unwrap(module); + + llvm::LLVMContext *ctx = llvm::unwrap(context); + + std::unique_ptr llvmModule = mlir::translateModuleToLLVMIR( + moduleOp, *ctx); + + LLVMModuleRef moduleRef = llvm::wrap(llvmModule.release()); + + return moduleRef; +} diff --git a/lib/edlang_driver/src/lib.rs b/lib/edlang_driver/src/lib.rs index 0769462e9..226e442db 100644 --- a/lib/edlang_driver/src/lib.rs +++ b/lib/edlang_driver/src/lib.rs @@ -1,5 +1,87 @@ -use std::error::Error; +use std::{error::Error, path::PathBuf, time::Instant}; + +use clap::Parser; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +pub struct CompilerArgs { + /// The input file. + input: PathBuf, + + /// Build for release with all optimizations. + #[arg(short, long, default_value_t = false)] + release: bool, + + /// Build as a library. + #[arg(short, long, default_value_t = false)] + library: bool, +} pub fn main() -> Result<(), Box> { + let start_time = Instant::now(); + + tracing_subscriber::fmt::init(); + + let args = CompilerArgs::parse(); + + /* + let db = crate::db::Database::default(); + let source = ProgramSource::new(&db, std::fs::read_to_string(args.input.clone())?); + tracing::debug!("source code:\n{}", source.input(&db)); + let program = match concrete_parser::parse_ast(&db, source) { + Some(x) => x, + None => { + Diagnostics::dump( + &db, + source, + &concrete_parser::parse_ast::accumulated::( + &db, source, + ), + ); + panic!(); + } + }; + + let cwd = std::env::current_dir()?; + // todo: find a better name, "target" would clash with rust if running in the source tree. 
+ let target_dir = cwd.join("target_ed/"); + let output_file = target_dir.join(PathBuf::from(args.input.file_name().unwrap())); + let output_file = if args.library { + output_file.with_extension("so") + } else { + output_file.with_extension("") + }; + + let session = Session { + file_path: args.input, + debug_info: if args.release { + DebugInfo::None + } else { + DebugInfo::Full + }, + optlevel: if args.release { + OptLevel::Aggressive + } else { + OptLevel::None + }, + source: source.input(&db).to_string(), + library: args.library, + target_dir, + output_file, + }; + tracing::debug!("Compiling with session: {:#?}", session); + + // let object_path = concrete_codegen_mlir::compile(&session, &program)?; + + if session.library { + link_shared_lib(&object_path, &session.output_file.with_extension("so"))?; + } else { + link_binary(&object_path, &session.output_file.with_extension(""))?; + } + */ + + let elapsed = start_time.elapsed(); + tracing::debug!("Done in {:?}", elapsed); + Ok(()) } diff --git a/lib/edlang_parser/Cargo.toml b/lib/edlang_parser/Cargo.toml index ca06f68bf..13df4025b 100644 --- a/lib/edlang_parser/Cargo.toml +++ b/lib/edlang_parser/Cargo.toml @@ -11,6 +11,7 @@ categories = ["compilers"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +ariadne = { version = "0.4.0", features = ["auto-color"] } edlang_ast = { version = "0.1.0", path = "../edlang_ast" } lalrpop-util = { version = "0.20.0", features = ["lexer"] } logos = "0.13.0" diff --git a/lib/edlang_parser/build.rs b/lib/edlang_parser/build.rs new file mode 100644 index 000000000..ca5c2836d --- /dev/null +++ b/lib/edlang_parser/build.rs @@ -0,0 +1,3 @@ +fn main() { + lalrpop::process_root().unwrap(); +} diff --git a/lib/edlang_parser/src/error.rs b/lib/edlang_parser/src/error.rs new file mode 100644 index 000000000..bf742e73d --- /dev/null +++ b/lib/edlang_parser/src/error.rs @@ -0,0 +1,4 @@ +use crate::{lexer::LexicalError, 
tokens::Token}; +use lalrpop_util::ParseError; + +pub type Error = ParseError; diff --git a/lib/edlang_parser/src/grammar.lalrpop b/lib/edlang_parser/src/grammar.lalrpop new file mode 100644 index 000000000..2d2c1bf62 --- /dev/null +++ b/lib/edlang_parser/src/grammar.lalrpop @@ -0,0 +1,119 @@ +use crate::tokens::Token; +use crate::lexer::LexicalError; +use edlang_ast as ast; +use std::str::FromStr; + +grammar; + +extern { + type Location = usize; + type Error = LexicalError; + + enum Token { + // keywords + "let" => Token::KeywordLet, + "const" => Token::KeywordConst, + "fn" => Token::KeywordFn, + "return" => Token::KeywordReturn, + "struct" => Token::KeywordStruct, + "if" => Token::KeywordIf, + "else" => Token::KeywordElse, + "while" => Token::KeywordWhile, + "for" => Token::KeywordFor, + "match" => Token::KeywordMatch, + "mod" => Token::KeywordMod, + "pub" => Token::KeywordPub, + "mut" => Token::KeywordMut, + "use" => Token::KeywordUse, + + // literals + "identifier" => Token::Identifier(), + "integer" => Token::Integer(), + "string" => Token::String(), + "boolean" => Token::Boolean(), + + // Other + + "(" => Token::LeftParen, + ")" => Token::RightParen, + "{" => Token::LeftBracket, + "}" => Token::RightBracket, + "[" => Token::LeftSquareBracket, + "]" => Token::RightSquareBracket, + "=" => Token::Assign, + ";" => Token::Semicolon, + ":" => Token::Colon, + "->" => Token::Arrow, + "," => Token::Coma, + "<" => Token::LessThanSign, + ">" => Token::MoreThanSign, + ">=" => Token::MoreThanEqSign, + "<=" => Token::LessThanEqSign, + "." => Token::Dot, + + // operators + "+" => Token::OperatorAdd, + "-" => Token::OperatorSub, + "*" => Token::OperatorMul, + "/" => Token::OperatorDiv, + "%" => Token::OperatorRem, + "&&" => Token::OperatorAnd, + "||" => Token::OperatorOr, + "==" => Token::OperatorEq, + "!=" => Token::OperatorNe, + "!" 
=> Token::OperatorNot, + "~" => Token::OperatorBitwiseNot, + "^" => Token::OperatorBitwiseXor, + "&" => Token::OperatorBitwiseAnd, + "|" => Token::OperatorBitwiseOr, + } +} + +// lalrpop macros + +Dot: Vec = { + ".")*> => match e { + None => v, + Some(e) => { + v.push(e); + v + } + } +}; + +Comma: Vec = { + ",")*> => match e { + None => v, + Some(e) => { + v.push(e); + v + } + } +}; + +SemiColon: Vec = { + ";")*> => match e { + None => v, + Some(e) => { + v.push(e); + v + } + } +}; + +PlusSeparated: Vec = { + "+")*> => match e { + None => v, + Some(e) => { + v.push(e); + v + } + } +}; + +pub(crate) Ident: ast::Ident = { + => ast::Ident { + name, + span: ast::Span::new(lo, hi), + } +} diff --git a/lib/edlang_parser/src/lexer.rs b/lib/edlang_parser/src/lexer.rs new file mode 100644 index 000000000..8d4eda6f8 --- /dev/null +++ b/lib/edlang_parser/src/lexer.rs @@ -0,0 +1,47 @@ +use std::{fmt::Display, ops::Range}; + +use logos::{Logos, SpannedIter}; + +use crate::tokens::{LexingError, Token}; + +pub type Spanned = Result<(Loc, Tok, Loc), Error>; + +#[derive(Debug, Clone)] +pub enum LexicalError { + InvalidToken(LexingError, Range), +} + +impl Display for LexicalError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + LexicalError::InvalidToken(err, span) => { + write!(f, "lexical error at ({:?}): {:?}", err, span) + } + } + } +} + +pub struct Lexer<'input> { + // instead of an iterator over characters, we have a token iterator + token_stream: SpannedIter<'input, Token>, +} + +impl<'input> Lexer<'input> { + pub fn new(input: &'input str) -> Self { + // the Token::lexer() method is provided by the Logos trait + Self { + token_stream: Token::lexer(input).spanned(), + } + } +} + +impl<'input> Iterator for Lexer<'input> { + type Item = Spanned; + + fn next(&mut self) -> Option { + self.token_stream.next().map(|(token, span)| match token { + Ok(token) => Ok((span.start, token, span.end)), + Err(err) => 
Err(LexicalError::InvalidToken(err, span)), + }) + } +} diff --git a/lib/edlang_parser/src/lib.rs b/lib/edlang_parser/src/lib.rs index 8b1378917..4aa0acf39 100644 --- a/lib/edlang_parser/src/lib.rs +++ b/lib/edlang_parser/src/lib.rs @@ -1 +1,110 @@ +use std::{ops::Range, path::Path}; +use ariadne::{Color, ColorGenerator, Fmt, Label, Report, ReportKind, Source}; +use error::Error; +use lalrpop_util::ParseError; +use lexer::{Lexer, LexicalError}; + +pub mod error; +pub mod lexer; +pub mod tokens; + +pub mod grammar { + #![allow(dead_code, unused_imports, unused_variables)] + + pub use self::grammar::*; + use lalrpop_util::lalrpop_mod; + + lalrpop_mod!(pub grammar); +} + +pub fn parse_ast(source: &str) { + let lexer = Lexer::new(source); + let parser = grammar::IdentParser::new(); +} + +pub fn print_error(path: &str, source: &str, error: &Error) -> Result<(), std::io::Error> { + let source = Source::from(source); + match error { + ParseError::InvalidToken { location } => { + let loc = *location; + Report::build(ReportKind::Error, path, loc) + .with_code(1) + .with_message("Invalid token") + .with_label(Label::new((path, loc..(loc + 1))).with_message("invalid token")) + .finish() + .eprint((path, source))?; + } + ParseError::UnrecognizedEof { location, expected } => { + let loc = *location; + Report::build(ReportKind::Error, path, loc) + .with_code(2) + .with_message("Unrecognized end of file") + .with_label(Label::new((path, loc..(loc + 1))).with_message(format!( + "unrecognized eof, expected one of the following: {:?}", + expected + ))) + .finish() + .eprint((path, source))?; + } + ParseError::UnrecognizedToken { token, expected } => { + Report::build(ReportKind::Error, path, token.0) + .with_code(3) + .with_message("Unrecognized token") + .with_label(Label::new((path, token.0..token.2)).with_message(format!( + "unrecognized token {:?}, expected one of the following: {:?}", + token.1, expected + ))) + .finish() + .eprint((path, source))?; + } + ParseError::ExtraToken 
{ token } => { + Report::build(ReportKind::Error, path, token.0) + .with_code(4) + .with_message("Extra token") + .with_label( + Label::new((path, token.0..token.2)) + .with_message(format!("unexpected extra token {:?}", token.1)), + ) + .finish() + .eprint((path, source))?; + } + ParseError::User { error } => match error { + LexicalError::InvalidToken(err, range) => match err { + tokens::LexingError::NumberParseError => { + Report::build(ReportKind::Error, path, range.start) + .with_code(4) + .with_message("Error parsing literal number") + .with_label( + Label::new((path, range.start..range.end)) + .with_message("error parsing literal number"), + ) + .finish() + .eprint((path, source))?; + } + tokens::LexingError::Other => { + Report::build(ReportKind::Error, path, range.start) + .with_code(4) + .with_message("Other error") + .with_label( + Label::new((path, range.start..range.end)).with_message("other error"), + ) + .finish() + .eprint((path, source))?; + } + }, + }, + } + + Ok(()) +} + +#[cfg(test)] +mod test { + use crate::parse_ast; + + #[test] + fn test_ident() { + parse_ast("hello"); + } +} diff --git a/lib/edlang_parser/src/tokens.rs b/lib/edlang_parser/src/tokens.rs new file mode 100644 index 000000000..93a9bf573 --- /dev/null +++ b/lib/edlang_parser/src/tokens.rs @@ -0,0 +1,128 @@ +use logos::Logos; +use std::convert::Infallible; + +#[derive(Debug, PartialEq, Clone, Default)] +pub enum LexingError { + NumberParseError, + #[default] + Other, +} + +impl From for LexingError { + fn from(_: std::num::ParseIntError) -> Self { + LexingError::NumberParseError + } +} + +impl From for LexingError { + fn from(_: Infallible) -> Self { + LexingError::Other + } +} + +#[derive(Logos, Debug, PartialEq, Clone)] +#[logos(error = LexingError, skip r"[ \t\n\f]+", skip r"//[^\n]*", skip r"/\*(?:[^*]|\*[^/])*\*/")] +pub enum Token { + #[token("let")] + KeywordLet, + #[token("const")] + KeywordConst, + #[token("fn")] + KeywordFn, + #[token("return")] + KeywordReturn, + 
#[token("struct")] + KeywordStruct, + #[token("if")] + KeywordIf, + #[token("else")] + KeywordElse, + #[token("while")] + KeywordWhile, + #[token("for")] + KeywordFor, + #[token("match")] + KeywordMatch, + #[token("mod")] + KeywordMod, + #[token("pub")] + KeywordPub, + #[token("mut")] + KeywordMut, + #[token("use")] + KeywordUse, + + // Modern way of allowing identifiers, read: https://unicode.org/reports/tr31/ + #[regex(r"[\p{XID_Start}_]\p{XID_Continue}*", |lex| lex.slice().to_string())] + Identifier(String), + + // Literals + #[regex(r"\d+", |lex| lex.slice().parse::().unwrap())] + Integer(u64), + #[regex(r#""(?:[^"]|\\")*""#, |lex| lex.slice().to_string())] + String(String), + #[regex(r"(true|false)", |lex| lex.slice().parse::().unwrap())] + Boolean(bool), + + #[token("(")] + LeftParen, + #[token(")")] + RightParen, + #[token("{")] + LeftBracket, + #[token("}")] + RightBracket, + #[token("[")] + LeftSquareBracket, + #[token("]")] + RightSquareBracket, + #[token("=")] + Assign, + #[token(";")] + Semicolon, + #[token(":")] + Colon, + #[token("->")] + Arrow, + #[token(",")] + Coma, + #[token(".")] + Dot, + #[token("<")] + LessThanSign, + #[token(">")] + MoreThanSign, + #[token(">=")] + MoreThanEqSign, + #[token("<=")] + LessThanEqSign, + + #[token("+")] + OperatorAdd, + #[token("-")] + OperatorSub, + #[token("*")] + OperatorMul, + #[token("/")] + OperatorDiv, + #[token("%")] + OperatorRem, + #[token("&&")] + OperatorAnd, + #[token("||")] + OperatorOr, + #[token("==")] + OperatorEq, + #[token("!=")] + OperatorNe, + #[token("!")] + OperatorNot, + #[token("~")] + OperatorBitwiseNot, + #[token("^")] + OperatorBitwiseXor, + #[token("&")] + OperatorBitwiseAnd, + #[token("|")] + OperatorBitwiseOr, +} diff --git a/lib/edlang_session/Cargo.toml b/lib/edlang_session/Cargo.toml index d5d43790d..626059bb5 100644 --- a/lib/edlang_session/Cargo.toml +++ b/lib/edlang_session/Cargo.toml @@ -6,3 +6,4 @@ edition = "2021" # See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +ariadne = { version = "0.4.0", features = ["auto-color"] } diff --git a/lib/edlang_session/src/lib.rs b/lib/edlang_session/src/lib.rs index 8b1378917..4c3f60371 100644 --- a/lib/edlang_session/src/lib.rs +++ b/lib/edlang_session/src/lib.rs @@ -1 +1,29 @@ +use std::path::PathBuf; +use ariadne::Source; + +#[derive(Debug, Clone)] +pub struct Session { + pub file_path: PathBuf, + pub debug_info: DebugInfo, + pub optlevel: OptLevel, + pub source: Source, + pub library: bool, + /// The directory where to store artifacts and intermediate files such as object files. + pub target_dir: PathBuf, + pub output_file: PathBuf, +} + +#[derive(Clone, Copy, Debug, PartialEq, Hash)] +pub enum OptLevel { + None, // -O0 + Less, // -O1 + Default, // -O2 + Aggressive, // -O3 +} + +#[derive(Clone, Copy, Debug, PartialEq, Hash)] +pub enum DebugInfo { + None, + Full, +}