diff --git a/Cargo.lock b/Cargo.lock index eb6fd1756..79ad11c40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,9 +19,9 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aho-corasick" -version = "0.7.20" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" dependencies = [ "memchr", ] @@ -38,9 +38,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.3.0" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e579a7752471abc2a8268df8b20005e3eadd975f585398f17efcfd8d4927371" +checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" dependencies = [ "anstyle", "anstyle-parse", @@ -77,9 +77,9 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bcd8291a340dd8ac70e18878bc4501dd7b4ff970cfa21c207d36ece51ea88fd" +checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" dependencies = [ "anstyle", "windows-sys 0.48.0", @@ -156,9 +156,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.2.2" +version = "4.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b802d85aaf3a1cdb02b224ba472ebdea62014fccfcb269b95a4d76443b5ee5a" +checksum = "34d21f9bf1b425d2968943631ec91202fe5e837264063503708b83013f8fc938" dependencies = [ "clap_builder", "clap_derive", @@ -167,9 +167,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.2.2" +version = "4.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14a1a858f532119338887a4b8e1af9c60de8249cd7bafd68036a489e261e37b6" +checksum = "914c8c79fb560f238ef6429439a30023c862f7a28e688c58f7203f12b29970bd" dependencies = [ "anstream", "anstyle", @@ -187,7 +187,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.15", + "syn", ] [[package]] @@ -339,9 +339,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", "libc", @@ -390,9 +390,9 @@ dependencies = [ [[package]] name = "inkwell" -version = "0.1.1" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbac11e485159a525867fb7e6aa61981453e6a72f625fde6a4ab3047b0c6dec9" +checksum = "3f4fcb4a4fa0b8f7b4178e24e6317d6f8b95ab500d8e6e1bd4283b6860e369c1" dependencies = [ "either", "inkwell_internals", @@ -404,13 +404,13 @@ dependencies = [ [[package]] name = "inkwell_internals" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87d00c17e264ce02be5bc23d7bff959188ec7137beddd06b8b6b05a7c680ea85" +checksum = "b185e7d068d6820411502efa14d8fbf010750485399402156b72dd2a548ef8e9" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -447,9 +447,9 @@ dependencies = [ [[package]] name = "lalrpop" -version = "0.19.9" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f34313ec00c2eb5c3c87ca6732ea02dcf3af99c3ff7a8fb622ffb99c9d860a87" +checksum = "da4081d44f4611b66c6dd725e6de3169f9f63905421e8626fcb86b6a898998b8" dependencies = [ "ascii-canvas", "bit-set", @@ -461,7 +461,7 @@ dependencies = [ "petgraph", "pico-args", "regex", - "regex-syntax", + "regex-syntax 0.7.1", "string_cache", "term", "tiny-keccak", @@ -470,9 +470,9 @@ dependencies = [ [[package]] name = "lalrpop-util" -version = "0.19.9" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5c1f7869c94d214466c5fd432dfed12c379fd87786768d36455892d46b18edd" +checksum = "3f35c735096c0293d313e8f2a641627472b83d01b937177fe76e5e2708d31e0d" dependencies = [ "regex", ] @@ -485,15 +485,15 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.141" +version = "0.2.144" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" +checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" [[package]] name = "linux-raw-sys" -version = "0.3.1" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" +checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f" [[package]] name = "llvm-sys" @@ -546,8 +546,8 @@ dependencies = [ "fnv", "proc-macro2", "quote", - "regex-syntax", - "syn 2.0.15", + "regex-syntax 0.6.29", + "syn", ] [[package]] @@ -670,9 +670,9 @@ dependencies = [ [[package]] name = "pico-args" -version = "0.4.2" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468" +checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" [[package]] name = "pin-project-lite" @@ -697,9 +697,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.26" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500" dependencies = [ "proc-macro2", ] @@ -726,13 +726,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.7.3" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.7.1", ] [[package]] @@ -741,7 +741,7 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" dependencies = [ - "regex-syntax", + "regex-syntax 0.6.29", ] [[package]] @@ -751,16 +751,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] -name = "rustc-demangle" -version = "0.1.22" +name = "regex-syntax" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a36c42d1873f9a77c53bde094f9664d9891bc604a45b4798fd2c389ed12e5b" +checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustix" -version = "0.37.11" +version = "0.37.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85597d61f83914ddeba6a47b3b8ffe7365107221c2e557ed94426489fefb5f77" +checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" dependencies = [ "bitflags", "errno", @@ -830,20 +836,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.109" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" +checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01" dependencies = [ "proc-macro2", "quote", @@ -878,7 +873,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn", ] [[package]] @@ -914,20 +909,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" +checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] name = "tracing-core" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" +checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" dependencies = [ "once_cell", "valuable", @@ -956,9 +951,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.16" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6176eae26dd70d0c919749377897b54a9276bd7061339665dd68777926b5a70" +checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" dependencies = [ "matchers", "nu-ansi-term", diff --git a/Cargo.toml b/Cargo.toml index eec905def..e41d573f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,16 +12,16 @@ categories = ["compilers"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -clap = { version = "4.2.2", features = ["derive"] } +clap = { version = "4.2.7", features = ["derive"] } color-eyre = "0.6.2" itertools = "0.10.5" -lalrpop-util = { version = "0.19.9", features = ["lexer"] } -regex = "1.7.3" +lalrpop-util = { version = "0.20.0", features = ["lexer"] } +regex = "1.8.1" tracing = "0.1.37" -tracing-subscriber = { version = "0.3.16", features = ["env-filter"] } -inkwell = { version = "0.1.1", features = ["llvm15-0"] } +tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } +inkwell = { version = "0.2.0", features = ["llvm15-0"] } annotate-snippets = { version = "0.9.1", features = ["color"] } logos = "0.13.0" [build-dependencies] -lalrpop = "0.19.9" +lalrpop = "0.20.0" diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 2ac53c767..7e1cf4815 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -24,7 +24,8 @@ pub enum LiteralValue { String, Integer { bits: usize, - signed: bool + signed: bool, + value: String, }, } @@ -34,7 +35,7 @@ pub enum Expression { Variable(String), Call { function: String, - args: Vec> + args: Vec>, }, BinaryOp(Box, OpCode, Box), } @@ -45,6 +46,12 @@ pub struct Parameter { pub type_name: String, } +impl Parameter { + pub const fn new(ident: String, type_name: String) -> Self { + Self { ident, type_name } + } +} + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Function { pub name: String, @@ -53,11 +60,27 @@ pub struct Function { pub return_type: Option, } +impl Function { + pub const fn new( + name: String, + params: Vec, + body: Vec, + return_type: Option, + ) -> Self { + Self { + name, + params, + body, + return_type, + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Statement { Variable { name: String, - value: Box + value: Box, }, Return(Option>), Function(Function), diff --git a/src/check.rs b/src/check.rs index c77cfabeb..e40aff618 100644 --- a/src/check.rs +++ b/src/check.rs @@ -1,3 +1,4 @@ +/* use crate::{ ast::{self, Statement}, codegen::ProgramData, @@ -112,3 +113,4 @@ pub fn check<'a>(data: &'a ProgramData, ast: &ast::Program) -> Vec> { errors } + */ diff --git a/src/codegen.rs b/src/codegen.rs index bf2612563..e050038f4 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -10,10 +10,10 @@ use inkwell::{ values::{BasicMetadataValueEnum, BasicValue, BasicValueEnum}, }; use itertools::{Either, Itertools}; - +/* use statement::Statement; -use crate::ast::{self, statement, Expression, Function, Identifier, OpCode, SpanValue, Term}; +use crate::ast::{self, statement, Expression, Function, Identifier, OpCode, SpanValue, Term, LiteralValue}; #[derive(Debug, Clone)] pub struct ProgramData { @@ -203,9 +203,9 @@ impl<'ctx> CodeGen<'ctx> { variables: &mut HashMap>, ) -> Result>> { Ok(match &*expr.value { - Expression::Term(term) => Some(self.compile_term(term, variables)?), + Expression::Literal(term) => Some(self.compile_term(&term, variables)?), Expression::Call(func_id, args) => self.compile_call(block, func_id, args, variables)?, - Expression::Op(lhs, op, rhs) => Some(self.compile_op(block, lhs, op, rhs, variables)?), + Expression::BinaryOp(lhs, op, rhs) => Some(self.compile_op(block, lhs, op, rhs, variables)?), }) } @@ -269,7 +269,7 @@ impl<'ctx> CodeGen<'ctx> { pub fn compile_term( &self, - term: &Term, + term: &LiteralValue, variables: &mut HashMap>, ) -> Result> { let value = match term { @@ -284,3 +284,4 @@ impl<'ctx> CodeGen<'ctx> { Ok(value) } } + */ diff --git a/src/grammar.lalrpop b/src/grammar.lalrpop index c4ecd6fc3..8c9c125ce 100644 --- a/src/grammar.lalrpop +++ b/src/grammar.lalrpop @@ -24,8 +24,8 @@ FactorOp: OpCode = { "%" => OpCode::Rem, } -Tier: SpanValue> = { - > => SpanValue::new(l, Box::new(Expr::Op(t, o, n)), r), +Tier: Box = { + > => Box::new(Expression::BinaryOp(t, o, n)), NextTier }; @@ -33,18 +33,18 @@ Expr = Tier; Factor = Tier; // Terms: variables, literals, calls -Term: SpanValue> = { - => SpanValue::new(l, Box::new(Expr::new_ident(i)), r), - => SpanValue::new(l, Box::new(Expr::new_number(n)), r), - "(" > ")" => SpanValue::new(l, Box::new(Expr::Call(i, values)), r), +Term: Box = { + => Box::new(Expression::Variable(i)), + => Box::new(Expression::Literal(n)), + "(" > ")" => Box::new(Expression::Call { function: i, args: values}), "(" ")" }; -Identifier: Identifier = { - => Identifier(SpanValue::new(l, i.to_owned(), r)), +Identifier: String = { + => i.to_string(), }; -Num: Number = => Number(SpanValue::new(l, i64::from_str(n).unwrap(), r)); +Num: LiteralValue = => LiteralValue::Integer { bits: 32, signed: true, value: n.to_string()}; // Function handling Param: Parameter = { @@ -53,8 +53,8 @@ Param: Parameter = { Params = Comma; -FunctionReturn: Identifier = { - "->" => i, +FunctionReturn: String = { + "->" => i.to_string(), } Function: Function = { @@ -62,18 +62,18 @@ Function: Function = { } // statements not including function definitions -BasicStatement: SpanValue = { - "let" "=" ";" => SpanValue::new(l, Statement::new_assignment(i, e), r), - "=" ";" => SpanValue::new(l, Statement::new_definition(i, e), r), - "return" ";" => SpanValue::new(l, Statement::Return(e), r), +BasicStatement: Statement = { + "let" "=" ";" => Statement::Variable { name: i, value: e}, + "=" ";" => Statement::Variable { name: i, value: e}, + "return" ";" => Statement::Return(e), }; -Statement: SpanValue = { +Statement: Statement = { BasicStatement, - => SpanValue::new(l, Statement::new_function(f), r), + => Statement::Function(f), }; -Statements: Vec> = { +Statements: Vec = { Statement => vec![<>], => { s.push(n); diff --git a/src/lexer.rs b/src/lexer.rs index e69de29bb..cbdb0ef0f 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -0,0 +1,34 @@ +use logos::{Logos, SpannedIter}; + +use crate::tokens::Token; + +pub type Spanned = Result<(Loc, Tok, Loc), Error>; + +pub enum LexicalError { + InvalidToken, +} + +pub struct Lexer<'input> { + // instead of an iterator over characters, we have a token iterator + token_stream: SpannedIter<'input, Token>, +} + +impl<'input> Lexer<'input> { + pub fn new(input: &'input str) -> Self { + // the Token::lexer() method is provided by the Logos trait + Self { + token_stream: Token::lexer(input).spanned(), + } + } +} + +impl<'input> Iterator for Lexer<'input> { + type Item = Spanned; + + fn next(&mut self) -> Option { + self.token_stream.next().map(|(token, span)| match token { + Ok(token) => Ok((span.start, token, span.end)), + Err(()) => Err(LexicalError::InvalidToken), + }) + } +} diff --git a/src/main.rs b/src/main.rs index a90e8f76e..8ceee9275 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,19 +1,16 @@ #![allow(clippy::too_many_arguments)] -use check::Check; use clap::{Parser, Subcommand}; use color_eyre::Result; use inkwell::{context::Context, execution_engine::JitFunction, OptimizationLevel}; use lalrpop_util::lalrpop_mod; -use std::{fs, path::PathBuf}; - -use crate::codegen::ProgramData; +use std::{fs, path::PathBuf, println}; pub mod ast; pub mod check; pub mod codegen; -pub mod tokens; pub mod lexer; +pub mod tokens; lalrpop_mod!(pub grammar); @@ -60,6 +57,7 @@ enum Commands { }, } +/* fn check_program(program: &ProgramData, ast: &ast::Program) -> bool { let errors = check::check(program, ast); @@ -83,6 +81,7 @@ fn check_program(program: &ProgramData, ast: &ast::Program) -> bool { error_count == 0 } +*/ fn main() -> Result<()> { color_eyre::install()?; @@ -97,8 +96,8 @@ fn main() -> Result<()> { let ast = parser.parse(&code).unwrap(); let str_path = input.to_string_lossy(); - let program = ProgramData::new(&str_path, &code); - check_program(&program, &ast); + //let program = ProgramData::new(&str_path, &code); + //check_program(&program, &ast); } Commands::Compile { input, @@ -111,6 +110,9 @@ fn main() -> Result<()> { let parser = grammar::ProgramParser::new(); let ast = parser.parse(&code).unwrap(); + println!("{:#?}", ast); + + /* let str_path = input.to_string_lossy(); let program = ProgramData::new(&str_path, &code); @@ -130,13 +132,14 @@ fn main() -> Result<()> { } else { println!("{generated_llvm_ir}"); } + */ } Commands::Run { input } => { let code = fs::read_to_string(&input)?; let parser = grammar::ProgramParser::new(); let ast = parser.parse(&code).unwrap(); - + /* let str_path = input.to_string_lossy(); let program = ProgramData::new(&str_path, &code); @@ -155,6 +158,7 @@ fn main() -> Result<()> { execution_engine.get_function("main")?; main.call(); }; + */ } } diff --git a/src/tokens.rs b/src/tokens.rs index d8792aad8..0663293bc 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,42 +1,42 @@ -use std::fmt; use logos::Logos; +use std::fmt; #[derive(Logos, Debug, PartialEq)] #[logos(skip r"[ \t\n\f]+", skip r"#.*\n?")] pub enum Token { - #[token("var")] - KeywordVar, - #[token("print")] - KeywordPrint, + #[token("var")] + KeywordVar, + #[token("print")] + KeywordPrint, - #[regex("[_a-zA-Z][_0-9a-zA-Z]*", |lex| lex.slice().parse().ok())] - Identifier(String), - #[regex(r"\d+", |lex| lex.slice().parse().ok())] - Integer(i64), + #[regex("[_a-zA-Z][_0-9a-zA-Z]*", |lex| lex.slice().parse().ok())] + Identifier(String), + #[regex(r"\d+", |lex| lex.slice().parse().ok())] + Integer(i64), - #[token("(")] - LParen, - #[token(")")] - RParen, - #[token("=")] - Assign, - #[token(";")] - Semicolon, + #[token("(")] + LParen, + #[token(")")] + RParen, + #[token("=")] + Assign, + #[token(";")] + Semicolon, - #[token("+")] - OperatorAdd, - #[token("-")] - OperatorSub, - #[token("*")] - OperatorMul, - #[token("/")] - OperatorDiv, - #[token("%")] - OperatorRem, + #[token("+")] + OperatorAdd, + #[token("-")] + OperatorSub, + #[token("*")] + OperatorMul, + #[token("/")] + OperatorDiv, + #[token("%")] + OperatorRem, } impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self) + write!(f, "{:?}", self) } - } +}