diff --git a/simple.ed b/simple.ed index df490fdcf..788b78dd3 100644 --- a/simple.ed +++ b/simple.ed @@ -8,17 +8,17 @@ fn test(x: Hello) { } fn works(x: i64) -> i64 { - let z = 0; + let z = 0i64; if 2 == x { - z = x * 2; + z = x * 2i64; } else { - z = x * 3; + z = x * 3i64; } return z; } fn main() -> i64 { - let y = 2; + let y = 2i64; let z = y; return works(z); } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 14b67573e..8668853ad 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -53,8 +53,8 @@ pub enum LiteralValue { String(String), Integer { value: String, - bits: Option, - signed: Option, + bits: u32, + signed: bool, }, Boolean(bool), } diff --git a/src/check.rs b/src/check.rs index cba22f397..eddeff9fa 100644 --- a/src/check.rs +++ b/src/check.rs @@ -1,11 +1,14 @@ use crate::{ ast::{self, Statement}, codegen::ProgramData, + lexer::LexicalError, + tokens::Token, }; use annotate_snippets::{ display_list::{DisplayList, FormatOptions}, snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation}, }; +use lalrpop_util::ParseError; #[derive(Debug)] pub enum Check<'a> { @@ -82,3 +85,65 @@ pub fn check<'a>(data: &'a ProgramData, ast: &ast::Program) -> Vec> { } errors } + +pub fn print_error(source: &str, err: ParseError) { + match err { + ParseError::InvalidToken { location } => { + let snippet = Snippet { + title: None, + footer: vec![], + slices: vec![Slice { + source, + line_start: 1, + fold: true, + origin: None, + + annotations: vec![SourceAnnotation { + label: "invalid token", + annotation_type: AnnotationType::Error, + range: (location, location), + }], + }], + opt: FormatOptions { + color: true, + ..Default::default() + }, + }; + let dl = DisplayList::from(snippet); + println!("{dl}"); + } + ParseError::UnrecognizedEof { location, expected } => todo!(), + ParseError::UnrecognizedToken { token, expected } => todo!(), + ParseError::ExtraToken { token } => todo!(), + ParseError::User { error } => match error { + LexicalError::InvalidToken(err, 
range) => { + let title = format!("invalid token (lexical error): {:?}", err); + let snippet = Snippet { + title: Some(Annotation { + id: None, + label: Some(&title), + annotation_type: AnnotationType::Error, + }), + footer: vec![], + slices: vec![Slice { + source: dbg!(source), + line_start: 1, + fold: false, + origin: None, + annotations: vec![SourceAnnotation { + label: "invalid token (lexical error)", + annotation_type: AnnotationType::Error, + range: dbg!((range.start, range.end)), + }], + }], + opt: FormatOptions { + color: true, + ..Default::default() + }, + }; + let dl = DisplayList::from(snippet); + println!("{dl}"); + } + }, + }; +} diff --git a/src/codegen.rs b/src/codegen.rs index 7133a4c2e..73bbe567e 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -552,8 +552,8 @@ impl<'ctx> CodeGen<'ctx> { bits, signed, } => { - let bits = bits.unwrap_or(32); - let signed = signed.unwrap_or(true); + let bits = *bits; + let signed = *signed; ( self.context .custom_width_int_type(bits) diff --git a/src/grammar.lalrpop b/src/grammar.lalrpop index adeb1e360..78bfc0ec7 100644 --- a/src/grammar.lalrpop +++ b/src/grammar.lalrpop @@ -24,6 +24,8 @@ extern { "return" => Token::KeywordReturn, "fn" => Token::KeywordFn, "ptr" => Token::KeywordPtr, + "_" => Token::KeywordUnderscore, + "(" => Token::LeftParen, ")" => Token::RightParen, "{" => Token::LeftBracket, @@ -151,10 +153,52 @@ Term: Box = { "(" ")" }; -Number: ast::LiteralValue = => ast::LiteralValue::Integer { - value: n, - bits: None, - signed: None +Number: ast::LiteralValue = { + "_"? "i8" => ast::LiteralValue::Integer { + value: n, + bits: 8, + signed: true, + }, + "_"? "i16" => ast::LiteralValue::Integer { + value: n, + bits: 16, + signed: true, + }, + "_"? "i32" => ast::LiteralValue::Integer { + value: n, + bits: 32, + signed: true, + }, + "_"? "i64" => ast::LiteralValue::Integer { + value: n, + bits: 64, + signed: true, + }, + "_"? 
"u8" => ast::LiteralValue::Integer { + value: n, + bits: 8, + signed: false, + }, + "_"? "u16" => ast::LiteralValue::Integer { + value: n, + bits: 16, + signed: false, + }, + "_"? "u32" => ast::LiteralValue::Integer { + value: n, + bits: 32, + signed: false, + }, + "_"? "u64" => ast::LiteralValue::Integer { + value: n, + bits: 64, + signed: false, + }, + => ast::LiteralValue::Integer { + value: n, + bits: 32, + signed: true, + }, }; StringLit: ast::LiteralValue = => ast::LiteralValue::String(n[1..(n.len()-1)].to_string()); diff --git a/src/lexer.rs b/src/lexer.rs index 2c6a5d61a..8d4eda6f8 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -2,19 +2,21 @@ use std::{fmt::Display, ops::Range}; use logos::{Logos, SpannedIter}; -use crate::tokens::Token; +use crate::tokens::{LexingError, Token}; pub type Spanned = Result<(Loc, Tok, Loc), Error>; #[derive(Debug, Clone)] pub enum LexicalError { - InvalidToken(Range), + InvalidToken(LexingError, Range), } impl Display for LexicalError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - LexicalError::InvalidToken(span) => write!(f, "lexical error at: {:?}", span), + LexicalError::InvalidToken(err, span) => { + write!(f, "lexical error at ({:?}): {:?}", err, span) + } } } } @@ -39,7 +41,7 @@ impl<'input> Iterator for Lexer<'input> { fn next(&mut self) -> Option { self.token_stream.next().map(|(token, span)| match token { Ok(token) => Ok((span.start, token, span.end)), - Err(()) => Err(LexicalError::InvalidToken(span)), + Err(err) => Err(LexicalError::InvalidToken(err, span)), }) } } diff --git a/src/main.rs b/src/main.rs index 2eb62cd93..47403110b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ #![allow(clippy::too_many_arguments)] +use check::print_error; use clap::{Parser, Subcommand}; use codegen::ProgramData; use color_eyre::Result; @@ -101,7 +102,7 @@ fn main() -> Result<()> { let lexer = Lexer::new(code.as_str()); let parser = grammar::ProgramParser::new(); let mut ast = 
parser.parse(lexer)?; - type_analysis::type_inference(&mut ast); + type_analysis::type_inference2(&mut ast); let program = ProgramData::new(&input, &code); check_program(&program, &ast); } @@ -109,9 +110,15 @@ fn main() -> Result<()> { let code = fs::read_to_string(input)?; let lexer = Lexer::new(code.as_str()); let parser = grammar::ProgramParser::new(); - let mut ast = parser.parse(lexer)?; - type_analysis::type_inference2(&mut ast); - println!("{ast:#?}"); + match parser.parse(lexer) { + Ok(mut ast) => { + type_analysis::type_inference2(&mut ast); + println!("{ast:#?}"); + } + Err(e) => { + print_error(&code, e); + } + } } Commands::Compile { input, @@ -123,7 +130,7 @@ fn main() -> Result<()> { let lexer = Lexer::new(code.as_str()); let parser = grammar::ProgramParser::new(); let mut ast: Program = parser.parse(lexer)?; - type_analysis::type_inference(&mut ast); + type_analysis::type_inference2(&mut ast); let program = ProgramData::new(&input, &code); diff --git a/src/tokens.rs b/src/tokens.rs index 21cc791b8..3f22260f3 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,9 +1,30 @@ use logos::Logos; -use std::fmt; +use std::{convert::Infallible, fmt}; + +// https://github.com/maciejhirsz/logos/issues/133 + +#[derive(Debug, PartialEq, Clone, Default)] +pub enum LexingError { + NumberParseError, + #[default] + Other, +} + +impl From for LexingError { + fn from(_: std::num::ParseIntError) -> Self { + LexingError::NumberParseError + } +} + +impl From for LexingError { + fn from(_: Infallible) -> Self { + LexingError::Other + } +} // todo: https://github.com/maciejhirsz/logos/issues/133#issuecomment-619444615 #[derive(Logos, Debug, PartialEq, Clone)] -#[logos(skip r"[ \t\n\f]+", skip r"#.*\n?")] +#[logos(error = LexingError, skip r"[ \t\n\f]+", skip r"#.*\n?")] pub enum Token { #[token("let")] KeywordLet, @@ -21,14 +42,16 @@ pub enum Token { KeywordIf, #[token("else")] KeywordElse, + #[token("_")] + KeywordUnderscore, - #[regex(r"_?\p{XID_Start}\p{XID_Continue}*", 
|lex| lex.slice().parse().ok())] + #[regex(r"_?\p{XID_Start}\p{XID_Continue}*", |lex| lex.slice().to_string())] Identifier(String), - #[regex(r"\d+", |lex| lex.slice().parse().ok())] + #[regex(r"\d+", |lex| lex.slice().to_string())] Integer(String), #[regex(r#""(?:[^"]|\\")*""#, |lex| lex.slice().to_string())] String(String), - #[regex(r"(true|false)", |lex| lex.slice().parse().ok())] + #[regex(r"(true|false)", |lex| lex.slice().parse::().unwrap())] Boolean(bool), #[token("bool")] diff --git a/src/type_analysis.rs b/src/type_analysis.rs index 07849ff64..a5517cc05 100644 --- a/src/type_analysis.rs +++ b/src/type_analysis.rs @@ -1,7 +1,5 @@ use std::collections::{HashMap, HashSet}; -use tracing::{info, warn}; - use crate::ast::{self, Expression, Function, Statement, TypeExp}; #[derive(Debug, Clone, Default)] @@ -10,7 +8,18 @@ struct Storage { functions: HashMap, } +/* +To briefly summarize the union-find algorithm, given the set of all types in a proof, +it allows one to group them together into equivalence classes by means of a union procedure and to + pick a representative for each such class using a find procedure. Emphasizing the word procedure in + the sense of side effect, we're clearly leaving the realm of logic in order to prepare an effective algorithm. + The representative of a union(a, b) is determined such that, if both a and b are + type variables then the representative is arbitrarily one of them, but while uniting a variable and a term, the + term becomes the representative. Assuming an implementation of union-find at hand, one can formulate the unification of two monotypes as follows: + */ + // this works, but need to find a way to store the found info + handle literal integer types (or not?)
+// maybe use scope ids pub fn type_inference2(ast: &mut ast::Program) { let mut storage = Storage::default(); @@ -166,16 +175,10 @@ fn type_inference_expression( value: _, bits, signed, - } => { - if bits.is_some() && signed.is_some() { - Some(TypeExp::Integer { - bits: bits.unwrap(), - signed: signed.unwrap(), - }) - } else { - None - } - } + } => Some(TypeExp::Integer { + bits: *bits, + signed: *signed, + }), ast::LiteralValue::Boolean(_) => Some(TypeExp::Boolean), } } @@ -230,318 +233,3 @@ fn type_inference_expression( }, } } - -pub fn type_inference(ast: &mut ast::Program) { - let mut struct_cache: HashMap> = HashMap::new(); - for statement in ast.statements.iter_mut() { - if let Statement::Struct(st) = statement { - let fields = st - .fields - .iter() - .map(|x| (x.ident.clone(), x.type_exp.clone())) - .collect(); - struct_cache.insert(st.name.clone(), fields); - } - } - - let mut fn_cache: HashMap = HashMap::new(); - for statement in ast.statements.iter_mut() { - if let Statement::Function(function) = statement { - fn_cache.insert(function.name.clone(), function.clone()); - } - } - - for statement in ast.statements.iter_mut() { - if let Statement::Function(function) = statement { - let ret_type = function.return_type.clone(); - let mut var_cache: HashMap = HashMap::new(); - - for arg in &function.params { - var_cache.insert(arg.ident.clone(), arg.type_exp.clone()); - } - - if let Some(ret_type) = &ret_type { - let ret_type_exp = fn_return_type(function); - - if let Some(exp) = ret_type_exp { - set_expression_type(exp, ret_type, &mut var_cache); - } - } - - update_statements(&mut function.body, &mut var_cache, &fn_cache); - } - } -} - -fn update_statements( - statements: &mut [Statement], - var_cache: &mut HashMap, - fn_cache: &HashMap, -) { - let mut var_cache = var_cache.clone(); - - { - let mut let_or_mut: Vec<&mut Statement> = statements - .iter_mut() - .filter(|x| matches!(x, Statement::Let { .. } | Statement::Mutate { .. 
})) - .collect(); - - // process mutate first - for st in let_or_mut.iter_mut() { - if let Statement::Mutate { - name, - value, - value_type, - .. - } = st - { - if let Some(value_type) = value_type { - // todo: check types matches? - var_cache.insert(name.clone(), value_type.clone()); - set_expression_type(value, value_type, &mut var_cache); - } else { - // evalue the value expr first to find a possible type. - if var_cache.contains_key(name) { - *value_type = var_cache.get(name).cloned(); - let mut env = Some(value_type.clone().unwrap()); - set_exp_types_from_cache(value, &mut var_cache, &mut env, fn_cache); - } else { - // no type info? - } - } - } - } - - // we need to process lets with a specified type first. - for st in let_or_mut.iter_mut() { - if let Statement::Let { - name, - value, - value_type, - .. - } = st - { - if let Some(value_type) = value_type { - // todo: check types matches? - var_cache.insert(name.clone(), value_type.clone()); - set_expression_type(value, value_type, &mut var_cache); - } else { - // evalue the value expr first to find a possible type. - if var_cache.contains_key(name) { - *value_type = var_cache.get(name).cloned(); - let mut env = Some(value_type.clone().unwrap()); - set_exp_types_from_cache(value, &mut var_cache, &mut env, fn_cache); - } else { - // no type info? - } - } - } - } - } - - for st in statements.iter_mut() { - match st { - Statement::Let { - name, - value_type, - value, - .. - } => { - // infer type if let has no type - if value_type.is_none() { - // evalue the value expr first to find a possible type. - let mut env = None; - set_exp_types_from_cache(value, &mut var_cache, &mut env, fn_cache); - - // try to find if it was set on the cache - if var_cache.contains_key(name) { - *value_type = var_cache.get(name).cloned(); - set_expression_type(value, value_type.as_ref().unwrap(), &mut var_cache); - } else { - // what here? 
no let type, no cache - println!("no cache let found") - } - } - } - Statement::Mutate { - name, - value_type, - value, - .. - } => { - if let Some(value_type) = value_type { - // todo: check types matches? - var_cache.insert(name.clone(), value_type.clone()); - set_expression_type(value, value_type, &mut var_cache); - } else { - // evalue the value expr first to find a possible type. - if var_cache.contains_key(name) { - *value_type = var_cache.get(name).cloned(); - let mut env = Some(value_type.clone().unwrap()); - set_exp_types_from_cache(value, &mut var_cache, &mut env, fn_cache); - } else { - // no type info? - } - } - } - Statement::If { - condition, - body, - else_body, - } => { - let mut env = None; - set_exp_types_from_cache(condition, &mut var_cache, &mut env, fn_cache); - update_statements(body, &mut var_cache, fn_cache); - if let Some(else_body) = else_body { - update_statements(else_body, &mut var_cache, fn_cache); - } - } - Statement::Return(exp) => { - if let Some(exp) = exp { - let mut env = None; - set_exp_types_from_cache(exp, &mut var_cache, &mut env, fn_cache); - } - } - Statement::Function(_) => unreachable!(), - Statement::Struct(_) => unreachable!(), - } - } -} - -fn fn_return_type(func: &mut Function) -> Option<&mut Box> { - for st in func.body.iter_mut() { - if let Statement::Return(r) = st { - return r.as_mut(); - } - } - None -} - -// set variables using the cache -fn set_exp_types_from_cache( - exp: &mut Expression, - var_cache: &mut HashMap, - env: &mut Option, - fn_cache: &HashMap, -) { - match exp { - Expression::Variable { name, value_type } => { - let name = name.value.clone(); - if let Some(value_type) = value_type { - // todo: check types matches? 
- var_cache.insert(name, value_type.clone()); - *env = Some(value_type.clone()); - } else if var_cache.contains_key(&name) { - *value_type = var_cache.get(&name).cloned(); - if env.is_none() { - *env = value_type.clone(); - } - } - } - Expression::BinaryOp(lhs, op, rhs) => match op { - ast::OpCode::Eq | ast::OpCode::Ne => { - set_exp_types_from_cache(lhs, var_cache, env, fn_cache); - set_exp_types_from_cache(rhs, var_cache, env, fn_cache); - set_exp_types_from_cache(lhs, var_cache, env, fn_cache); - *env = Some(TypeExp::Boolean); - } - _ => { - set_exp_types_from_cache(lhs, var_cache, env, fn_cache); - set_exp_types_from_cache(rhs, var_cache, env, fn_cache); - set_exp_types_from_cache(lhs, var_cache, env, fn_cache); // needed in case 2 == x - } - }, - Expression::Literal(lit) => match lit { - ast::LiteralValue::String(_) => { - warn!("found string, unimplemented") - } - ast::LiteralValue::Integer { bits, signed, .. } => { - if let Some(TypeExp::Integer { - bits: t_bits, - signed: t_signed, - }) = env - { - *bits = Some(*t_bits); - *signed = Some(*t_signed); - } - } - ast::LiteralValue::Boolean(_) => { - warn!("found bool, unimplemented") - } - }, - Expression::Call { - function, - args, - value_type, - } => { - let fn_type = fn_cache.get(function).unwrap().clone(); - match value_type { - Some(value_type) => *env = Some(value_type.clone()), - None => { - if env.is_some() { - let env = env.clone(); - *value_type = env.clone(); - } else { - *value_type = fn_type.return_type.clone(); - *env = fn_type.return_type.clone(); - } - } - } - - for (i, arg) in args.iter_mut().enumerate() { - let mut env = Some(fn_type.params[i].type_exp.clone()); - set_exp_types_from_cache(arg, var_cache, &mut env, fn_cache); - } - } - } -} - -fn set_expression_type( - exp: &mut Expression, - expected_type: &TypeExp, - var_cache: &mut HashMap, -) { - match exp { - Expression::Variable { name, value_type } => { - // if needed? 
- if value_type.is_none() { - *value_type = Some(expected_type.clone()); - } - if !var_cache.contains_key(&name.value) { - var_cache.insert(name.value.clone(), expected_type.clone()); - } - } - Expression::BinaryOp(lhs, op, rhs) => match op { - // ast::OpCode::Eq | ast::OpCode::Ne => {} - _ => { - set_expression_type(lhs, expected_type, var_cache); - set_expression_type(rhs, expected_type, var_cache); - } - }, - Expression::Literal(lit) => match lit { - ast::LiteralValue::String(_) => { - warn!("found string, unimplemented") - } - ast::LiteralValue::Integer { bits, signed, .. } => { - if let TypeExp::Integer { - bits: t_bits, - signed: t_signed, - } = expected_type - { - *bits = Some(*t_bits); - *signed = Some(*t_signed); - } - } - ast::LiteralValue::Boolean(_) => { - warn!("found bool, unimplemented") - } - }, - Expression::Call { - function: _, - args: _, - value_type, - } => { - *value_type = Some(expected_type.clone()); - } - } -}