Edgar 2023-06-09 17:35:48 +02:00
parent 53e3e4dcd5
commit f3cc72e7ce
No known key found for this signature in database
GPG key ID: 70ADAE8F35904387
9 changed files with 182 additions and 353 deletions

View file

@@ -8,17 +8,17 @@ fn test(x: Hello) {
 }

 fn works(x: i64) -> i64 {
-    let z = 0;
+    let z = 0i64;
     if 2 == x {
-        z = x * 2;
+        z = x * 2i64;
     } else {
-        z = x * 3;
+        z = x * 3i64;
     }
     return z;
 }

 fn main() -> i64 {
-    let y = 2;
+    let y = 2i64;
     let z = y;
     return works(z);
 }

View file

@@ -53,8 +53,8 @@ pub enum LiteralValue {
     String(String),
     Integer {
         value: String,
-        bits: Option<u32>,
-        signed: Option<bool>,
+        bits: u32,
+        signed: bool,
     },
     Boolean(bool),
 }

View file

@@ -1,11 +1,14 @@
 use crate::{
     ast::{self, Statement},
     codegen::ProgramData,
+    lexer::LexicalError,
+    tokens::Token,
 };
 use annotate_snippets::{
     display_list::{DisplayList, FormatOptions},
     snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation},
 };
+use lalrpop_util::ParseError;

 #[derive(Debug)]
 pub enum Check<'a> {
@@ -82,3 +85,65 @@ pub fn check<'a>(data: &'a ProgramData, ast: &ast::Program) -> Vec<Check<'a>> {
     }

     errors
 }
+
+pub fn print_error(source: &str, err: ParseError<usize, Token, LexicalError>) {
+    match err {
+        ParseError::InvalidToken { location } => {
+            let snippet = Snippet {
+                title: None,
+                footer: vec![],
+                slices: vec![Slice {
+                    source,
+                    line_start: 1,
+                    fold: true,
+                    origin: None,
+                    annotations: vec![SourceAnnotation {
+                        label: "invalid token",
+                        annotation_type: AnnotationType::Error,
+                        range: (location, location),
+                    }],
+                }],
+                opt: FormatOptions {
+                    color: true,
+                    ..Default::default()
+                },
+            };
+            let dl = DisplayList::from(snippet);
+            println!("{dl}");
+        }
+        ParseError::UnrecognizedEof { location, expected } => todo!(),
+        ParseError::UnrecognizedToken { token, expected } => todo!(),
+        ParseError::ExtraToken { token } => todo!(),
+        ParseError::User { error } => match error {
+            LexicalError::InvalidToken(err, range) => {
+                let title = format!("invalid token (lexical error): {:?}", err);
+                let snippet = Snippet {
+                    title: Some(Annotation {
+                        id: None,
+                        label: Some(&title),
+                        annotation_type: AnnotationType::Error,
+                    }),
+                    footer: vec![],
+                    slices: vec![Slice {
+                        source: dbg!(source),
+                        line_start: 1,
+                        fold: false,
+                        origin: None,
+                        annotations: vec![SourceAnnotation {
+                            label: "invalid token (lexical error)",
+                            annotation_type: AnnotationType::Error,
+                            range: dbg!((range.start, range.end)),
+                        }],
+                    }],
+                    opt: FormatOptions {
+                        color: true,
+                        ..Default::default()
+                    },
+                };
+                let dl = DisplayList::from(snippet);
+                println!("{dl}");
+            }
+        },
+    };
+}

View file

@@ -552,8 +552,8 @@ impl<'ctx> CodeGen<'ctx> {
                     bits,
                     signed,
                 } => {
-                    let bits = bits.unwrap_or(32);
-                    let signed = signed.unwrap_or(true);
+                    let bits = *bits;
+                    let signed = *signed;
                     (
                         self.context
                             .custom_width_int_type(bits)

View file

@@ -24,6 +24,8 @@ extern {
         "return" => Token::KeywordReturn,
         "fn" => Token::KeywordFn,
         "ptr" => Token::KeywordPtr,
+        "_" => Token::KeywordUnderscore,
         "(" => Token::LeftParen,
         ")" => Token::RightParen,
         "{" => Token::LeftBracket,
@@ -151,10 +153,52 @@ Term: Box<ast::Expression> = {
     "(" <Term> ")"
 };

-Number: ast::LiteralValue = <n:"int literal"> => ast::LiteralValue::Integer {
-    value: n,
-    bits: None,
-    signed: None
+Number: ast::LiteralValue = {
+    <n:"int literal"> "_"? "i8" => ast::LiteralValue::Integer {
+        value: n,
+        bits: 8,
+        signed: true,
+    },
+    <n:"int literal"> "_"? "i16" => ast::LiteralValue::Integer {
+        value: n,
+        bits: 16,
+        signed: true,
+    },
+    <n:"int literal"> "_"? "i32" => ast::LiteralValue::Integer {
+        value: n,
+        bits: 32,
+        signed: true,
+    },
+    <n:"int literal"> "_"? "i64" => ast::LiteralValue::Integer {
+        value: n,
+        bits: 64,
+        signed: true,
+    },
+    <n:"int literal"> "_"? "u8" => ast::LiteralValue::Integer {
+        value: n,
+        bits: 8,
+        signed: false,
+    },
+    <n:"int literal"> "_"? "u16" => ast::LiteralValue::Integer {
+        value: n,
+        bits: 16,
+        signed: false,
+    },
+    <n:"int literal"> "_"? "u32" => ast::LiteralValue::Integer {
+        value: n,
+        bits: 32,
+        signed: false,
+    },
+    <n:"int literal"> "_"? "u64" => ast::LiteralValue::Integer {
+        value: n,
+        bits: 64,
+        signed: false,
+    },
+    <n:"int literal"> => ast::LiteralValue::Integer {
+        value: n,
+        bits: 32,
+        signed: true,
+    },
 };

 StringLit: ast::LiteralValue = <n:"string literal"> => ast::LiteralValue::String(n[1..(n.len()-1)].to_string());

View file

@@ -2,19 +2,21 @@ use std::{fmt::Display, ops::Range};

 use logos::{Logos, SpannedIter};

-use crate::tokens::Token;
+use crate::tokens::{LexingError, Token};

 pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;

 #[derive(Debug, Clone)]
 pub enum LexicalError {
-    InvalidToken(Range<usize>),
+    InvalidToken(LexingError, Range<usize>),
 }

 impl Display for LexicalError {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            LexicalError::InvalidToken(span) => write!(f, "lexical error at: {:?}", span),
+            LexicalError::InvalidToken(err, span) => {
+                write!(f, "lexical error at ({:?}): {:?}", err, span)
+            }
         }
     }
 }
@@ -39,7 +41,7 @@ impl<'input> Iterator for Lexer<'input> {
     fn next(&mut self) -> Option<Self::Item> {
         self.token_stream.next().map(|(token, span)| match token {
             Ok(token) => Ok((span.start, token, span.end)),
-            Err(()) => Err(LexicalError::InvalidToken(span)),
+            Err(err) => Err(LexicalError::InvalidToken(err, span)),
         })
     }
 }

View file

@@ -1,5 +1,6 @@
 #![allow(clippy::too_many_arguments)]

+use check::print_error;
 use clap::{Parser, Subcommand};
 use codegen::ProgramData;
 use color_eyre::Result;
@@ -101,7 +102,7 @@ fn main() -> Result<()> {
             let lexer = Lexer::new(code.as_str());
             let parser = grammar::ProgramParser::new();
             let mut ast = parser.parse(lexer)?;
-            type_analysis::type_inference(&mut ast);
+            type_analysis::type_inference2(&mut ast);
             let program = ProgramData::new(&input, &code);
             check_program(&program, &ast);
         }
@@ -109,9 +110,15 @@ fn main() -> Result<()> {
             let code = fs::read_to_string(input)?;
             let lexer = Lexer::new(code.as_str());
             let parser = grammar::ProgramParser::new();
-            let mut ast = parser.parse(lexer)?;
-            type_analysis::type_inference2(&mut ast);
-            println!("{ast:#?}");
+            match parser.parse(lexer) {
+                Ok(mut ast) => {
+                    type_analysis::type_inference2(&mut ast);
+                    println!("{ast:#?}");
+                }
+                Err(e) => {
+                    print_error(&code, e);
+                }
+            }
         }
         Commands::Compile {
             input,
@@ -123,7 +130,7 @@ fn main() -> Result<()> {
             let lexer = Lexer::new(code.as_str());
             let parser = grammar::ProgramParser::new();
             let mut ast: Program = parser.parse(lexer)?;
-            type_analysis::type_inference(&mut ast);
+            type_analysis::type_inference2(&mut ast);
             let program = ProgramData::new(&input, &code);

View file

@@ -1,9 +1,30 @@
 use logos::Logos;
-use std::fmt;
+use std::{convert::Infallible, fmt};
+
+// https://github.com/maciejhirsz/logos/issues/133
+#[derive(Debug, PartialEq, Clone, Default)]
+pub enum LexingError {
+    NumberParseError,
+    #[default]
+    Other,
+}
+
+impl From<std::num::ParseIntError> for LexingError {
+    fn from(_: std::num::ParseIntError) -> Self {
+        LexingError::NumberParseError
+    }
+}
+
+impl From<Infallible> for LexingError {
+    fn from(_: Infallible) -> Self {
+        LexingError::Other
+    }
+}

 // todo: https://github.com/maciejhirsz/logos/issues/133#issuecomment-619444615
 #[derive(Logos, Debug, PartialEq, Clone)]
-#[logos(skip r"[ \t\n\f]+", skip r"#.*\n?")]
+#[logos(error = LexingError, skip r"[ \t\n\f]+", skip r"#.*\n?")]
 pub enum Token {
     #[token("let")]
     KeywordLet,
@@ -21,14 +42,16 @@ pub enum Token {
     KeywordIf,
     #[token("else")]
     KeywordElse,
+    #[token("_")]
+    KeywordUnderscore,
-    #[regex(r"_?\p{XID_Start}\p{XID_Continue}*", |lex| lex.slice().parse().ok())]
+    #[regex(r"_?\p{XID_Start}\p{XID_Continue}*", |lex| lex.slice().to_string())]
     Identifier(String),
-    #[regex(r"\d+", |lex| lex.slice().parse().ok())]
+    #[regex(r"\d+", |lex| lex.slice().to_string())]
     Integer(String),
     #[regex(r#""(?:[^"]|\\")*""#, |lex| lex.slice().to_string())]
     String(String),
-    #[regex(r"(true|false)", |lex| lex.slice().parse().ok())]
+    #[regex(r"(true|false)", |lex| lex.slice().parse::<bool>().unwrap())]
     Boolean(bool),
     #[token("bool")]
View file

@@ -1,7 +1,5 @@
 use std::collections::{HashMap, HashSet};

-use tracing::{info, warn};
-
 use crate::ast::{self, Expression, Function, Statement, TypeExp};

 #[derive(Debug, Clone, Default)]
@@ -10,7 +8,18 @@ struct Storage {
     functions: HashMap<String, Function>,
 }

+/*
+To briefly summarize the union-find algorithm, given the set of all types in a proof,
+it allows one to group them together into equivalence classes by means of a union procedure and to
+pick a representative for each such class using a find procedure. Emphasizing the word procedure in
+the sense of side effect, we're clearly leaving the realm of logic in order to prepare an effective algorithm.
+The representative of union(a, b) is determined such that, if both a and b are
+type variables then the representative is arbitrarily one of them, but while uniting a variable and a term, the
+term becomes the representative. Assuming an implementation of union-find at hand, one can formulate the unification of two monotypes as follows:
+*/
+
 // this works, but need to find a way to store the found info + handle literal integer types (or not?)
+// maybe use scope ids
 pub fn type_inference2(ast: &mut ast::Program) {
     let mut storage = Storage::default();
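
The block comment added above summarizes the union-find view of unification. As a rough, self-contained illustration only (not code from this repository), here is a minimal Rust sketch of such a union/find procedure over a toy type representation; the names Ty, UnionFind, find, and union are hypothetical, and the actual TypeExp handling in this crate may differ.

use std::collections::HashMap;

// Toy type representation: a type variable or a concrete term,
// loosely in the spirit of TypeExp::Integer / TypeExp::Boolean.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq)]
enum Ty {
    Var(usize),
    Int { bits: u32, signed: bool },
    Bool,
}

#[derive(Default)]
struct UnionFind {
    // current representative chosen for each type variable, if any
    bindings: HashMap<usize, Ty>,
}

impl UnionFind {
    // find: follow variable bindings until a representative is reached
    fn find(&self, t: &Ty) -> Ty {
        match t {
            Ty::Var(v) => match self.bindings.get(v) {
                Some(bound) => self.find(bound),
                None => t.clone(),
            },
            _ => t.clone(),
        }
    }

    // union: equal representatives unify trivially; a variable united with a
    // term makes the term the representative (two variables pick one of them);
    // two different concrete terms are a type mismatch
    fn union(&mut self, a: &Ty, b: &Ty) -> Result<(), String> {
        match (self.find(a), self.find(b)) {
            (x, y) if x == y => Ok(()),
            (Ty::Var(v), other) | (other, Ty::Var(v)) => {
                self.bindings.insert(v, other);
                Ok(())
            }
            (x, y) => Err(format!("type mismatch: {x:?} vs {y:?}")),
        }
    }
}

fn main() {
    let mut uf = UnionFind::default();
    let t0 = Ty::Var(0); // e.g. the yet-unknown type of a `let` binding
    let i64_ty = Ty::Int { bits: 64, signed: true };
    uf.union(&t0, &i64_ty).unwrap();
    assert_eq!(uf.find(&t0), i64_ty);
}

The property mirrored from the comment is that once a variable meets a term, the term becomes the representative, so later find calls on that variable resolve to the concrete type.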
@@ -166,16 +175,10 @@ fn type_inference_expression(
                 value: _,
                 bits,
                 signed,
-            } => {
-                if bits.is_some() && signed.is_some() {
-                    Some(TypeExp::Integer {
-                        bits: bits.unwrap(),
-                        signed: signed.unwrap(),
-                    })
-                } else {
-                    None
-                }
-            }
+            } => Some(TypeExp::Integer {
+                bits: *bits,
+                signed: *signed,
+            }),
             ast::LiteralValue::Boolean(_) => Some(TypeExp::Boolean),
         }
     }
@@ -230,318 +233,3 @@ fn type_inference_expression(
         },
     }
 }
-
-pub fn type_inference(ast: &mut ast::Program) {
-    let mut struct_cache: HashMap<String, HashMap<String, TypeExp>> = HashMap::new();
-
-    for statement in ast.statements.iter_mut() {
-        if let Statement::Struct(st) = statement {
-            let fields = st
-                .fields
-                .iter()
-                .map(|x| (x.ident.clone(), x.type_exp.clone()))
-                .collect();
-            struct_cache.insert(st.name.clone(), fields);
-        }
-    }
-
-    let mut fn_cache: HashMap<String, Function> = HashMap::new();
-
-    for statement in ast.statements.iter_mut() {
-        if let Statement::Function(function) = statement {
-            fn_cache.insert(function.name.clone(), function.clone());
-        }
-    }
-
-    for statement in ast.statements.iter_mut() {
-        if let Statement::Function(function) = statement {
-            let ret_type = function.return_type.clone();
-            let mut var_cache: HashMap<String, TypeExp> = HashMap::new();
-
-            for arg in &function.params {
-                var_cache.insert(arg.ident.clone(), arg.type_exp.clone());
-            }
-
-            if let Some(ret_type) = &ret_type {
-                let ret_type_exp = fn_return_type(function);
-
-                if let Some(exp) = ret_type_exp {
-                    set_expression_type(exp, ret_type, &mut var_cache);
-                }
-            }
-
-            update_statements(&mut function.body, &mut var_cache, &fn_cache);
-        }
-    }
-}
-
-fn update_statements(
-    statements: &mut [Statement],
-    var_cache: &mut HashMap<String, TypeExp>,
-    fn_cache: &HashMap<String, Function>,
-) {
-    let mut var_cache = var_cache.clone();
-
-    {
-        let mut let_or_mut: Vec<&mut Statement> = statements
-            .iter_mut()
-            .filter(|x| matches!(x, Statement::Let { .. } | Statement::Mutate { .. }))
-            .collect();
-
-        // process mutate first
-        for st in let_or_mut.iter_mut() {
-            if let Statement::Mutate {
-                name,
-                value,
-                value_type,
-                ..
-            } = st
-            {
-                if let Some(value_type) = value_type {
-                    // todo: check types matches?
-                    var_cache.insert(name.clone(), value_type.clone());
-                    set_expression_type(value, value_type, &mut var_cache);
-                } else {
-                    // evalue the value expr first to find a possible type.
-                    if var_cache.contains_key(name) {
-                        *value_type = var_cache.get(name).cloned();
-                        let mut env = Some(value_type.clone().unwrap());
-                        set_exp_types_from_cache(value, &mut var_cache, &mut env, fn_cache);
-                    } else {
-                        // no type info?
-                    }
-                }
-            }
-        }
-
-        // we need to process lets with a specified type first.
-        for st in let_or_mut.iter_mut() {
-            if let Statement::Let {
-                name,
-                value,
-                value_type,
-                ..
-            } = st
-            {
-                if let Some(value_type) = value_type {
-                    // todo: check types matches?
-                    var_cache.insert(name.clone(), value_type.clone());
-                    set_expression_type(value, value_type, &mut var_cache);
-                } else {
-                    // evalue the value expr first to find a possible type.
-                    if var_cache.contains_key(name) {
-                        *value_type = var_cache.get(name).cloned();
-                        let mut env = Some(value_type.clone().unwrap());
-                        set_exp_types_from_cache(value, &mut var_cache, &mut env, fn_cache);
-                    } else {
-                        // no type info?
-                    }
-                }
-            }
-        }
-    }
-
-    for st in statements.iter_mut() {
-        match st {
-            Statement::Let {
-                name,
-                value_type,
-                value,
-                ..
-            } => {
-                // infer type if let has no type
-                if value_type.is_none() {
-                    // evalue the value expr first to find a possible type.
-                    let mut env = None;
-                    set_exp_types_from_cache(value, &mut var_cache, &mut env, fn_cache);
-
-                    // try to find if it was set on the cache
-                    if var_cache.contains_key(name) {
-                        *value_type = var_cache.get(name).cloned();
-                        set_expression_type(value, value_type.as_ref().unwrap(), &mut var_cache);
-                    } else {
-                        // what here? no let type, no cache
-                        println!("no cache let found")
-                    }
-                }
-            }
-            Statement::Mutate {
-                name,
-                value_type,
-                value,
-                ..
-            } => {
-                if let Some(value_type) = value_type {
-                    // todo: check types matches?
-                    var_cache.insert(name.clone(), value_type.clone());
-                    set_expression_type(value, value_type, &mut var_cache);
-                } else {
-                    // evalue the value expr first to find a possible type.
-                    if var_cache.contains_key(name) {
-                        *value_type = var_cache.get(name).cloned();
-                        let mut env = Some(value_type.clone().unwrap());
-                        set_exp_types_from_cache(value, &mut var_cache, &mut env, fn_cache);
-                    } else {
-                        // no type info?
-                    }
-                }
-            }
-            Statement::If {
-                condition,
-                body,
-                else_body,
-            } => {
-                let mut env = None;
-                set_exp_types_from_cache(condition, &mut var_cache, &mut env, fn_cache);
-                update_statements(body, &mut var_cache, fn_cache);
-
-                if let Some(else_body) = else_body {
-                    update_statements(else_body, &mut var_cache, fn_cache);
-                }
-            }
-            Statement::Return(exp) => {
-                if let Some(exp) = exp {
-                    let mut env = None;
-                    set_exp_types_from_cache(exp, &mut var_cache, &mut env, fn_cache);
-                }
-            }
-            Statement::Function(_) => unreachable!(),
-            Statement::Struct(_) => unreachable!(),
-        }
-    }
-}
-
-fn fn_return_type(func: &mut Function) -> Option<&mut Box<Expression>> {
-    for st in func.body.iter_mut() {
-        if let Statement::Return(r) = st {
-            return r.as_mut();
-        }
-    }
-
-    None
-}
-
-// set variables using the cache
-fn set_exp_types_from_cache(
-    exp: &mut Expression,
-    var_cache: &mut HashMap<String, TypeExp>,
-    env: &mut Option<TypeExp>,
-    fn_cache: &HashMap<String, Function>,
-) {
-    match exp {
-        Expression::Variable { name, value_type } => {
-            let name = name.value.clone();
-
-            if let Some(value_type) = value_type {
-                // todo: check types matches?
-                var_cache.insert(name, value_type.clone());
-                *env = Some(value_type.clone());
-            } else if var_cache.contains_key(&name) {
-                *value_type = var_cache.get(&name).cloned();
-
-                if env.is_none() {
-                    *env = value_type.clone();
-                }
-            }
-        }
-        Expression::BinaryOp(lhs, op, rhs) => match op {
-            ast::OpCode::Eq | ast::OpCode::Ne => {
-                set_exp_types_from_cache(lhs, var_cache, env, fn_cache);
-                set_exp_types_from_cache(rhs, var_cache, env, fn_cache);
-                set_exp_types_from_cache(lhs, var_cache, env, fn_cache);
-                *env = Some(TypeExp::Boolean);
-            }
-            _ => {
-                set_exp_types_from_cache(lhs, var_cache, env, fn_cache);
-                set_exp_types_from_cache(rhs, var_cache, env, fn_cache);
-                set_exp_types_from_cache(lhs, var_cache, env, fn_cache); // needed in case 2 == x
-            }
-        },
-        Expression::Literal(lit) => match lit {
-            ast::LiteralValue::String(_) => {
-                warn!("found string, unimplemented")
-            }
-            ast::LiteralValue::Integer { bits, signed, .. } => {
-                if let Some(TypeExp::Integer {
-                    bits: t_bits,
-                    signed: t_signed,
-                }) = env
-                {
-                    *bits = Some(*t_bits);
-                    *signed = Some(*t_signed);
-                }
-            }
-            ast::LiteralValue::Boolean(_) => {
-                warn!("found bool, unimplemented")
-            }
-        },
-        Expression::Call {
-            function,
-            args,
-            value_type,
-        } => {
-            let fn_type = fn_cache.get(function).unwrap().clone();
-
-            match value_type {
-                Some(value_type) => *env = Some(value_type.clone()),
-                None => {
-                    if env.is_some() {
-                        let env = env.clone();
-                        *value_type = env.clone();
-                    } else {
-                        *value_type = fn_type.return_type.clone();
-                        *env = fn_type.return_type.clone();
-                    }
-                }
-            }
-
-            for (i, arg) in args.iter_mut().enumerate() {
-                let mut env = Some(fn_type.params[i].type_exp.clone());
-                set_exp_types_from_cache(arg, var_cache, &mut env, fn_cache);
-            }
-        }
-    }
-}
-
-fn set_expression_type(
-    exp: &mut Expression,
-    expected_type: &TypeExp,
-    var_cache: &mut HashMap<String, TypeExp>,
-) {
-    match exp {
-        Expression::Variable { name, value_type } => {
-            // if needed?
-            if value_type.is_none() {
-                *value_type = Some(expected_type.clone());
-            }
-
-            if !var_cache.contains_key(&name.value) {
-                var_cache.insert(name.value.clone(), expected_type.clone());
-            }
-        }
-        Expression::BinaryOp(lhs, op, rhs) => match op {
-            // ast::OpCode::Eq | ast::OpCode::Ne => {}
-            _ => {
-                set_expression_type(lhs, expected_type, var_cache);
-                set_expression_type(rhs, expected_type, var_cache);
-            }
-        },
-        Expression::Literal(lit) => match lit {
-            ast::LiteralValue::String(_) => {
-                warn!("found string, unimplemented")
-            }
-            ast::LiteralValue::Integer { bits, signed, .. } => {
-                if let TypeExp::Integer {
-                    bits: t_bits,
-                    signed: t_signed,
-                } = expected_type
-                {
-                    *bits = Some(*t_bits);
-                    *signed = Some(*t_signed);
-                }
-            }
-            ast::LiteralValue::Boolean(_) => {
-                warn!("found bool, unimplemented")
-            }
-        },
-        Expression::Call {
-            function: _,
-            args: _,
-            value_type,
-        } => {
-            *value_type = Some(expected_type.clone());
-        }
-    }
-}