This commit is contained in:
Edgar 2023-05-14 18:11:38 +02:00
parent 1daeb22097
commit 04c3fdcd07
No known key found for this signature in database
GPG key ID: 70ADAE8F35904387
8 changed files with 200 additions and 146 deletions

View file

@ -1,9 +1,9 @@
fn add(a: i64, b: i64) -> i64 { fn add(a: i32, b: i32) -> i32 {
return a + b; return a + b;
} }
fn main() { fn main() -> i32 {
let x = 2 + 3; let x = 2 + 3;
let y = add(x, 4); let y = add(x, 4);
return; return y;
} }

5
simple.ed Normal file
View file

@ -0,0 +1,5 @@
fn main(x: i64) -> i64 {
let x = 2 + 3;
return x;
}

View file

@ -23,7 +23,7 @@ impl OpCode {
pub enum LiteralValue { pub enum LiteralValue {
String, String,
Integer { Integer {
bits: usize, bits: Option<u32>,
signed: bool, signed: bool,
value: String, value: String,
}, },

View file

@ -1,4 +1,8 @@
use std::collections::HashMap; use std::{
collections::HashMap,
path::{Path, PathBuf},
todo,
};
use color_eyre::Result; use color_eyre::Result;
use inkwell::{ use inkwell::{
@ -10,21 +14,19 @@ use inkwell::{
values::{BasicMetadataValueEnum, BasicValue, BasicValueEnum}, values::{BasicMetadataValueEnum, BasicValue, BasicValueEnum},
}; };
use itertools::{Either, Itertools}; use itertools::{Either, Itertools};
/*
use statement::Statement;
use crate::ast::{self, statement, Expression, Function, Identifier, OpCode, SpanValue, Term, LiteralValue}; use crate::ast::{self, Expression, Function, LiteralValue, OpCode, Statement};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct ProgramData { pub struct ProgramData {
pub filename: String, pub filename: PathBuf,
pub source: String, pub source: String,
} }
impl ProgramData { impl ProgramData {
pub fn new(filename: &str, source: &str) -> Self { pub fn new(filename: &Path, source: &str) -> Self {
Self { Self {
filename: filename.to_string(), filename: filename.to_path_buf(),
source: source.to_string(), source: source.to_string(),
} }
} }
@ -65,9 +67,8 @@ impl<'ctx> CodeGen<'ctx> {
// create the llvm functions first. // create the llvm functions first.
for statement in &self.ast.statements { for statement in &self.ast.statements {
match &statement.value { match &statement {
Statement::Assignment(_) => unreachable!(), Statement::Variable { .. } => unreachable!(),
Statement::Definition(_) => todo!(),
Statement::Return(_) => unreachable!(), Statement::Return(_) => unreachable!(),
Statement::Function(function) => { Statement::Function(function) => {
functions.push(function); functions.push(function);
@ -106,7 +107,7 @@ impl<'ctx> CodeGen<'ctx> {
let args_types: Vec<BasicTypeEnum<'ctx>> = function let args_types: Vec<BasicTypeEnum<'ctx>> = function
.params .params
.iter() .iter()
.map(|param| param.type_name.0.value.as_str()) .map(|param| param.type_name.as_str())
.map(|t| self.get_llvm_type(t)) .map(|t| self.get_llvm_type(t))
.try_collect()?; .try_collect()?;
@ -114,18 +115,17 @@ impl<'ctx> CodeGen<'ctx> {
args_types.into_iter().map(|t| t.into()).collect_vec(); args_types.into_iter().map(|t| t.into()).collect_vec();
let fn_type = match &function.return_type { let fn_type = match &function.return_type {
Some(id) => self.get_llvm_type(&id.0.value)?.fn_type(&args_types, false), Some(id) => self.get_llvm_type(id)?.fn_type(&args_types, false),
None => self.context.void_type().fn_type(&args_types, false), None => self.context.void_type().fn_type(&args_types, false),
}; };
self.module self.module.add_function(&function.name, fn_type, None);
.add_function(&function.ident.0.value, fn_type, None);
Ok(()) Ok(())
} }
fn compile_function(&self, function: &Function) -> Result<()> { fn compile_function(&self, function: &Function) -> Result<()> {
let func = self.module.get_function(&function.ident.0.value).unwrap(); let func = self.module.get_function(&function.name).unwrap();
let entry_block = self.context.append_basic_block(func, "entry"); let entry_block = self.context.append_basic_block(func, "entry");
self.builder.position_at_end(entry_block); self.builder.position_at_end(entry_block);
@ -135,7 +135,7 @@ impl<'ctx> CodeGen<'ctx> {
for (i, param) in function.params.iter().enumerate() { for (i, param) in function.params.iter().enumerate() {
let id = param.ident.clone(); let id = param.ident.clone();
variables.insert( variables.insert(
id.0.value.clone(), id.clone(),
func.get_nth_param(i.try_into().unwrap()) func.get_nth_param(i.try_into().unwrap())
.expect("parameter"), .expect("parameter"),
); );
@ -145,7 +145,7 @@ impl<'ctx> CodeGen<'ctx> {
let mut has_return = false; let mut has_return = false;
for statement in &function.body { for statement in &function.body {
if let Statement::Return(_) = statement.value { if let Statement::Return(_) = statement {
has_return = true has_return = true
} }
self.compile_statement(&entry_block, statement, &mut variables)?; self.compile_statement(&entry_block, statement, &mut variables)?;
@ -161,24 +161,17 @@ impl<'ctx> CodeGen<'ctx> {
fn compile_statement( fn compile_statement(
&self, &self,
block: &BasicBlock, block: &BasicBlock,
statement: &SpanValue<Statement>, statement: &Statement,
variables: &mut HashMap<String, BasicValueEnum<'ctx>>, variables: &mut HashMap<String, BasicValueEnum<'ctx>>,
) -> Result<()> { ) -> Result<()> {
match &statement.value { match statement {
// Variable assignment // Variable assignment
Statement::Assignment(body) => { Statement::Variable { name, value } => {
let result = self let result = self
.compile_expression(block, &body.expr, variables)? .compile_expression(block, value, variables)?
.expect("should have result"); .expect("should have result");
variables.insert(body.ident.0.value.clone(), result); variables.insert(name.clone(), result);
}
Statement::Definition(body) => {
let result = self
.compile_expression(block, &body.expr, variables)?
.expect("should have result");
variables.insert(body.ident.0.value.clone(), result);
} }
Statement::Return(ret) => { Statement::Return(ret) => {
if let Some(ret) = ret { if let Some(ret) = ret {
@ -199,24 +192,28 @@ impl<'ctx> CodeGen<'ctx> {
pub fn compile_expression( pub fn compile_expression(
&self, &self,
block: &BasicBlock, block: &BasicBlock,
expr: &SpanValue<Box<Expression>>, expr: &Expression,
variables: &mut HashMap<String, BasicValueEnum<'ctx>>, variables: &mut HashMap<String, BasicValueEnum<'ctx>>,
) -> Result<Option<BasicValueEnum<'ctx>>> { ) -> Result<Option<BasicValueEnum<'ctx>>> {
Ok(match &*expr.value { Ok(match expr {
Expression::Literal(term) => Some(self.compile_term(&term, variables)?), Expression::Variable(term) => Some(self.compile_variable(term, variables)?),
Expression::Call(func_id, args) => self.compile_call(block, func_id, args, variables)?, Expression::Literal(term) => Some(self.compile_literal(term)?),
Expression::BinaryOp(lhs, op, rhs) => Some(self.compile_op(block, lhs, op, rhs, variables)?), Expression::Call { function, args } => {
self.compile_call(block, function, args, variables)?
}
Expression::BinaryOp(lhs, op, rhs) => {
Some(self.compile_binary_op(block, lhs, op, rhs, variables)?)
}
}) })
} }
pub fn compile_call( pub fn compile_call(
&self, &self,
block: &BasicBlock, block: &BasicBlock,
func_id: &Identifier, func_name: &str,
args: &[SpanValue<Box<Expression>>], args: &[Box<Expression>],
variables: &mut HashMap<String, BasicValueEnum<'ctx>>, variables: &mut HashMap<String, BasicValueEnum<'ctx>>,
) -> Result<Option<BasicValueEnum<'ctx>>> { ) -> Result<Option<BasicValueEnum<'ctx>>> {
let func_name = &func_id.0.value;
let function = self.module.get_function(func_name).expect("should exist"); let function = self.module.get_function(func_name).expect("should exist");
let mut value_args: Vec<BasicMetadataValueEnum> = Vec::with_capacity(args.len()); let mut value_args: Vec<BasicMetadataValueEnum> = Vec::with_capacity(args.len());
@ -239,12 +236,12 @@ impl<'ctx> CodeGen<'ctx> {
}) })
} }
pub fn compile_op( pub fn compile_binary_op(
&self, &self,
block: &BasicBlock, block: &BasicBlock,
lhs: &SpanValue<Box<Expression>>, lhs: &Expression,
op: &OpCode, op: &OpCode,
rhs: &SpanValue<Box<Expression>>, rhs: &Expression,
variables: &mut HashMap<String, BasicValueEnum<'ctx>>, variables: &mut HashMap<String, BasicValueEnum<'ctx>>,
) -> Result<BasicValueEnum<'ctx>> { ) -> Result<BasicValueEnum<'ctx>> {
let lhs = self let lhs = self
@ -267,21 +264,33 @@ impl<'ctx> CodeGen<'ctx> {
Ok(result.as_basic_value_enum()) Ok(result.as_basic_value_enum())
} }
pub fn compile_term( pub fn compile_literal(&self, term: &LiteralValue) -> Result<BasicValueEnum<'ctx>> {
&self,
term: &LiteralValue,
variables: &mut HashMap<String, BasicValueEnum<'ctx>>,
) -> Result<BasicValueEnum<'ctx>> {
let value = match term { let value = match term {
Term::Identifier(ident) => *variables.get(&ident.0.value).expect("value"), LiteralValue::String => todo!(),
Term::Number(num) => self LiteralValue::Integer {
.context bits,
.i64_type() signed: _,
.const_int(num.0.value.try_into()?, true) value,
.as_basic_value_enum(), } => {
// todo: type resolution for bit size?
let bits = bits.unwrap_or(32);
self.context
.custom_width_int_type(bits)
.const_int(value.parse().unwrap(), false)
.as_basic_value_enum()
}
}; };
Ok(value) Ok(value)
} }
pub fn compile_variable(
&self,
variable: &str,
variables: &mut HashMap<String, BasicValueEnum<'ctx>>,
) -> Result<BasicValueEnum<'ctx>> {
let var = *variables.get(variable).expect("value");
Ok(var)
}
} }
*/

View file

@ -1,8 +1,40 @@
use std::str::FromStr; use std::str::FromStr;
use crate::ast::*; use crate::{
ast,
tokens::Token,
lexer::LexicalError,
};
grammar; grammar;
extern {
type Location = usize;
type Error = LexicalError;
enum Token {
"let" => Token::KeywordLet,
"print" => Token::KeywordPrint,
"identifier" => Token::Identifier(<String>),
"int" => Token::Integer(<String>),
"return" => Token::KeywordReturn,
"fn" => Token::KeywordFn,
"(" => Token::LeftParen,
")" => Token::RightParen,
"{" => Token::LeftBracket,
"}" => Token::RightBracket,
"=" => Token::Assign,
";" => Token::Semicolon,
":" => Token::Colon,
"->" => Token::Arrow,
"," => Token::Coma,
"+" => Token::OperatorAdd,
"-" => Token::OperatorSub,
"*" => Token::OperatorMul,
"/" => Token::OperatorDiv,
"%" => Token::OperatorRem,
}
}
Comma<T>: Vec<T> = { Comma<T>: Vec<T> = {
<mut v:(<T> ",")*> <e:T?> => match e { <mut v:(<T> ",")*> <e:T?> => match e {
None => v, None => v,
@ -13,67 +45,11 @@ Comma<T>: Vec<T> = {
} }
}; };
ExprOp: OpCode = { pub Program: ast::Program = {
"+" => OpCode::Add, Statements => ast::Program::new(<>)
"-" => OpCode::Sub,
};
FactorOp: OpCode = {
"*" => OpCode::Mul,
"/" => OpCode::Div,
"%" => OpCode::Rem,
} }
Tier<Op,NextTier>: Box<Expression> = { Statements: Vec<ast::Statement> = {
<t:Tier<Op,NextTier>> <o:Op> <n:NextTier> => Box::new(Expression::BinaryOp(t, o, n)),
NextTier
};
Expr = Tier<ExprOp, Factor>;
Factor = Tier<FactorOp, Term>;
// Terms: variables, literals, calls
Term: Box<Expression> = {
<i:Identifier> => Box::new(Expression::Variable(i)),
<n:Num> => Box::new(Expression::Literal(n)),
<i:Identifier> "(" <values:Comma<Term>> ")" => Box::new(Expression::Call { function: i, args: values}),
"(" <Term> ")"
};
Identifier: String = {
<i:r"[a-zA-Z][\w]*"> => i.to_string(),
};
Num: LiteralValue = <n:r"[0-9]+"> => LiteralValue::Integer { bits: 32, signed: true, value: n.to_string()};
// Function handling
Param: Parameter = {
<Identifier> ":" <Identifier> => Parameter::new(<>)
};
Params = Comma<Param>;
FunctionReturn: String = {
"->" <i:Identifier> => i.to_string(),
}
Function: Function = {
"fn" <i:Identifier> "(" <a:Params> ")" <r:FunctionReturn?> "{" <s:Statements> "}" => Function::new(i, a, s, r)
}
// statements not including function definitions
BasicStatement: Statement = {
"let" <i:Identifier> "=" <e:Expr> ";" => Statement::Variable { name: i, value: e},
<i:Identifier> "=" <e:Expr> ";" => Statement::Variable { name: i, value: e},
"return" <e:Expr?> ";" => Statement::Return(e),
};
Statement: Statement = {
BasicStatement,
<f:Function> => Statement::Function(f),
};
Statements: Vec<Statement> = {
Statement => vec![<>], Statement => vec![<>],
<mut s:Statements> <n:Statement> => { <mut s:Statements> <n:Statement> => {
s.push(n); s.push(n);
@ -81,6 +57,58 @@ Statements: Vec<Statement> = {
}, },
}; };
pub Program: Program = { Statement: ast::Statement = {
Statements => Program::new(<>) BasicStatement,
<f:Function> => ast::Statement::Function(f),
};
// statements not including function definitions
BasicStatement: ast::Statement = {
"let" <i:"identifier"> "=" <e:Expr> ";" => ast::Statement::Variable { name: i, value: e},
<i:"identifier"> "=" <e:Expr> ";" => ast::Statement::Variable { name: i, value: e},
"return" <e:Expr?> ";" => ast::Statement::Return(e),
};
ExprOp: ast::OpCode = {
"+" => ast::OpCode::Add,
"-" => ast::OpCode::Sub,
};
FactorOp: ast::OpCode = {
"*" => ast::OpCode::Mul,
"/" => ast::OpCode::Div,
"%" => ast::OpCode::Rem,
}
Tier<Op,NextTier>: Box<ast::Expression> = {
<t:Tier<Op,NextTier>> <o:Op> <n:NextTier> => Box::new(ast::Expression::BinaryOp(t, o, n)),
NextTier
};
Expr = Tier<ExprOp, Factor>;
Factor = Tier<FactorOp, Term>;
// Terms: variables, literals, calls
Term: Box<ast::Expression> = {
<i:"identifier"> => Box::new(ast::Expression::Variable(i)),
<n:Num> => Box::new(ast::Expression::Literal(n)),
<i:"identifier"> "(" <values:Comma<Term>> ")" => Box::new(ast::Expression::Call { function: i, args: values}),
"(" <Term> ")"
};
Num: ast::LiteralValue = <n:"int"> => ast::LiteralValue::Integer { bits: None, signed: true, value: n.to_string()};
// Function handling
Param: ast::Parameter = {
<"identifier"> ":" <"identifier"> => ast::Parameter::new(<>)
};
Params = Comma<Param>;
FunctionReturn: String = {
"->" <i:"identifier"> => i.to_string(),
}
Function: ast::Function = {
"fn" <i:"identifier"> "(" <a:Params> ")" <r:FunctionReturn?> "{" <s:Statements> "}" => ast::Function::new(i, a, s, r)
} }

View file

@ -4,6 +4,7 @@ use crate::tokens::Token;
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>; pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
#[derive(Debug, Clone, Copy)]
pub enum LexicalError { pub enum LexicalError {
InvalidToken, InvalidToken,
} }

View file

@ -1,11 +1,14 @@
#![allow(clippy::too_many_arguments)] #![allow(clippy::too_many_arguments)]
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use codegen::ProgramData;
use color_eyre::Result; use color_eyre::Result;
use inkwell::{context::Context, execution_engine::JitFunction, OptimizationLevel}; use inkwell::{context::Context, execution_engine::JitFunction, OptimizationLevel};
use lalrpop_util::lalrpop_mod; use lalrpop_util::lalrpop_mod;
use std::{fs, path::PathBuf, println}; use std::{fs, path::PathBuf, println};
use crate::{ast::Program, lexer::Lexer};
pub mod ast; pub mod ast;
pub mod check; pub mod check;
pub mod codegen; pub mod codegen;
@ -90,12 +93,12 @@ fn main() -> Result<()> {
match args.command { match args.command {
Commands::Check { input } => { Commands::Check { input } => {
let code = fs::read_to_string(&input)?; let code = fs::read_to_string(input)?;
let lexer = Lexer::new(code.as_str());
let parser = grammar::ProgramParser::new(); let parser = grammar::ProgramParser::new();
let ast = parser.parse(&code).unwrap(); let ast = parser.parse(lexer).unwrap();
let str_path = input.to_string_lossy(); //let str_path = input.to_string_lossy();
//let program = ProgramData::new(&str_path, &code); //let program = ProgramData::new(&str_path, &code);
//check_program(&program, &ast); //check_program(&program, &ast);
} }
@ -106,21 +109,17 @@ fn main() -> Result<()> {
optimize: _, optimize: _,
} => { } => {
let code = fs::read_to_string(&input)?; let code = fs::read_to_string(&input)?;
let lexer = Lexer::new(code.as_str());
let parser = grammar::ProgramParser::new(); let parser = grammar::ProgramParser::new();
let ast = parser.parse(&code).unwrap(); let ast: Program = parser.parse(lexer).unwrap();
println!("{:#?}", ast); let program = ProgramData::new(&input, &code);
/*
let str_path = input.to_string_lossy();
let program = ProgramData::new(&str_path, &code);
let file_name = input.file_name().unwrap().to_string_lossy(); let file_name = input.file_name().unwrap().to_string_lossy();
if !check_program(&program, &ast) { //if !check_program(&program, &ast) {
return Ok(()); // return Ok(());
} //}
let context = Context::create(); let context = Context::create();
let codegen = codegen::CodeGen::new(&context, &file_name, program, ast)?; let codegen = codegen::CodeGen::new(&context, &file_name, program, ast)?;
@ -132,16 +131,14 @@ fn main() -> Result<()> {
} else { } else {
println!("{generated_llvm_ir}"); println!("{generated_llvm_ir}");
} }
*/
} }
Commands::Run { input } => { Commands::Run { input } => {
let code = fs::read_to_string(&input)?; let code = fs::read_to_string(&input)?;
let lexer = Lexer::new(&code[..]);
let parser = grammar::ProgramParser::new(); let parser = grammar::ProgramParser::new();
let ast = parser.parse(&code).unwrap(); let ast = parser.parse(lexer).unwrap();
/*
let str_path = input.to_string_lossy(); let program = ProgramData::new(&input, &code);
let program = ProgramData::new(&str_path, &code);
let file_name = input.file_name().unwrap().to_string_lossy(); let file_name = input.file_name().unwrap().to_string_lossy();
@ -158,7 +155,6 @@ fn main() -> Result<()> {
execution_engine.get_function("main")?; execution_engine.get_function("main")?;
main.call(); main.call();
}; };
*/
} }
} }

View file

@ -1,27 +1,42 @@
use logos::Logos; use logos::Logos;
use std::fmt; use std::fmt;
#[derive(Logos, Debug, PartialEq)] // todo: https://github.com/maciejhirsz/logos/issues/133#issuecomment-619444615
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\n\f]+", skip r"#.*\n?")] #[logos(skip r"[ \t\n\f]+", skip r"#.*\n?")]
pub enum Token { pub enum Token {
#[token("var")] #[token("let")]
KeywordVar, KeywordLet,
#[token("print")] #[token("print")]
KeywordPrint, KeywordPrint,
#[token("fn")]
KeywordFn,
#[token("return")]
KeywordReturn,
#[regex("[_a-zA-Z][_0-9a-zA-Z]*", |lex| lex.slice().parse().ok())] #[regex(r"_?\p{XID_Start}\p{XID_Continue}*", |lex| lex.slice().parse().ok())]
Identifier(String), Identifier(String),
#[regex(r"\d+", |lex| lex.slice().parse().ok())] #[regex(r"\d+", |lex| lex.slice().parse().ok())]
Integer(i64), Integer(String),
#[token("(")] #[token("(")]
LParen, LeftParen,
#[token(")")] #[token(")")]
RParen, RightParen,
#[token("{")]
LeftBracket,
#[token("}")]
RightBracket,
#[token("=")] #[token("=")]
Assign, Assign,
#[token(";")] #[token(";")]
Semicolon, Semicolon,
#[token(":")]
Colon,
#[token("->")]
Arrow,
#[token(",")]
Coma,
#[token("+")] #[token("+")]
OperatorAdd, OperatorAdd,