This commit is contained in:
Edgar 2023-05-14 18:11:38 +02:00
parent 1daeb22097
commit 04c3fdcd07
No known key found for this signature in database
GPG key ID: 70ADAE8F35904387
8 changed files with 200 additions and 146 deletions

View file

@ -1,9 +1,9 @@
fn add(a: i64, b: i64) -> i64 {
fn add(a: i32, b: i32) -> i32 {
return a + b;
}
fn main() {
fn main() -> i32 {
let x = 2 + 3;
let y = add(x, 4);
return;
return y;
}

5
simple.ed Normal file
View file

@ -0,0 +1,5 @@
# Minimal sample program: binds the sum 2 + 3 to a local and returns it.
# The local `x` reuses the name of the parameter `x`.
fn main(x: i64) -> i64 {
let x = 2 + 3;
return x;
}

View file

@ -23,7 +23,7 @@ impl OpCode {
pub enum LiteralValue {
String,
Integer {
bits: usize,
bits: Option<u32>,
signed: bool,
value: String,
},

View file

@ -1,4 +1,8 @@
use std::collections::HashMap;
use std::{
collections::HashMap,
path::{Path, PathBuf},
todo,
};
use color_eyre::Result;
use inkwell::{
@ -10,21 +14,19 @@ use inkwell::{
values::{BasicMetadataValueEnum, BasicValue, BasicValueEnum},
};
use itertools::{Either, Itertools};
/*
use statement::Statement;
use crate::ast::{self, statement, Expression, Function, Identifier, OpCode, SpanValue, Term, LiteralValue};
use crate::ast::{self, Expression, Function, LiteralValue, OpCode, Statement};
#[derive(Debug, Clone)]
pub struct ProgramData {
pub filename: String,
pub filename: PathBuf,
pub source: String,
}
impl ProgramData {
pub fn new(filename: &str, source: &str) -> Self {
pub fn new(filename: &Path, source: &str) -> Self {
Self {
filename: filename.to_string(),
filename: filename.to_path_buf(),
source: source.to_string(),
}
}
@ -65,9 +67,8 @@ impl<'ctx> CodeGen<'ctx> {
// create the llvm functions first.
for statement in &self.ast.statements {
match &statement.value {
Statement::Assignment(_) => unreachable!(),
Statement::Definition(_) => todo!(),
match &statement {
Statement::Variable { .. } => unreachable!(),
Statement::Return(_) => unreachable!(),
Statement::Function(function) => {
functions.push(function);
@ -106,7 +107,7 @@ impl<'ctx> CodeGen<'ctx> {
let args_types: Vec<BasicTypeEnum<'ctx>> = function
.params
.iter()
.map(|param| param.type_name.0.value.as_str())
.map(|param| param.type_name.as_str())
.map(|t| self.get_llvm_type(t))
.try_collect()?;
@ -114,18 +115,17 @@ impl<'ctx> CodeGen<'ctx> {
args_types.into_iter().map(|t| t.into()).collect_vec();
let fn_type = match &function.return_type {
Some(id) => self.get_llvm_type(&id.0.value)?.fn_type(&args_types, false),
Some(id) => self.get_llvm_type(id)?.fn_type(&args_types, false),
None => self.context.void_type().fn_type(&args_types, false),
};
self.module
.add_function(&function.ident.0.value, fn_type, None);
self.module.add_function(&function.name, fn_type, None);
Ok(())
}
fn compile_function(&self, function: &Function) -> Result<()> {
let func = self.module.get_function(&function.ident.0.value).unwrap();
let func = self.module.get_function(&function.name).unwrap();
let entry_block = self.context.append_basic_block(func, "entry");
self.builder.position_at_end(entry_block);
@ -135,7 +135,7 @@ impl<'ctx> CodeGen<'ctx> {
for (i, param) in function.params.iter().enumerate() {
let id = param.ident.clone();
variables.insert(
id.0.value.clone(),
id.clone(),
func.get_nth_param(i.try_into().unwrap())
.expect("parameter"),
);
@ -145,7 +145,7 @@ impl<'ctx> CodeGen<'ctx> {
let mut has_return = false;
for statement in &function.body {
if let Statement::Return(_) = statement.value {
if let Statement::Return(_) = statement {
has_return = true
}
self.compile_statement(&entry_block, statement, &mut variables)?;
@ -161,24 +161,17 @@ impl<'ctx> CodeGen<'ctx> {
fn compile_statement(
&self,
block: &BasicBlock,
statement: &SpanValue<Statement>,
statement: &Statement,
variables: &mut HashMap<String, BasicValueEnum<'ctx>>,
) -> Result<()> {
match &statement.value {
match statement {
// Variable assignment
Statement::Assignment(body) => {
Statement::Variable { name, value } => {
let result = self
.compile_expression(block, &body.expr, variables)?
.compile_expression(block, value, variables)?
.expect("should have result");
variables.insert(body.ident.0.value.clone(), result);
}
Statement::Definition(body) => {
let result = self
.compile_expression(block, &body.expr, variables)?
.expect("should have result");
variables.insert(body.ident.0.value.clone(), result);
variables.insert(name.clone(), result);
}
Statement::Return(ret) => {
if let Some(ret) = ret {
@ -199,24 +192,28 @@ impl<'ctx> CodeGen<'ctx> {
pub fn compile_expression(
&self,
block: &BasicBlock,
expr: &SpanValue<Box<Expression>>,
expr: &Expression,
variables: &mut HashMap<String, BasicValueEnum<'ctx>>,
) -> Result<Option<BasicValueEnum<'ctx>>> {
Ok(match &*expr.value {
Expression::Literal(term) => Some(self.compile_term(&term, variables)?),
Expression::Call(func_id, args) => self.compile_call(block, func_id, args, variables)?,
Expression::BinaryOp(lhs, op, rhs) => Some(self.compile_op(block, lhs, op, rhs, variables)?),
Ok(match expr {
Expression::Variable(term) => Some(self.compile_variable(term, variables)?),
Expression::Literal(term) => Some(self.compile_literal(term)?),
Expression::Call { function, args } => {
self.compile_call(block, function, args, variables)?
}
Expression::BinaryOp(lhs, op, rhs) => {
Some(self.compile_binary_op(block, lhs, op, rhs, variables)?)
}
})
}
pub fn compile_call(
&self,
block: &BasicBlock,
func_id: &Identifier,
args: &[SpanValue<Box<Expression>>],
func_name: &str,
args: &[Box<Expression>],
variables: &mut HashMap<String, BasicValueEnum<'ctx>>,
) -> Result<Option<BasicValueEnum<'ctx>>> {
let func_name = &func_id.0.value;
let function = self.module.get_function(func_name).expect("should exist");
let mut value_args: Vec<BasicMetadataValueEnum> = Vec::with_capacity(args.len());
@ -239,12 +236,12 @@ impl<'ctx> CodeGen<'ctx> {
})
}
pub fn compile_op(
pub fn compile_binary_op(
&self,
block: &BasicBlock,
lhs: &SpanValue<Box<Expression>>,
lhs: &Expression,
op: &OpCode,
rhs: &SpanValue<Box<Expression>>,
rhs: &Expression,
variables: &mut HashMap<String, BasicValueEnum<'ctx>>,
) -> Result<BasicValueEnum<'ctx>> {
let lhs = self
@ -267,21 +264,33 @@ impl<'ctx> CodeGen<'ctx> {
Ok(result.as_basic_value_enum())
}
pub fn compile_term(
&self,
term: &LiteralValue,
variables: &mut HashMap<String, BasicValueEnum<'ctx>>,
) -> Result<BasicValueEnum<'ctx>> {
pub fn compile_literal(&self, term: &LiteralValue) -> Result<BasicValueEnum<'ctx>> {
let value = match term {
Term::Identifier(ident) => *variables.get(&ident.0.value).expect("value"),
Term::Number(num) => self
.context
.i64_type()
.const_int(num.0.value.try_into()?, true)
.as_basic_value_enum(),
LiteralValue::String => todo!(),
LiteralValue::Integer {
bits,
signed: _,
value,
} => {
// todo: type resolution for bit size?
let bits = bits.unwrap_or(32);
self.context
.custom_width_int_type(bits)
.const_int(value.parse().unwrap(), false)
.as_basic_value_enum()
}
};
Ok(value)
}
pub fn compile_variable(
&self,
variable: &str,
variables: &mut HashMap<String, BasicValueEnum<'ctx>>,
) -> Result<BasicValueEnum<'ctx>> {
let var = *variables.get(variable).expect("value");
Ok(var)
}
}
*/

View file

@ -1,8 +1,40 @@
use std::str::FromStr;
use crate::ast::*;
use crate::{
ast,
tokens::Token,
lexer::LexicalError,
};
grammar;
// Connects the grammar to the external lexer: declares the location and
// error types it reports, and names each `Token` variant with the terminal
// string used by the rules below.
extern {
// Source positions handed over by the lexer are `usize` values.
type Location = usize;
type Error = LexicalError;
enum Token {
// Keywords.
"let" => Token::KeywordLet,
"print" => Token::KeywordPrint,
// Terminals that carry their source text as a `String`.
"identifier" => Token::Identifier(<String>),
"int" => Token::Integer(<String>),
"return" => Token::KeywordReturn,
"fn" => Token::KeywordFn,
// Punctuation. Note that `{` / `}` map to the `*Bracket` variants.
"(" => Token::LeftParen,
")" => Token::RightParen,
"{" => Token::LeftBracket,
"}" => Token::RightBracket,
"=" => Token::Assign,
";" => Token::Semicolon,
":" => Token::Colon,
"->" => Token::Arrow,
"," => Token::Coma,
// Arithmetic operators.
"+" => Token::OperatorAdd,
"-" => Token::OperatorSub,
"*" => Token::OperatorMul,
"/" => Token::OperatorDiv,
"%" => Token::OperatorRem,
}
}
Comma<T>: Vec<T> = {
<mut v:(<T> ",")*> <e:T?> => match e {
None => v,
@ -13,67 +45,11 @@ Comma<T>: Vec<T> = {
}
};
ExprOp: OpCode = {
"+" => OpCode::Add,
"-" => OpCode::Sub,
};
FactorOp: OpCode = {
"*" => OpCode::Mul,
"/" => OpCode::Div,
"%" => OpCode::Rem,
pub Program: ast::Program = {
Statements => ast::Program::new(<>)
}
Tier<Op,NextTier>: Box<Expression> = {
<t:Tier<Op,NextTier>> <o:Op> <n:NextTier> => Box::new(Expression::BinaryOp(t, o, n)),
NextTier
};
Expr = Tier<ExprOp, Factor>;
Factor = Tier<FactorOp, Term>;
// Terms: variables, literals, calls
Term: Box<Expression> = {
<i:Identifier> => Box::new(Expression::Variable(i)),
<n:Num> => Box::new(Expression::Literal(n)),
<i:Identifier> "(" <values:Comma<Term>> ")" => Box::new(Expression::Call { function: i, args: values}),
"(" <Term> ")"
};
Identifier: String = {
<i:r"[a-zA-Z][\w]*"> => i.to_string(),
};
Num: LiteralValue = <n:r"[0-9]+"> => LiteralValue::Integer { bits: 32, signed: true, value: n.to_string()};
// Function handling
Param: Parameter = {
<Identifier> ":" <Identifier> => Parameter::new(<>)
};
Params = Comma<Param>;
FunctionReturn: String = {
"->" <i:Identifier> => i.to_string(),
}
Function: Function = {
"fn" <i:Identifier> "(" <a:Params> ")" <r:FunctionReturn?> "{" <s:Statements> "}" => Function::new(i, a, s, r)
}
// statements not including function definitions
BasicStatement: Statement = {
"let" <i:Identifier> "=" <e:Expr> ";" => Statement::Variable { name: i, value: e},
<i:Identifier> "=" <e:Expr> ";" => Statement::Variable { name: i, value: e},
"return" <e:Expr?> ";" => Statement::Return(e),
};
Statement: Statement = {
BasicStatement,
<f:Function> => Statement::Function(f),
};
Statements: Vec<Statement> = {
Statements: Vec<ast::Statement> = {
Statement => vec![<>],
<mut s:Statements> <n:Statement> => {
s.push(n);
@ -81,6 +57,58 @@ Statements: Vec<Statement> = {
},
};
pub Program: Program = {
Statements => Program::new(<>)
// Any statement: a basic statement is passed through unchanged, a function
// definition is wrapped in `Statement::Function`.
Statement: ast::Statement = {
    BasicStatement,
    Function => ast::Statement::Function(<>),
};
// Statements other than function definitions. Both `let name = expr;` and
// the bare `name = expr;` form produce `Statement::Variable`.
BasicStatement: ast::Statement = {
    "let" <name:"identifier"> "=" <value:Expr> ";" => ast::Statement::Variable { name, value },
    <name:"identifier"> "=" <value:Expr> ";" => ast::Statement::Variable { name, value },
    "return" <ret:Expr?> ";" => ast::Statement::Return(ret),
};
// Operators of the additive tier (used by `Expr`).
ExprOp: ast::OpCode = {
"+" => ast::OpCode::Add,
"-" => ast::OpCode::Sub,
};
// Operators of the multiplicative tier (used by `Factor`).
FactorOp: ast::OpCode = {
"*" => ast::OpCode::Mul,
"/" => ast::OpCode::Div,
"%" => ast::OpCode::Rem,
}
// Generic binary-operator tier. The rule recurses on its left operand, so a
// chain such as `a - b - c` nests as `(a - b) - c`; a lone `NextTier` is
// passed through untouched.
Tier<Op,NextTier>: Box<ast::Expression> = {
<t:Tier<Op,NextTier>> <o:Op> <n:NextTier> => Box::new(ast::Expression::BinaryOp(t, o, n)),
NextTier
};
// Precedence ladder: `Expr` handles `+`/`-` over `Factor`, and `Factor`
// handles `*`, `/`, `%` over `Term`, so the multiplicative operators bind
// tighter than the additive ones.
Expr = Tier<ExprOp, Factor>;
Factor = Tier<FactorOp, Term>;
// Terms: variables, literals, calls and parenthesized expressions.
Term: Box<ast::Expression> = {
    <i:"identifier"> => Box::new(ast::Expression::Variable(i)),
    <n:Num> => Box::new(ast::Expression::Literal(n)),
    // Call arguments are full expressions, so `f(a + 1, b * 2)` parses.
    <i:"identifier"> "(" <values:Comma<Expr>> ")" => Box::new(ast::Expression::Call { function: i, args: values }),
    // Parentheses restart at the lowest-precedence tier, so `(a + b) * c` parses.
    "(" <Expr> ")",
};
// Integer literal. The digits stay as text and the width is left unresolved
// (`bits: None`). The "int" terminal already yields an owned `String`, so it
// is moved into the literal rather than copied.
Num: ast::LiteralValue = <n:"int"> => ast::LiteralValue::Integer { bits: None, signed: true, value: n };
// Function handling

// A single parameter written as `identifier : identifier` (e.g. `x: i64`);
// both identifiers are forwarded to `Parameter::new` in source order.
Param: ast::Parameter = {
<"identifier"> ":" <"identifier"> => ast::Parameter::new(<>)
};
// Comma-separated parameter list, built with the `Comma` macro.
Params = Comma<Param>;
// Return-type annotation: `-> identifier`. The "identifier" terminal already
// carries an owned `String`, so it is returned directly without copying.
FunctionReturn: String = {
    "->" <"identifier">,
}
// Function definition: `fn name(params) [-> type] { statements }`.
// The pieces are handed to `Function::new` as (name, params, body, return type).
Function: ast::Function = {
    "fn" <name:"identifier"> "(" <params:Params> ")" <ret:FunctionReturn?> "{" <body:Statements> "}" =>
        ast::Function::new(name, params, body, ret)
}

View file

@ -4,6 +4,7 @@ use crate::tokens::Token;
/// Item shape produced by the lexer: on success a token flanked by two
/// locations (start and end, following LALRPOP's external-lexer convention),
/// otherwise an error.
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
/// Errors reported while tokenizing source text.
#[derive(Debug, Clone, Copy)]
pub enum LexicalError {
/// The input did not form a valid token.
// NOTE(review): presumably raised when the underlying lexer fails to match
// any rule; confirm against the `Lexer` implementation.
InvalidToken,
}

View file

@ -1,11 +1,14 @@
#![allow(clippy::too_many_arguments)]
use clap::{Parser, Subcommand};
use codegen::ProgramData;
use color_eyre::Result;
use inkwell::{context::Context, execution_engine::JitFunction, OptimizationLevel};
use lalrpop_util::lalrpop_mod;
use std::{fs, path::PathBuf, println};
use crate::{ast::Program, lexer::Lexer};
pub mod ast;
pub mod check;
pub mod codegen;
@ -90,12 +93,12 @@ fn main() -> Result<()> {
match args.command {
Commands::Check { input } => {
let code = fs::read_to_string(&input)?;
let code = fs::read_to_string(input)?;
let lexer = Lexer::new(code.as_str());
let parser = grammar::ProgramParser::new();
let ast = parser.parse(&code).unwrap();
let ast = parser.parse(lexer).unwrap();
let str_path = input.to_string_lossy();
//let str_path = input.to_string_lossy();
//let program = ProgramData::new(&str_path, &code);
//check_program(&program, &ast);
}
@ -106,21 +109,17 @@ fn main() -> Result<()> {
optimize: _,
} => {
let code = fs::read_to_string(&input)?;
let lexer = Lexer::new(code.as_str());
let parser = grammar::ProgramParser::new();
let ast = parser.parse(&code).unwrap();
let ast: Program = parser.parse(lexer).unwrap();
println!("{:#?}", ast);
/*
let str_path = input.to_string_lossy();
let program = ProgramData::new(&str_path, &code);
let program = ProgramData::new(&input, &code);
let file_name = input.file_name().unwrap().to_string_lossy();
if !check_program(&program, &ast) {
return Ok(());
}
//if !check_program(&program, &ast) {
// return Ok(());
//}
let context = Context::create();
let codegen = codegen::CodeGen::new(&context, &file_name, program, ast)?;
@ -132,16 +131,14 @@ fn main() -> Result<()> {
} else {
println!("{generated_llvm_ir}");
}
*/
}
Commands::Run { input } => {
let code = fs::read_to_string(&input)?;
let lexer = Lexer::new(&code[..]);
let parser = grammar::ProgramParser::new();
let ast = parser.parse(&code).unwrap();
/*
let str_path = input.to_string_lossy();
let program = ProgramData::new(&str_path, &code);
let ast = parser.parse(lexer).unwrap();
let program = ProgramData::new(&input, &code);
let file_name = input.file_name().unwrap().to_string_lossy();
@ -158,7 +155,6 @@ fn main() -> Result<()> {
execution_engine.get_function("main")?;
main.call();
};
*/
}
}

View file

@ -1,27 +1,42 @@
use logos::Logos;
use std::fmt;
#[derive(Logos, Debug, PartialEq)]
// todo: https://github.com/maciejhirsz/logos/issues/133#issuecomment-619444615
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\n\f]+", skip r"#.*\n?")]
pub enum Token {
#[token("var")]
KeywordVar,
#[token("let")]
KeywordLet,
#[token("print")]
KeywordPrint,
#[token("fn")]
KeywordFn,
#[token("return")]
KeywordReturn,
#[regex("[_a-zA-Z][_0-9a-zA-Z]*", |lex| lex.slice().parse().ok())]
#[regex(r"_?\p{XID_Start}\p{XID_Continue}*", |lex| lex.slice().parse().ok())]
Identifier(String),
#[regex(r"\d+", |lex| lex.slice().parse().ok())]
Integer(i64),
Integer(String),
#[token("(")]
LParen,
LeftParen,
#[token(")")]
RParen,
RightParen,
#[token("{")]
LeftBracket,
#[token("}")]
RightBracket,
#[token("=")]
Assign,
#[token(";")]
Semicolon,
#[token(":")]
Colon,
#[token("->")]
Arrow,
#[token(",")]
Coma,
#[token("+")]
OperatorAdd,