initial

2024-10-18 06:53:47 +00:00 · 2023-12-14 10:25:56 +01:00 · 2023-12-14 10:25:56 +01:00 · d9281843f2
commit d9281843f2
11 changed files with 2183 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
 /target
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -0,0 +1,21 @@
 [package]
 name = "pascal-mlir"
 version = "0.1.0"
 edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [dependencies]
 melior = { version = "0.14.0", features = ["ods-dialects"] }
 clap = { version = "4.3.3", features = ["derive"] }
 color-eyre = "0.6.2"
 itertools = "0.12"
 lalrpop-util = { version = "0.20.0", features = ["lexer"] }
 regex = "1.9"
 tracing = "0.1.37"
 tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
 annotate-snippets = { version = "0.9.1", features = ["color"] }
 logos = "0.13.0"
 [build-dependencies]
 lalrpop = "0.20.0"
--- a/README.md
+++ b/README.md
@ -0,0 +1,4 @@
 https://www.cs.utexas.edu/users/novak/iso7185.pdf
 https://lalrpop.github.io/lalrpop/lexer_tutorial/004_token_references.html
 I'm at 6.1.7
--- a/build.rs
+++ b/build.rs
@ -0,0 +1,3 @@
 /// Build-script entry point: invokes `lalrpop::process_root` and panics if it returns an error.
 /// (Per the LALRPOP documentation this is the call that compiles the project's `.lalrpop` grammars.)
 fn main() {
    let outcome = lalrpop::process_root();
    outcome.unwrap();
 }
--- a/programs/first.pas
+++ b/programs/first.pas
@ -0,0 +1,6 @@
 { Sample input: a program heading followed by a constant-definition part. }
 program learn_pascal;
 const
    PI = 3.141592654;
    GNU = 'GNU''s Not Unix';
--- a/src/ast.rs
+++ b/src/ast.rs
@ -0,0 +1,20 @@
 /// A numeric literal, stored as the borrowed text it was written with.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum Number<'a> {
    /// Text of an integer literal.
    Integer(&'a str),
    /// Text of a real literal.
    Real(&'a str),
 }
 /// The right-hand side of a constant definition.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum Constant<'a> {
    /// A reference to another constant by name; `is_negative` records a leading `-`.
    Identifier {
        is_negative: bool,
        ident: &'a str,
    },
    /// A numeric literal.
    Number(Number<'a>),
    /// A string literal, stored as its token text.
    String(&'a str),
 }
 /// One `ident = value` entry of a constant-definition part.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct ConstantDef<'a> {
    /// Name being defined.
    pub ident: &'a str,
    /// Value bound to the name.
    pub value: Constant<'a>,
 }
--- a/src/grammar.lalrpop
+++ b/src/grammar.lalrpop
@ -0,0 +1,49 @@
 use crate::{
    ast,
    tokens::Token,
    lexer::LexicalError,
 };
 grammar<'input>(input: &'input str);
 // Bridge to the external (logos-based) lexer: declares the location and error
 // types and maps each terminal spelled in this grammar to a `Token` variant.
 extern {
    type Location = usize;
    type Error = LexicalError;
    enum Token<'input> {
        "program" => Token::WordProgram,
        "identifier" => Token::Identifier(<&'input str>),
        "integer" => Token::Integer(<&'input str>),
        "real" => Token::Real(<&'input str>),
        "string" => Token::String(<&'input str>),
        "-" => Token::SpecialMinus,
        "+" => Token::SpecialPlus,
        // Needed by `Comma<T>`: every terminal used in a rule must be mapped here.
        "," => Token::SpecialComma,
    }
 }
 // `Comma<T>`: zero or more `T` separated by ",", where the last element may
 // appear with or without a trailing ",".
 Comma<T>: Vec<T> = {
    <mut items:(<T> ",")*> <last:T?> => {
        // `Option` is iterable: this appends the final element only when present.
        items.extend(last);
        items
    }
 };
 // Public entry rule: accepts the single terminal "program" and yields the string "let".
 pub Hello: String = {
    "program" => String::from("let"),
 }
 // A numeric literal: wraps the token text in the matching `ast::Number` variant.
 Number: ast::Number<'input> = {
    <digits:"integer"> => ast::Number::Integer(digits),
    <text:"real"> => ast::Number::Real(text),
 }
 // A constant: a number, a string, or an identifier with an optional sign.
 // Only a leading "-" sets `is_negative`; a leading "+" is accepted and dropped.
 Constant: ast::Constant<'input> = {
    <number:Number> => ast::Constant::Number(number),
    <text:"string"> => ast::Constant::String(text),
    "+"? <name:"identifier"> => ast::Constant::Identifier { is_negative: false, ident: name },
    "-" <name:"identifier"> => ast::Constant::Identifier { is_negative: true, ident: name },
 }
--- a/src/lexer.rs
+++ b/src/lexer.rs
@ -0,0 +1,47 @@
 use std::{fmt::Display, ops::Range};
 use logos::{Logos, SpannedIter};
 use crate::tokens::{LexingError, Token};
 /// Item shape yielded by the lexer: `Ok((start, token, end))` on success, `Err(error)` otherwise.
 pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
 /// Error produced by [`Lexer`]; the grammar's `extern` block names it as the parser's `type Error`.
 #[derive(Debug, Clone)]
 pub enum LexicalError {
    /// The token stream reported `LexingError` for the given source span.
    InvalidToken(LexingError, Range<usize>),
 }
 /// Renders the error as `lexical error at (<span>): <cause>`.
 impl Display for LexicalError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            LexicalError::InvalidToken(err, span) => {
                // The span belongs in the "at (...)" slot and the cause after the colon;
                // the arguments were previously passed in the opposite order.
                write!(f, "lexical error at ({:?}): {:?}", span, err)
            }
        }
    }
 }
 /// Adapter around the logos token stream; its `Iterator` impl yields [`Spanned`] items.
 pub struct Lexer<'input> {
    // Spanned logos iterator over `Token`s, used in place of a hand-written character iterator.
    token_stream: SpannedIter<'input, Token<'input>>,
 }
 impl<'input> Lexer<'input> {
    /// Creates a lexer over `input`.
    pub fn new(input: &'input str) -> Self {
        // `Token::lexer` comes from the derived `Logos` trait; `spanned` pairs each token with its span.
        let token_stream = Token::lexer(input).spanned();
        Self { token_stream }
    }
 }
 impl<'input> Iterator for Lexer<'input> {
    type Item = Spanned<Token<'input>, usize, LexicalError>;
    /// Pulls the next entry from the token stream and reshapes it into a [`Spanned`] item;
    /// returns `None` once the underlying stream is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let (lexed, span) = self.token_stream.next()?;
        let item = match lexed {
            Ok(token) => Ok((span.start, token, span.end)),
            Err(cause) => Err(LexicalError::InvalidToken(cause, span)),
        };
        Some(item)
    }
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -0,0 +1,11 @@
 use lalrpop_util::lalrpop_mod;
 lalrpop_mod!(pub grammar);
 pub mod lexer;
 pub mod tokens;
 pub mod ast;
 /// Program entry point; currently it only prints a greeting.
 fn main() {
    let greeting = "Hello, world!";
    println!("{greeting}");
 }
--- a/src/tokens.rs
+++ b/src/tokens.rs
@ -0,0 +1,151 @@
 use logos::Logos;
 use std::convert::Infallible;
 //  https://github.com/maciejhirsz/logos/issues/133
 /// Error type of the derived lexer (selected via `#[logos(error = LexingError)]` on `Token`).
 #[derive(Debug, PartialEq, Clone, Default)]
 pub enum LexingError {
    /// Result of converting a `std::num::ParseIntError`.
    NumberParseError,
    /// Catch-all variant; also the `Default` value.
    #[default]
    Other,
 }
 impl From<std::num::ParseIntError> for LexingError {
    fn from(_: std::num::ParseIntError) -> Self {
        LexingError::NumberParseError
    }
 }
 impl From<Infallible> for LexingError {
    fn from(_: Infallible) -> Self {
        LexingError::Other
    }
 }
 /// Lexical tokens of the Pascal source, generated by the `logos` derive.
 ///
 /// Skipped input (never produced as tokens): whitespace (including `\r`),
 /// `//` line comments, `{ ... }` comments and `(* ... *)` comments.
 /// Word symbols are matched exactly as spelled below, i.e. in lowercase only.
 //
 // The `(* ... *)` pattern is written without a lazy `*?` quantifier because
 // logos only supports greedy repetition: the body is any run of non-`*`
 // characters, or `*`s followed by something other than `*` or `)`.
 #[derive(Logos, Debug, PartialEq, Clone)]
 #[logos(
    error = LexingError,
    skip r"[ \t\r\n\f]+",
    skip r"//.*\n?",
    skip r"\{[^}]*\}",
    skip r"\(\*([^*]|\*+[^*)])*\*+\)"
 )]
 pub enum Token<'input> {
    /// A letter followed by letters or ASCII digits.
    #[regex(r"[a-zA-Z][a-zA-Z0-9]*")]
    Identifier(&'input str), // also directive
    /// Optionally signed digit sequence.
    #[regex(r"[+-]?[0-9][0-9]*")]
    Integer(&'input str),
    /// Optionally signed number with a fractional part and/or an exponent.
    #[regex(r"[+-]?[0-9][0-9]*\.[0-9][0-9]*([eE][+-]?[0-9][0-9]*)?")]
    #[regex(r"[+-]?[0-9][0-9]*[eE][+-]?[0-9][0-9]*")]
    Real(&'input str),
    /// String literal; the slice includes the surrounding quotes.
    // Pascal form: single quotes, with `''` standing for one apostrophe
    // (e.g. 'GNU''s Not Unix'). The empty string '' is accepted as well.
    #[regex(r"'(?:[^'\r\n]|'')*'")]
    // Double-quoted form, kept unchanged for backward compatibility.
    #[regex(r#""(?:[^"]|\\")*""#)]
    String(&'input str),
    // special symbols
    #[token("+")]
    SpecialPlus,
    #[token("-")]
    SpecialMinus,
    #[token("*")]
    SpecialMul,
    #[token("/")]
    SpecialDiv,
    #[token("=")]
    SpecialEqual,
    #[token("<")]
    SpecialLower,
    #[token(">")]
    SpecialGreater,
    #[token("[")]
    SpecialOpenBracket,
    #[token("]")]
    SpecialCloseBracket,
    #[token(".")]
    SpecialDot,
    #[token(",")]
    SpecialComma,
    /// `:` as used in declarations such as `x: integer`.
    #[token(":")]
    SpecialColon,
    #[token(";")]
    SpecialDotComma,
    /// `^`, the pointer symbol.
    #[token("^")]
    SpecialCaret,
    #[token("\"")]
    SpecialQuotation,
    #[token("(")]
    SpecialOpenParen,
    #[token(")")]
    SpecialCloseParen,
    #[token("<>")]
    SpecialSpaceship,
    #[token("<=")]
    SpecialLessEqual,
    #[token(">=")]
    SpecialGreaterEqual,
    #[token(":=")]
    SpecialAssign,
    #[token("..")]
    SpecialRange,
    // special symbols - word symbols
    #[token("and")]
    WordAnd,
    #[token("array")]
    WordArray,
    #[token("begin")]
    WordBegin,
    #[token("case")]
    WordCase,
    #[token("const")]
    WordConst,
    #[token("div")]
    WordDiv,
    #[token("do")]
    WordDo,
    #[token("downto")]
    WordDownto,
    #[token("else")]
    WordElse,
    #[token("end")]
    WordEnd,
    #[token("file")]
    WordFile,
    #[token("for")]
    WordFor,
    #[token("function")]
    WordFunction,
    #[token("goto")]
    WordGoto,
    #[token("if")]
    WordIf,
    #[token("in")]
    WordIn,
    #[token("label")]
    WordLabel,
    #[token("mod")]
    WordMod,
    #[token("nil")]
    WordNil,
    #[token("not")]
    WordNot,
    #[token("of")]
    WordOf,
    #[token("or")]
    WordOr,
    #[token("packed")]
    WordPacked,
    #[token("procedure")]
    WordProcedure,
    #[token("program")]
    WordProgram,
    #[token("record")]
    WordRecord,
    #[token("repeat")]
    WordRepeat,
    #[token("set")]
    WordSet,
    #[token("then")]
    WordThen,
    #[token("to")]
    WordTo,
    #[token("type")]
    WordType,
    #[token("until")]
    WordUntil,
    #[token("var")]
    WordVar,
    #[token("while")]
    WordWhile,
    #[token("with")]
    WordWith,
 }