use logos::Logos;
use std::convert::Infallible;

#[derive(Debug, PartialEq, Clone, Default)]
pub enum LexingError {
    NumberParseError,
    #[default]
    Other,
}

impl From<std::num::ParseIntError> for LexingError {
    fn from(_: std::num::ParseIntError) -> Self {
        LexingError::NumberParseError
    }
}

impl From<Infallible> for LexingError {
    fn from(_: Infallible) -> Self {
        LexingError::Other
    }
}

#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(error = LexingError)]
#[logos(skip r"[ \t\r\n\f]+")] // whitespace, including carriage returns
#[logos(skip r"//[^\n]*")] // line comments
#[logos(skip r"/\*(?:[^*]|\*+[^*/])*\*+/")] // block comments, including ones ending in `**/`
pub enum Token {
    #[token("let")]
    KeywordLet,
    #[token("const")]
    KeywordConst,
    #[token("fn")]
    KeywordFn,
    #[token("return")]
    KeywordReturn,
    #[token("struct")]
    KeywordStruct,
    #[token("if")]
    KeywordIf,
    #[token("else")]
    KeywordElse,
    #[token("while")]
    KeywordWhile,
    #[token("for")]
    KeywordFor,
    #[token("match")]
    KeywordMatch,
    #[token("mod")]
    KeywordMod,
    #[token("pub")]
    KeywordPub,
    #[token("mut")]
    KeywordMut,
    #[token("use")]
    KeywordUse,
    #[token("in")]
    KeywordIn,
    #[token("extern")]
    KeywordExtern,
    #[token("as")]
    KeywordAs,

    // Unicode-aware identifiers, following UAX #31: https://unicode.org/reports/tr31/
    #[regex(r"[\p{XID_Start}_]\p{XID_Continue}*", |lex| lex.slice().to_string())]
    Identifier(String),

    // Literals
    #[regex(r"\d+", |lex| lex.slice().parse::<u128>().unwrap())]
    Integer(u128),
    #[regex(r#""(?:[^"]|\\")*""#, |lex| lex.slice().to_string())]
    String(String),
    #[regex(r"(true|false)", |lex| lex.slice().parse::<bool>().unwrap())]
    Boolean(bool),

    #[token("(")]
    LeftParen,
    #[token(")")]
    RightParen,
    #[token("{")]
    LeftBracket,
    #[token("}")]
    RightBracket,
    #[token("[")]
    LeftSquareBracket,
    #[token("]")]
    RightSquareBracket,
    #[token("=")]
    Assign,
    #[token(";")]
    Semicolon,
    #[token(":")]
    Colon,
    #[token("::")]
    DoubleColon,
    #[token("->")]
    Arrow,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("..")]
    TwoDots,
    #[token("<")]
    LessThanSign,
    #[token(">")]
    MoreThanSign,
    #[token(">=")]
    MoreThanEqSign,
    #[token("<=")]
    LessThanEqSign,

    #[token("+")]
    OperatorAdd,
    #[token("-")]
    OperatorSub,
    #[token("*")]
    OperatorMul,
    #[token("/")]
    OperatorDiv,
    #[token("%")]
    OperatorRem,
    #[token("&&")]
    OperatorAnd,
    #[token("||")]
    OperatorOr,
    #[token("==")]
    OperatorEq,
    #[token("!=")]
    OperatorNe,
    #[token("!")]
    OperatorNot,
    #[token("~")]
    OperatorBitwiseNot,
    #[token("^")]
    OperatorBitwiseXor,
    #[token("&")]
    OperatorBitwiseAnd,
    #[token("|")]
    OperatorBitwiseOr,
}
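
// ---------------------------------------------------------------------------
// Usage sketch (assumption: the standard `logos` iterator API, where
// `Token::lexer(src)` yields `Result<Token, LexingError>` items). The test
// below is illustrative only; the input string and expectations are made up
// to show how skips, keywords, identifiers, and literals come out.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lexes_a_simple_let_binding() {
        let mut lex = Token::lexer("let answer = 42; // trailing comment");

        assert_eq!(lex.next(), Some(Ok(Token::KeywordLet)));
        assert_eq!(lex.next(), Some(Ok(Token::Identifier("answer".to_string()))));
        assert_eq!(lex.next(), Some(Ok(Token::Assign)));
        assert_eq!(lex.next(), Some(Ok(Token::Integer(42))));
        assert_eq!(lex.next(), Some(Ok(Token::Semicolon)));
        // Whitespace and the line comment are skipped, so the stream ends here.
        assert_eq!(lex.next(), None);
    }
}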