This commit is contained in:
Sunli
2022-02-18 10:52:37 +08:00
parent f19e45fa55
commit dde4051da6
4 changed files with 221 additions and 245 deletions

View File

@@ -1,8 +1,10 @@
use std::borrow::Cow;
use crate::Span;
#[derive(Debug, thiserror::Error, Eq, PartialEq)]
#[error("[{}:{}-{}:{}] {message}", .span.start.line, .span.start.column, .span.end.line, .span.end.column)]
pub struct LexerError {
pub span: Span,
pub message: &'static str,
pub message: Cow<'static, str>,
}

View File

@@ -0,0 +1,140 @@
use crate::{LineColumn, Span};
/// A snapshot of an `InputSource` cursor, as returned by
/// `InputSource::location`.
#[derive(Debug, Copy, Clone)]
pub(crate) struct Location {
/// Line/column of the cursor when the snapshot was taken; this is the part
/// that ends up in a `Span`.
pos: LineColumn,
/// Value of the source's `idx` field when the snapshot was taken;
/// `InputSource::string` uses it as a slice bound.
idx: usize,
}
impl From<(Location, Location)> for Span {
#[inline]
fn from((start, end): (Location, Location)) -> Self {
Self {
start: start.pos,
end: end.pos,
}
}
}
/// Cursor state used to read a byte slice while tracking a line/column
/// position.
pub(crate) struct InputSource<'a> {
/// Current line/column; updated by `advance` for every byte it walks over.
pos: LineColumn,
/// Index into `src` read by `peek_char` and `is_empty`, and captured in
/// every `Location`.
idx: usize,
/// The bytes being read.
src: &'a [u8],
}
impl<'a> InputSource<'a> {
    /// Creates a cursor at the first byte of `src`, reported as line 1,
    /// column 1.
    #[inline]
    pub(crate) fn new(src: &'a [u8]) -> Self {
        Self {
            pos: LineColumn::new(1, 1),
            idx: 0,
            src,
        }
    }

    /// Snapshots the cursor (line/column and byte index) so that a span or a
    /// substring can be computed later.
    #[inline]
    pub(crate) fn location(&self) -> Location {
        Location {
            pos: self.pos,
            idx: self.idx,
        }
    }

    /// Returns the span from `start` to the current cursor position.
    #[inline]
    pub(crate) fn span(&self, start: Location) -> Span {
        (start, self.location()).into()
    }

    /// The bytes that have not been consumed yet.
    #[inline]
    fn remaining(&self) -> &'a [u8] {
        &self.src[self.idx..]
    }

    /// Consumes `len` bytes, updating the line/column position.
    ///
    /// A `\n` byte starts a new line at column 1; every other byte adds one
    /// column, so columns count bytes rather than characters.
    ///
    /// `src` is left intact and only `idx` moves: `idx` is an index into the
    /// whole buffer, and `string` relies on that to slice previously
    /// consumed text.
    #[inline]
    fn advance(&mut self, len: usize) {
        debug_assert!(self.idx + len <= self.src.len());

        for &ch in &self.src[self.idx..self.idx + len] {
            if ch == b'\n' {
                self.pos.line += 1;
                self.pos.column = 1;
            } else {
                self.pos.column += 1;
            }
        }

        self.idx += len;
    }

    /// Returns the next byte without consuming it, or `None` at end of input.
    #[inline]
    pub(crate) fn peek_char(&self) -> Option<u8> {
        self.src.get(self.idx).copied()
    }

    /// Consumes and returns the next byte, or `None` at end of input.
    #[inline]
    pub(crate) fn next_char(&mut self) -> Option<u8> {
        let ch = self.peek_char()?;
        self.advance(1);
        Some(ch)
    }

    /// Consumes bytes for as long as `f` accepts them.
    #[inline]
    pub(crate) fn skip_chars_if(&mut self, f: impl Fn(u8) -> bool) {
        let count = self.remaining().iter().take_while(|&&ch| f(ch)).count();
        self.advance(count);
    }

    /// Consumes `needle` if the unread input starts with it and reports
    /// whether it did.
    pub(crate) fn advance_if(&mut self, needle: &[u8]) -> bool {
        let found = self.remaining().starts_with(needle);
        if found {
            self.advance(needle.len());
        }
        found
    }

    /// Consumes everything up to, but not including, the next `{{` or `{%`.
    /// If neither occurs, consumes the rest of the input.
    pub(crate) fn skip_raw_block(&mut self) {
        let rest = self.remaining();
        let mut offset = 0;

        let len = loop {
            match memchr::memchr(b'{', &rest[offset..]) {
                Some(rel) => {
                    // `memchr` reports an index relative to the slice it was
                    // given, so rebase it before looking at the next byte.
                    let brace = offset + rel;
                    if matches!(rest.get(brace + 1), Some(b'{') | Some(b'%')) {
                        break brace;
                    }
                    offset = brace + 1;
                }
                None => break rest.len(),
            }
        };

        self.advance(len);
    }

    /// Consumes a run of ASCII whitespace.
    pub(crate) fn skip_whitespace(&mut self) {
        self.skip_chars_if(|ch| ch.is_ascii_whitespace());
    }

    /// Returns the source text between two locations.
    ///
    /// # Panics
    ///
    /// Panics if the range is out of bounds or is not valid UTF-8.
    #[inline]
    pub(crate) fn string(&self, start: Location, end: Location) -> &'a str {
        debug_assert!(end.idx >= start.idx);
        debug_assert!(start.idx <= self.src.len());
        debug_assert!(end.idx <= self.src.len());
        std::str::from_utf8(&self.src[start.idx..end.idx])
            .expect("locations must delimit valid UTF-8")
    }

    /// Reports whether all input has been consumed.
    #[inline]
    pub(crate) fn is_empty(&self) -> bool {
        self.idx >= self.src.len()
    }
}

View File

@@ -1,6 +1,21 @@
use std::borrow::Cow;
use crate::{LexerError, LineColumn, Span};
use crate::{
input_source::{InputSource, Location},
LexerError, Span,
};
/// Byte classification helpers for identifiers.
mod char {
    /// Reports whether `ch` may start an identifier: an ASCII letter or `_`.
    #[inline]
    pub(super) fn is_ident_begin(ch: u8) -> bool {
        matches!(ch, b'_' | b'a'..=b'z' | b'A'..=b'Z')
    }

    /// Reports whether `ch` may appear inside an identifier: an ASCII letter,
    /// an ASCII digit or `_`.
    #[inline]
    pub(super) fn is_ident(ch: u8) -> bool {
        matches!(ch, b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9')
    }
}
#[derive(Debug, PartialEq)]
pub(crate) enum TokenType<'a> {
@@ -62,6 +77,16 @@ pub(crate) struct Token<'a> {
pub(crate) span: Span,
}
impl<'a> Token<'a> {
#[inline]
pub(crate) fn new(ty: TokenType<'a>, span: impl Into<Span>) -> Self {
Self {
ty,
span: span.into(),
}
}
}
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum Scope {
Template,
@@ -70,279 +95,87 @@ enum Scope {
}
pub(crate) struct Lexer<'a> {
pos: LineColumn,
src: &'a [u8],
input: InputSource<'a>,
scope: Scope,
}
impl<'a> Lexer<'a> {
pub(crate) fn new(input: &'a str) -> Self {
Self {
pos: LineColumn { line: 1, column: 1 },
src: input.as_bytes(),
input: InputSource::new(input.as_bytes()),
scope: Scope::Template,
}
}
fn advance(&mut self, len: usize) -> LineColumn {
for ch in &self.src[..len] {
match *ch {
b'\n' => {
self.pos.line += 1;
self.pos.column = 1;
}
_ => self.pos.column += 1,
}
fn parse_variable(&mut self) -> Result<Token<'a>, LexerError> {
let start_loc = self.input.location();
match self.input.next_char().expect("unexpected end") {
b'|' => Ok(Token::new(TokenType::Pipe, self.input.span(start_loc))),
b'(' => Ok(Token::new(TokenType::ParenOpen, self.input.span(start_loc))),
b')' => Ok(Token::new(
TokenType::ParenClose,
self.input.span(start_loc),
)),
ch if char::is_ident_begin(ch) => self.parse_ident(start_loc),
ch => Err(LexerError {
span: Default::default(),
message: format!("unexpected char: '{}'", ch as char).into(),
}),
}
self.src = &self.src[len..];
self.pos
}
fn parse_in_template(&mut self) -> Option<Result<Token<'a>, LexerError>> {
let start_pos = self.pos;
let idx = {
let mut p = 0;
loop {
match memchr::memchr(b'{', &self.src[p..]) {
Some(idx)
if idx + 1 < self.src.len()
&& (self.src[idx + 1] == b'{' || self.src[idx + 1] == b'%') =>
{
break p + idx;
}
Some(_) => p += idx + 1,
None => break self.src.len(),
};
}
};
let value = &self.src[..idx];
let end_pos = self.advance(idx);
self.advance(2);
Some(Ok(Token {
ty: TokenType::Raw(std::str::from_utf8(value).unwrap()),
span: Span::new(start_pos, end_pos),
}))
}
fn parse_in_variable(&mut self) -> Option<Result<Token<'a>, LexerError>> {
todo!()
}
fn parse_in_tag(&mut self) -> Option<Result<Token<'a>, LexerError>> {
todo!()
}
#[inline]
fn next_char(&mut self) -> Option<u8> {
if self.src.is_empty() {
return None;
}
let ch = self.src[0];
self.advance(1);
Some(ch)
fn parse_ident(&mut self, start_loc: Location) -> Result<Token<'a>, LexerError> {
self.input.skip_chars_if(char::is_ident);
Ok(Token::new(
TokenType::Ident(self.input.string(start_loc, self.input.location())),
(start_loc, self.input.location()),
))
}
#[inline]
fn parse_ident(&mut self) -> Option<Result<Token<'a>, LexerError>> {
self.next_char();
while let Some(ch) = self.next_char() {}
}
fn parse_number(&mut self, start_loc: Location) -> Result<Token<'a>, LexerError> {}
}
impl<'a> Iterator for Lexer<'a> {
type Item = Result<Token<'a>, LexerError>;
fn next(&mut self) -> Option<Self::Item> {
if self.src.is_empty() {
if self.input.is_empty() {
return None;
}
match self.scope {
Scope::Template => {
if self.src.len() >= 2 {
match &self.src[..2] {
b"{{" => {
let start_pos = self.pos;
let end_pos = self.advance(2);
return Some(Ok(Token {
ty: TokenType::VariableStart,
span: Span::new(start_pos, end_pos),
}));
}
b"{%" => {
let start_pos = self.pos;
let end_pos = self.advance(2);
return Some(Ok(Token {
ty: TokenType::TagStart,
span: Span::new(start_pos, end_pos),
}));
}
_ => {}
}
}
let start_loc = self.input.location();
self.parse_in_template()
if self.input.advance_if(b"{{") {
Some(Ok(Token::new(
TokenType::VariableStart,
(start_loc, self.input.location()),
)))
} else if self.input.advance_if(b"{%") {
Some(Ok(Token::new(
TokenType::TagStart,
(start_loc, self.input.location()),
)))
} else {
self.input.skip_raw_block();
Some(Ok(Token::new(
TokenType::Raw(self.input.string(start_loc, self.input.location())),
(start_loc, self.input.location()),
)))
}
}
Scope::Variable => self.parse_in_variable(),
Scope::Tag => self.parse_in_tag(),
Scope::Variable => {
self.input.skip_whitespace();
if self.input.is_empty() {
return None;
}
Some(self.parse_variable())
}
Scope::Tag => todo!(),
}
}
}
/// Reports whether `ch` may start an identifier: an ASCII letter or `_`.
#[inline]
fn is_ident_start(ch: u8) -> bool {
    matches!(ch, b'_' | b'a'..=b'z' | b'A'..=b'Z')
}
/// Reports whether `ch` may appear inside an identifier: an ASCII letter, an
/// ASCII digit or `_`.
#[inline]
fn is_ident_char(ch: u8) -> bool {
    matches!(ch, b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')
}
// #[cfg(test)]
// mod tests {
// use super::*;
//
// fn check_tokens(input: &[u8], tokens: Vec<Result<TemplateToken,
// LexerError>>) { let lexer = TemplateLexer::new(input);
// assert_eq!(lexer.collect::<Vec<_>>(), tokens);
// }
//
// #[test]
// fn test_variable() {
// check_tokens(
// b"{{ abc }}",
// vec![Ok(TemplateToken {
// ty: TemplateTokenType::Variable,
// span: Span {
// start: LineColumn::new(1, 3),
// end: LineColumn::new(1, 8),
// },
// value: b" abc ",
// })],
// );
//
// check_tokens(
// b"{{ abc }} def {{ ghi }}",
// vec![
// Ok(TemplateToken {
// ty: TemplateTokenType::Variable,
// span: Span {
// start: LineColumn::new(1, 3),
// end: LineColumn::new(1, 8),
// },
// value: b" abc ",
// }),
// Ok(TemplateToken {
// ty: TemplateTokenType::Raw,
// span: Span {
// start: LineColumn::new(1, 10),
// end: LineColumn::new(1, 15),
// },
// value: b" def ",
// }),
// Ok(TemplateToken {
// ty: TemplateTokenType::Variable,
// span: Span {
// start: LineColumn::new(1, 17),
// end: LineColumn::new(1, 22),
// },
// value: b" ghi ",
// }),
// ],
// );
// }
//
// #[test]
// fn test_tag() {
// check_tokens(
// b"{% abc %}",
// vec![Ok(TemplateToken {
// ty: TemplateTokenType::Tag,
// span: Span {
// start: LineColumn::new(1, 3),
// end: LineColumn::new(1, 8),
// },
// value: b" abc ",
// })],
// );
//
// check_tokens(
// b"{% abc %} def {% ghi %}",
// vec![
// Ok(TemplateToken {
// ty: TemplateTokenType::Tag,
// span: Span {
// start: LineColumn::new(1, 3),
// end: LineColumn::new(1, 8),
// },
// value: b" abc ",
// }),
// Ok(TemplateToken {
// ty: TemplateTokenType::Raw,
// span: Span {
// start: LineColumn::new(1, 10),
// end: LineColumn::new(1, 15),
// },
// value: b" def ",
// }),
// Ok(TemplateToken {
// ty: TemplateTokenType::Tag,
// span: Span {
// start: LineColumn::new(1, 17),
// end: LineColumn::new(1, 22),
// },
// value: b" ghi ",
// }),
// ],
// );
// }
//
// #[test]
// fn test_unterminated_variable() {
// let mut lexer = TemplateLexer::new(b"abc {{ abc");
//
// assert_eq!(
// lexer.next(),
// Some(Ok(TemplateToken {
// ty: TemplateTokenType::Raw,
// span: Span::new(LineColumn::new(1, 1), LineColumn::new(1,
// 5)), value: b"abc "
// }))
// );
//
// assert_eq!(
// lexer.next(),
// Some(Err(LexerError {
// span: Span::new(LineColumn::new(1, 7), LineColumn::new(1,
// 11)), message: "unterminated variable"
// }))
// );
// }
//
// #[test]
// fn test_unterminated_tag() {
// let mut lexer = TemplateLexer::new(b"abc {% abc");
//
// assert_eq!(
// lexer.next(),
// Some(Ok(TemplateToken {
// ty: TemplateTokenType::Raw,
// span: Span::new(LineColumn::new(1, 1), LineColumn::new(1,
// 5)), value: b"abc "
// }))
// );
//
// assert_eq!(
// lexer.next(),
// Some(Err(LexerError {
// span: Span::new(LineColumn::new(1, 7), LineColumn::new(1,
// 11)), message: "unterminated tag"
// }))
// );
// }
// }

View File

@@ -1,5 +1,6 @@
mod ast;
mod error;
mod input_source;
mod lexer;
mod span;