use super::*;
use std::{
collections::{hash_map::Entry, HashMap},
fs,
mem::replace,
path::{Path, PathBuf},
};
/// A collection of [`Source`]s that can be looked up by an identifier of type `Id`.
pub trait Cache<Id: ?Sized> {
/// The text container held by the [`Source`]s this cache returns; it must be viewable as a `str`.
type Storage: AsRef<str>;
/// Looks up the source associated with `id`.
///
/// Takes `&mut self`, so an implementation is free to update its own state while fetching.
///
/// # Errors
///
/// Returns a boxed [`fmt::Debug`] value describing the failure when no source can be provided.
/// The boxed value is allowed to borrow from `self`.
fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, Box<dyn fmt::Debug + '_>>;
/// Returns a displayable form of `id`, or `None` when the implementation has nothing to show.
///
/// The returned value may borrow from `id` but not from the cache itself.
fn display<'a>(&self, id: &'a Id) -> Option<Box<dyn fmt::Display + 'a>>;
}
/// Forwards [`Cache`] through a mutable reference, so `&mut cache` can be handed to code that
/// takes a cache by value.
///
/// `C` may be unsized, which lets `&mut dyn Cache<Id, Storage = S>` act as a cache as well.
impl<'b, C: Cache<Id> + ?Sized, Id: ?Sized> Cache<Id> for &'b mut C {
    type Storage = C::Storage;

    /// Delegates to the referenced cache's `fetch`.
    fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, Box<dyn fmt::Debug + '_>> {
        C::fetch(self, id)
    }

    /// Delegates to the referenced cache's `display`.
    fn display<'a>(&self, id: &'a Id) -> Option<Box<dyn fmt::Display + 'a>> {
        C::display(self, id)
    }
}
/// Forwards [`Cache`] through a `Box`, so a boxed cache can be used like the cache it owns.
///
/// `C` may be unsized, which lets `Box<dyn Cache<Id, Storage = S>>` act as a cache as well.
impl<C: Cache<Id> + ?Sized, Id: ?Sized> Cache<Id> for Box<C> {
    type Storage = C::Storage;

    /// Delegates to the boxed cache's `fetch`.
    fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, Box<dyn fmt::Debug + '_>> {
        C::fetch(self, id)
    }

    /// Delegates to the boxed cache's `display`.
    fn display<'a>(&self, id: &'a Id) -> Option<Box<dyn fmt::Display + 'a>> {
        C::display(self, id)
    }
}
/// Position and size of a single line, recorded in both character and byte units.
///
/// As built by `Source`'s `From` conversion, the lengths include the line terminator.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct Line {
    /// Character index at which the line starts.
    offset: usize,
    /// Number of characters in the line.
    char_len: usize,
    /// Byte index at which the line starts.
    byte_offset: usize,
    /// Number of bytes in the line.
    byte_len: usize,
}

impl Line {
    /// Character index at which this line starts.
    pub fn offset(&self) -> usize {
        self.offset
    }

    /// Number of characters in this line.
    pub fn len(&self) -> usize {
        self.char_len
    }

    /// Whether this line contains no characters at all.
    pub fn is_empty(&self) -> bool {
        self.char_len == 0
    }

    /// Half-open range of character indices covered by this line.
    pub fn span(&self) -> Range<usize> {
        let start = self.offset;
        let end = start + self.char_len;
        start..end
    }

    /// Half-open range of byte indices covered by this line.
    fn byte_span(&self) -> Range<usize> {
        let start = self.byte_offset;
        let end = start + self.byte_len;
        start..end
    }
}
/// A piece of text together with a precomputed table of its lines.
///
/// `I` is the container that holds the text; it defaults to `String`.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct Source<I: AsRef<str> = String> {
/// The underlying text container.
text: I,
/// One entry per line, as built by the `From<I>` conversion.
lines: Vec<Line>,
/// Total length of the text in characters, as counted by the `From<I>` conversion.
len: usize,
/// Total length of the text in bytes, as counted by the `From<I>` conversion.
byte_len: usize,
}
impl<I: AsRef<str>> Source<I> {
/// Returns the full text as a string slice.
pub fn text(&self) -> &str {
self.text.as_ref()
}
}
impl<I: AsRef<str>> From<I> for Source<I> {
fn from(input: I) -> Self {
let mut char_offset = 0;
let mut byte_offset = 0;
let mut last_line: Option<(Line, bool)> = None;
let mut lines: Vec<Line> = input
.as_ref()
.split_inclusive([
'\r', '\n', '\x0B', '\x0C', '\u{0085}', '\u{2028}', '\u{2029}', ])
.flat_map(|line| {
if let Some((last, ends_with_cr)) = last_line.as_mut() {
if *ends_with_cr && line == "\n" {
last.char_len += 1;
last.byte_len += 1;
char_offset += 1;
byte_offset += 1;
return last_line.take().map(|(l, _)| l);
}
}
let char_len = line.chars().count();
let ends_with_cr = line.ends_with('\r');
let line = Line {
offset: char_offset,
char_len,
byte_offset,
byte_len: line.len(),
};
char_offset += char_len;
byte_offset += line.byte_len;
replace(&mut last_line, Some((line, ends_with_cr))).map(|(l, _)| l)
})
.collect();
if let Some((l, _)) = last_line {
lines.push(l);
}
Self {
text: input,
lines,
len: char_offset,
byte_len: byte_offset,
}
}
}
impl<I: AsRef<str>> Source<I> {
    /// Length of the source in characters.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Whether the source contains no characters.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Iterates over every character of the source text.
    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
        let text: &str = self.text.as_ref();
        text.chars()
    }

    /// Returns the line at index `idx`, or `None` if there is no such line.
    pub fn line(&self, idx: usize) -> Option<Line> {
        self.lines.get(idx).copied()
    }

    /// Iterates over all lines in order.
    pub fn lines(&self) -> impl ExactSizeIterator<Item = Line> + '_ {
        self.lines.iter().copied()
    }

    /// Finds the line containing the character at `offset`.
    ///
    /// Returns the line, its index, and the character column of `offset` within it. An offset
    /// equal to the source length is accepted and resolves to the last line. Returns `None` when
    /// `offset` is past the end or the source has no lines.
    ///
    /// # Panics
    ///
    /// Panics if the located line starts after `offset`, which would mean the line table is not
    /// sorted by offset.
    pub fn get_offset_line(&self, offset: usize) -> Option<(Line, usize, usize)> {
        if offset > self.len {
            return None;
        }
        let idx = match self.lines.binary_search_by_key(&offset, |line| line.offset) {
            // `offset` is exactly the start of a line.
            Ok(exact) => exact,
            // Otherwise the line just before the insertion point contains it.
            Err(insert_at) => insert_at.saturating_sub(1),
        };
        let line = self.line(idx)?;
        assert!(
            offset >= line.offset,
            "offset = {}, line.offset = {}",
            offset,
            line.offset
        );
        Some((line, idx, offset - line.offset))
    }

    /// Finds the line containing the byte at `byte_offset`.
    ///
    /// Returns the line, its index, and the byte column of `byte_offset` within it. A byte offset
    /// equal to the source's byte length is accepted and resolves to the last line. Returns
    /// `None` when `byte_offset` is past the end or the source has no lines.
    ///
    /// # Panics
    ///
    /// Panics if the located line starts after `byte_offset`, which would mean the line table is
    /// not sorted by byte offset.
    pub fn get_byte_line(&self, byte_offset: usize) -> Option<(Line, usize, usize)> {
        if byte_offset > self.byte_len {
            return None;
        }
        let idx = match self
            .lines
            .binary_search_by_key(&byte_offset, |line| line.byte_offset)
        {
            Ok(exact) => exact,
            Err(insert_at) => insert_at.saturating_sub(1),
        };
        let line = self.line(idx)?;
        assert!(
            byte_offset >= line.byte_offset,
            "byte_offset = {}, line.byte_offset = {}",
            byte_offset,
            line.byte_offset
        );
        Some((line, idx, byte_offset - line.byte_offset))
    }

    /// Returns the half-open range of line indices touched by `span`.
    ///
    /// If the span's start cannot be resolved to a line the range starts at `0`; if its end
    /// cannot be resolved the range ends at the number of lines.
    pub fn get_line_range<S: Span>(&self, span: &S) -> Range<usize> {
        let first = match self.get_offset_line(span.start()) {
            Some((_, idx, _)) => idx,
            None => 0,
        };
        // The span's end is exclusive, so its last character sits one before it; never look
        // earlier than the start, though.
        let last_char = span.end().saturating_sub(1).max(span.start());
        let past_last = match self.get_offset_line(last_char) {
            Some((_, idx, _)) => idx + 1,
            None => self.lines.len(),
        };
        first..past_last
    }

    /// Returns the text of `line`, or `None` if its byte range is not a valid slice of the source
    /// text.
    pub fn get_line_text(&self, line: Line) -> Option<&'_ str> {
        let bytes = line.byte_span();
        self.text.as_ref().get(bytes)
    }
}
/// Lets a lone [`Source`] act as a cache keyed by the unit type `()`.
impl<I: AsRef<str>> Cache<()> for Source<I> {
type Storage = I;
/// Always succeeds and returns `self`; the `()` id is ignored.
fn fetch(&mut self, _: &()) -> Result<&Source<I>, Box<dyn fmt::Debug + '_>> {
Ok(self)
}
/// Always returns `None`: this impl provides no displayable form for the `()` id.
fn display(&self, _: &()) -> Option<Box<dyn fmt::Display>> {
None
}
}
/// Lets an `(id, source)` pair act as a cache that knows exactly one source.
impl<I: AsRef<str>, Id: fmt::Display + Eq> Cache<Id> for (Id, Source<I>) {
    type Storage = I;

    /// Returns the paired source when `id` equals the stored id.
    ///
    /// # Errors
    ///
    /// Returns a boxed message naming `id` when it does not match the stored id.
    fn fetch(&mut self, id: &Id) -> Result<&Source<I>, Box<dyn fmt::Debug + '_>> {
        if id == &self.0 {
            return Ok(&self.1);
        }
        let message = format!("Failed to fetch source '{}'", id);
        Err(Box::new(message))
    }

    /// Displays the id through its own [`fmt::Display`] implementation.
    fn display<'a>(&self, id: &'a Id) -> Option<Box<dyn fmt::Display + 'a>> {
        let shown: Box<dyn fmt::Display + 'a> = Box::new(id);
        Some(shown)
    }
}
/// A [`Cache`] that loads sources from the filesystem on demand and keeps them keyed by path.
#[derive(Default, Debug, Clone)]
pub struct FileCache {
    /// Sources read so far, keyed by the path they were requested under.
    files: HashMap<PathBuf, Source>,
}

impl Cache<Path> for FileCache {
    type Storage = String;

    /// Returns the source for `path`, reading the file the first time the path is requested.
    ///
    /// # Errors
    ///
    /// Returns the boxed I/O error when the file cannot be read as a string; nothing is stored
    /// for `path` in that case.
    fn fetch(&mut self, path: &Path) -> Result<&Source, Box<dyn fmt::Debug + '_>> {
        let cached = match self.files.entry(path.to_path_buf()) {
            Entry::Occupied(hit) => hit.into_mut(),
            Entry::Vacant(slot) => match fs::read_to_string(path) {
                Ok(contents) => slot.insert(Source::from(contents)),
                Err(io_err) => return Err(Box::new(io_err)),
            },
        };
        Ok(cached)
    }

    /// Displays the path via [`Path::display`].
    fn display<'a>(&self, path: &'a Path) -> Option<Box<dyn fmt::Display + 'a>> {
        let shown: Box<dyn fmt::Display + 'a> = Box::new(path.display());
        Some(shown)
    }
}
/// A [`Cache`] that produces sources by calling a user-supplied function and remembers the
/// results.
#[derive(Debug, Clone)]
pub struct FnCache<Id, F, I>
where
    I: AsRef<str>,
{
    /// Sources already known, keyed by id.
    sources: HashMap<Id, Source<I>>,
    /// The function used to obtain the text for an id that is not stored yet.
    get: F,
}

impl<Id, F, I> FnCache<Id, F, I>
where
    I: AsRef<str>,
{
    /// Creates an empty cache that will use `get` to obtain sources.
    pub fn new(get: F) -> Self {
        let sources = HashMap::default();
        Self { sources, get }
    }

    /// Adds every entry of `sources` to the cache, replacing any source already stored under
    /// the same id, and returns the cache.
    pub fn with_sources(mut self, sources: HashMap<Id, Source<I>>) -> Self
    where
        Id: Eq + Hash,
    {
        self.sources.extend(sources);
        self
    }
}
impl<Id: fmt::Display + Hash + PartialEq + Eq + Clone, F, I> Cache<Id> for FnCache<Id, F, I>
where
    I: AsRef<str>,
    F: for<'a> FnMut(&'a Id) -> Result<I, Box<dyn fmt::Debug>>,
{
    type Storage = I;

    /// Returns the source stored under `id`, calling the user function to produce it on the
    /// first request.
    ///
    /// # Errors
    ///
    /// Passes through the error returned by the user function; nothing is stored for `id` in
    /// that case.
    fn fetch(&mut self, id: &Id) -> Result<&Source<I>, Box<dyn fmt::Debug + '_>> {
        let cached = match self.sources.entry(id.clone()) {
            Entry::Occupied(hit) => hit.into_mut(),
            Entry::Vacant(slot) => match (self.get)(id) {
                Ok(text) => slot.insert(Source::from(text)),
                Err(failure) => return Err(failure),
            },
        };
        Ok(cached)
    }

    /// Displays the id through its own [`fmt::Display`] implementation.
    fn display<'a>(&self, id: &'a Id) -> Option<Box<dyn fmt::Display + 'a>> {
        let shown: Box<dyn fmt::Display + 'a> = Box::new(id);
        Some(shown)
    }
}
pub fn sources<Id, S, I>(iter: I) -> impl Cache<Id>
where
Id: fmt::Display + Hash + PartialEq + Eq + Clone + 'static,
I: IntoIterator<Item = (Id, S)>,
S: AsRef<str>,
{
FnCache::new(
(move |id| Err(Box::new(format!("Failed to fetch source '{}'", id)) as _)) as fn(&_) -> _,
)
.with_sources(
iter.into_iter()
.map(|(id, s)| (id, Source::from(s)))
.collect(),
)
}
#[cfg(test)]
mod tests {
    use std::iter::zip;
    use std::sync::Arc;

    use super::Source;

    /// Checks every parsed line of `source` against the matching entry of `expected`: start
    /// offset, character count and text must agree, and the offsets must add up to the total
    /// length.
    fn assert_lines_match<'a, I, L>(source: &Source<I>, expected: L)
    where
        I: AsRef<str>,
        L: IntoIterator<Item = &'a str>,
    {
        let mut running_offset = 0;
        for (parsed, raw_line) in zip(source.lines.iter().copied(), expected) {
            assert_eq!(parsed.offset, running_offset);
            assert_eq!(parsed.char_len, raw_line.chars().count());
            assert_eq!(source.get_line_text(parsed).unwrap(), raw_line);
            running_offset += parsed.char_len;
        }
        assert_eq!(source.len, running_offset);
    }

    /// Joins `lines` into one string, parses it, and expects exactly those lines back.
    fn test_with_lines(lines: Vec<&str>) {
        let joined: String = lines.concat();
        let source = Source::from(joined);

        assert_eq!(source.lines.len(), lines.len());
        assert_lines_match(&source, lines);
    }

    #[test]
    fn source_from_empty() {
        // The empty string yields no lines at all.
        test_with_lines(vec![]);
    }

    #[test]
    fn source_from_single() {
        test_with_lines(vec!["Single line"]);
        test_with_lines(vec!["Single line with LF\n"]);
        test_with_lines(vec!["Single line with CRLF\r\n"]);
    }

    #[test]
    fn source_from_multi() {
        test_with_lines(vec!["Two\r\n", "lines\n"]);
        test_with_lines(vec!["Some\n", "more\r\n", "lines"]);
        test_with_lines(vec!["\n", "\r\n", "\n", "Empty Lines"]);
    }

    // Despite the name, the helper requires each line's text to come back verbatim, trailing
    // whitespace included.
    #[test]
    fn source_from_trims_trailing_spaces() {
        test_with_lines(vec!["Trailing spaces \n", "are trimmed\t"]);
    }

    #[test]
    fn source_from_alternate_line_endings() {
        test_with_lines(vec![
            "CR\r",
            "VT\x0B",
            "FF\x0C",
            "NEL\u{0085}",
            "LS\u{2028}",
            "PS\u{2029}",
        ]);
    }

    #[test]
    fn source_from_other_string_types() {
        let raw = r#"A raw string
with multiple
lines behind
an Arc"#;
        let shared: Arc<str> = Arc::from(raw);
        let source = Source::from(shared);

        assert_eq!(source.lines.len(), 4);
        assert_lines_match(&source, raw.split_inclusive('\n'));
    }

    #[test]
    fn source_from_reference() {
        let raw = r#"A raw string
with multiple
lines"#;

        // Going through a helper shows that a borrowed `&str` can back a `Source`.
        fn non_owning_source(input: &str) -> Source<&str> {
            Source::from(input)
        }

        let source = non_owning_source(raw);
        assert_eq!(source.lines.len(), 3);
    }
}