mirror of
https://github.com/edg-l/sitewriter.git
synced 2024-11-09 17:48:24 +00:00
initial commit
This commit is contained in:
parent
66c9797854
commit
11dc59ff46
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
/target
|
||||
Cargo.lock
|
18
Cargo.toml
Normal file
18
Cargo.toml
Normal file
|
@ -0,0 +1,18 @@
|
|||
[package]
|
||||
name = "sitewriter"
|
||||
version = "0.1.0"
|
||||
authors = ["Edgar L. <contact@edgarluque.com>"]
|
||||
edition = "2018"
|
||||
description = "A sitemap writing library."
|
||||
readme = "README.md"
|
||||
repository = "https://github.com/edg-l/sitewriter"
|
||||
license = "MIT"
|
||||
keywords = ["sitemap", "sitemapindex", "xml", "parser"]
|
||||
categories = ["parsing"]
|
||||
|
||||
[features]
|
||||
default = ["chrono"]
|
||||
|
||||
[dependencies]
|
||||
chrono = { version = "0.4", optional = true }
|
||||
quick-xml = "0.20"
|
51
README.md
Normal file
51
README.md
Normal file
|
@ -0,0 +1,51 @@
|
|||
# Sitewriter
|
||||
A rust library to generate sitemaps.
|
||||
|
||||
It uses the [quick-xml](https://github.com/tafia/quick-xml) so it should be fast.
|
||||
|
||||
To handle the `lastmod` tag it uses [chrono](https://docs.rs/chrono/) but it can be disabled with `default-features = false`.
|
||||
|
||||
|
||||
## Example
|
||||
|
||||
```rust
|
||||
use chrono::prelude::*;
|
||||
use sitewriter::*;
|
||||
|
||||
fn main() {
|
||||
let mut sitemap = Sitemap::new();
|
||||
sitemap.urls.push(Url::new("https://edgarluque.com/projects"));
|
||||
|
||||
sitemap.urls.push(Url {
|
||||
loc: "https://edgarluque.com/",
|
||||
changefreq: Some(ChangeFreq::Daily),
|
||||
priority: Some(1.0),
|
||||
lastmod: Some(Utc::now()),
|
||||
});
|
||||
|
||||
sitemap.urls.push(Url {
|
||||
loc: "https://edgarluque.com/blog",
|
||||
changefreq: Some(ChangeFreq::Weekly),
|
||||
priority: Some(0.8),
|
||||
lastmod: Some(Utc::now()),
|
||||
});
|
||||
|
||||
sitemap.urls.push(Url {
|
||||
loc: "https://edgarluque.com/blog/sitewriter",
|
||||
changefreq: Some(ChangeFreq::Never),
|
||||
priority: Some(0.5),
|
||||
lastmod: Some(Utc.ymd(2020, 11, 22).and_hms(15, 10, 15)),
|
||||
});
|
||||
|
||||
sitemap.urls.push(Url {
|
||||
loc: "https://edgarluque.com/blog/some-future-post",
|
||||
changefreq: Some(ChangeFreq::Never),
|
||||
priority: Some(0.5),
|
||||
lastmod: Some(Utc.from_utc_datetime(&Local.ymd(2020, 12, 5).and_hms(12, 30, 0).naive_utc())),
|
||||
});
|
||||
|
||||
|
||||
let result = sitemap.into_str();
|
||||
println!("{}", result);
|
||||
}
|
||||
```
|
39
examples/gen_sitemap.rs
Normal file
39
examples/gen_sitemap.rs
Normal file
|
@ -0,0 +1,39 @@
|
|||
use chrono::prelude::*;
|
||||
use sitewriter::*;
|
||||
|
||||
fn main() {
|
||||
let mut sitemap = Sitemap::new();
|
||||
sitemap.urls.push(Url::new("https://edgarluque.com/projects"));
|
||||
|
||||
sitemap.urls.push(Url {
|
||||
loc: "https://edgarluque.com/",
|
||||
changefreq: Some(ChangeFreq::Daily),
|
||||
priority: Some(1.0),
|
||||
lastmod: Some(Utc::now()),
|
||||
});
|
||||
|
||||
sitemap.urls.push(Url {
|
||||
loc: "https://edgarluque.com/blog",
|
||||
changefreq: Some(ChangeFreq::Weekly),
|
||||
priority: Some(0.8),
|
||||
lastmod: Some(Utc::now()),
|
||||
});
|
||||
|
||||
sitemap.urls.push(Url {
|
||||
loc: "https://edgarluque.com/blog/sitewriter",
|
||||
changefreq: Some(ChangeFreq::Never),
|
||||
priority: Some(0.5),
|
||||
lastmod: Some(Utc.ymd(2020, 11, 22).and_hms(15, 10, 15)),
|
||||
});
|
||||
|
||||
sitemap.urls.push(Url {
|
||||
loc: "https://edgarluque.com/blog/some-future-post",
|
||||
changefreq: Some(ChangeFreq::Never),
|
||||
priority: Some(0.5),
|
||||
lastmod: Some(Utc.from_utc_datetime(&Local.ymd(2020, 12, 5).and_hms(12, 30, 0).naive_utc())),
|
||||
});
|
||||
|
||||
|
||||
let result = sitemap.into_str();
|
||||
println!("{}", result);
|
||||
}
|
209
src/lib.rs
Normal file
209
src/lib.rs
Normal file
|
@ -0,0 +1,209 @@
|
|||
#[cfg(feature = "chrono")]
|
||||
use chrono::{DateTime, Utc, SecondsFormat};
|
||||
use quick_xml::{
|
||||
events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event},
|
||||
Writer,
|
||||
};
|
||||
use std::fmt::Display;
|
||||
use std::io::Cursor;
|
||||
use std::borrow::Cow;
|
||||
|
||||
/// How frequently the page is likely to change. This value provides general information to search engines and may not correlate exactly to how often they crawl the page.
|
||||
#[derive(Debug)]
|
||||
pub enum ChangeFreq {
|
||||
Always,
|
||||
Hourly,
|
||||
Daily,
|
||||
Weekly,
|
||||
Monthly,
|
||||
Yearly,
|
||||
Never,
|
||||
}
|
||||
|
||||
impl Display for ChangeFreq {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let what = match self {
|
||||
ChangeFreq::Always => "always",
|
||||
ChangeFreq::Hourly => "hourly",
|
||||
ChangeFreq::Daily => "daily",
|
||||
ChangeFreq::Weekly => "weekly",
|
||||
ChangeFreq::Monthly => "montly",
|
||||
ChangeFreq::Yearly => "yearly",
|
||||
ChangeFreq::Never => "never",
|
||||
};
|
||||
f.write_str(what)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parent tag for each URL entry.
|
||||
#[derive(Debug)]
|
||||
pub struct Url<'a> {
|
||||
/// URL of the page.
|
||||
///
|
||||
/// This URL must begin with the protocol (such as http) and end with a trailing slash, if your web server requires it. This value must be less than 2,048 characters.
|
||||
pub loc: &'a str,
|
||||
#[cfg(feature = "chrono")]
|
||||
/// The date of last modification of the file.
|
||||
pub lastmod: Option<DateTime<Utc>>,
|
||||
#[cfg(not(feature = "chrono"))]
|
||||
/// The date of last modification of the file.
|
||||
///
|
||||
/// This date should be in W3C Datetime format. This format allows you to omit the time portion, if desired, and use YYYY-MM-DD.
|
||||
pub lastmod: Option<&'a str>,
|
||||
/// How frequently the page is likely to change.
|
||||
pub changefreq: Option<ChangeFreq>,
|
||||
/// The priority of this URL relative to other URLs on your site. Valid values range from 0.0 to 1.0.
|
||||
///
|
||||
/// This value does not affect how your pages are compared to pages on other sites—it only lets the search engines know which pages you deem most important for the crawlers.
|
||||
pub priority: Option<f32>,
|
||||
}
|
||||
|
||||
impl<'a> Url<'a> {
|
||||
/// Creates a url (sitemap entry) with only the required elements.
|
||||
pub fn new(loc: &'a str) -> Self {
|
||||
Self {
|
||||
loc,
|
||||
lastmod: None,
|
||||
changefreq: None,
|
||||
priority: None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Struct to hold the sitemap information.
|
||||
#[derive(Debug)]
|
||||
pub struct Sitemap<'a> {
|
||||
pub urls: Vec<Url<'a>>,
|
||||
}
|
||||
|
||||
fn write_tag<T: std::io::Write>(writer: &mut Writer<T>, tag: &str, text: &str) {
|
||||
writer
|
||||
.write_event(Event::Start(BytesStart::borrowed_name(tag.as_bytes())))
|
||||
.expect(&format!("error opening {}", tag));
|
||||
writer
|
||||
.write_event(Event::Text(BytesText::from_plain_str(text)))
|
||||
.expect(&format!("error writing text to {}", tag));
|
||||
writer
|
||||
.write_event(Event::End(BytesEnd::borrowed(tag.as_bytes())))
|
||||
.expect(&format!("error opening {}", tag));
|
||||
}
|
||||
|
||||
impl<'a> Sitemap<'a> {
|
||||
/// Create a new sitemap.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
urls: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates the sitemap using the provided writer.
|
||||
///
|
||||
/// It's recommended to use [`into_bytes`] or [`into_str`]
|
||||
pub fn generate<T>(&self, inner_writer: T) -> T
|
||||
where
|
||||
T: std::io::Write,
|
||||
{
|
||||
let mut writer = Writer::new_with_indent(inner_writer, b' ', 4);
|
||||
writer
|
||||
.write_event(Event::Decl(BytesDecl::new(b"1.0", Some(b"UTF-8"), None)))
|
||||
.expect("error creating xml decl");
|
||||
|
||||
let urlset_name = b"urlset";
|
||||
let mut urlset = BytesStart::borrowed_name(urlset_name);
|
||||
urlset.push_attribute(("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9"));
|
||||
writer
|
||||
.write_event(Event::Start(urlset))
|
||||
.expect("error opening urlset");
|
||||
|
||||
for url in self.urls.iter() {
|
||||
writer
|
||||
.write_event(Event::Start(BytesStart::borrowed_name(b"url")))
|
||||
.expect("error opening url");
|
||||
write_tag(&mut writer, "loc", url.loc);
|
||||
|
||||
#[cfg(feature = "chrono")]
|
||||
{
|
||||
if let Some(lastmod) = &url.lastmod {
|
||||
write_tag(&mut writer, "lastmod", &lastmod.to_rfc3339_opts(SecondsFormat::Secs, true));
|
||||
}
|
||||
}
|
||||
#[cfg(not(feature = "chrono"))]
|
||||
{
|
||||
if let Some(lastmod) = &url.lastmod {
|
||||
write_tag(&mut writer, "lastmod", lastmod);
|
||||
}
|
||||
}
|
||||
if let Some(priority) = &url.priority {
|
||||
write_tag(&mut writer, "priority", &format!("{:.1}", priority))
|
||||
}
|
||||
if let Some(changefreq) = &url.changefreq {
|
||||
write_tag(&mut writer, "changefreq", &changefreq.to_string());
|
||||
}
|
||||
|
||||
writer
|
||||
.write_event(Event::End(BytesEnd::borrowed(b"url")))
|
||||
.expect("error closing url");
|
||||
}
|
||||
|
||||
writer
|
||||
.write_event(Event::End(BytesEnd::borrowed(urlset_name)))
|
||||
.expect("error closing urlset");
|
||||
|
||||
writer.into_inner()
|
||||
}
|
||||
|
||||
/// Generates the sitemap.
|
||||
pub fn into_bytes(&self) -> Cow<'a, [u8]> {
|
||||
let inner = Cursor::new(Vec::new());
|
||||
let result = self.generate(inner);
|
||||
Cow::Owned(result.into_inner())
|
||||
}
|
||||
|
||||
/// Generates the sitemap returning a string.
|
||||
pub fn into_str(&self) -> Cow<'a, str> {
|
||||
let bytes = self.into_bytes();
|
||||
let res = std::str::from_utf8(&bytes).expect("error parsing sitemap bytes to str").to_owned();
|
||||
Cow::Owned(res)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::*;
|
||||
|
||||
#[cfg(feature = "chrono")]
|
||||
#[test]
|
||||
fn it_works() {
|
||||
use chrono::Utc;
|
||||
|
||||
let mut sitemap = Sitemap::new();
|
||||
sitemap.urls.push(Url::new("https://domain.com/"));
|
||||
|
||||
sitemap.urls.push(Url {
|
||||
loc: "https://domain.com/url",
|
||||
changefreq: Some(ChangeFreq::Daily),
|
||||
priority: Some(0.8),
|
||||
lastmod: Some(Utc::now())
|
||||
}
|
||||
);
|
||||
|
||||
sitemap.into_str();
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "chrono"))]
|
||||
#[test]
|
||||
fn it_works() {
|
||||
let mut sitemap = Sitemap::new();
|
||||
sitemap.urls.push(Url::new("https://domain.com/"));
|
||||
|
||||
sitemap.urls.push(Url {
|
||||
loc: "https://domain.com/url",
|
||||
changefreq: Some(ChangeFreq::Daily),
|
||||
priority: Some(0.8),
|
||||
lastmod: Some("2020-11-22"),
|
||||
}
|
||||
);
|
||||
|
||||
sitemap.into_str();
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue