mirror of
https://github.com/edg-l/rustyman.git
synced 2024-11-09 17:48:20 +00:00
use the stack instead of heap in more places
This commit is contained in:
parent
40badf70c4
commit
7bbc325eb7
168
src/lib.rs
168
src/lib.rs
|
@ -27,9 +27,7 @@
|
||||||
#![deny(rustdoc::missing_doc_code_examples)]
|
#![deny(rustdoc::missing_doc_code_examples)]
|
||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
cell::RefCell,
|
collections::{BinaryHeap},
|
||||||
collections::{BinaryHeap, HashMap},
|
|
||||||
rc::Rc,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
use bit_vec::BitVec;
|
use bit_vec::BitVec;
|
||||||
|
@ -38,22 +36,25 @@ use bit_vec::BitVec;
|
||||||
// - https://en.wikipedia.org/wiki/Huffman_coding
|
// - https://en.wikipedia.org/wiki/Huffman_coding
|
||||||
// - https://aquarchitect.github.io/swift-algorithm-club/Huffman%20Coding/
|
// - https://aquarchitect.github.io/swift-algorithm-club/Huffman%20Coding/
|
||||||
|
|
||||||
|
/// Max symbols in the frequency table. Covers all possible u8 values.
|
||||||
|
pub const MAX_SYMBOLS: usize = u8::MAX as usize + 1;
|
||||||
|
|
||||||
|
const TREE_SIZE: usize = MAX_SYMBOLS * 2 - 1;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
struct Node {
|
struct Node {
|
||||||
pub data: Option<u8>,
|
pub index: usize,
|
||||||
pub count: usize,
|
pub count: usize,
|
||||||
pub index: Option<usize>,
|
|
||||||
pub parent: Option<usize>,
|
pub parent: Option<usize>,
|
||||||
pub left: Option<usize>,
|
pub left: Option<usize>,
|
||||||
pub right: Option<usize>,
|
pub right: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Node {
|
impl Node {
|
||||||
fn new(data: u8, count: usize) -> Self {
|
fn new(index: usize, count: usize) -> Self {
|
||||||
Self {
|
Self {
|
||||||
data: Some(data),
|
index,
|
||||||
count,
|
count,
|
||||||
index: None,
|
|
||||||
parent: None,
|
parent: None,
|
||||||
left: None,
|
left: None,
|
||||||
right: None,
|
right: None,
|
||||||
|
@ -87,112 +88,69 @@ impl Ord for Node {
|
||||||
/// - Decompress with [Self::decompress]
|
/// - Decompress with [Self::decompress]
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub struct Huffman {
|
pub struct Huffman {
|
||||||
tree: Vec<Node>,
|
tree: [Node; TREE_SIZE],
|
||||||
// index lookup table for the leaf nodes.
|
root_index: usize,
|
||||||
indexes: HashMap<u8, usize>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Huffman {
|
impl Huffman {
|
||||||
/// Initializes the huffman interface using the provided frequency table.
|
/// Initializes the huffman interface using the provided frequency table.
|
||||||
pub fn new(frequency_table: &HashMap<u8, usize>) -> Self {
|
pub fn new(frequency_table: &[usize; MAX_SYMBOLS]) -> Self {
|
||||||
let tree = Self::build_tree(frequency_table);
|
let mut tree = std::array::from_fn(|i| Node::new(i, 0));
|
||||||
let indexes = tree
|
let root_index = Self::build_tree(&mut tree, frequency_table);
|
||||||
.iter()
|
|
||||||
.filter(|x| x.data.is_some())
|
|
||||||
.map(|n| (n.data.unwrap(), n.index.expect("should have index")))
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
Self { tree, indexes }
|
Self { tree, root_index }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates the Huffman frequency table from the provided data and initializes from it.
|
/// Creates the Huffman frequency table from the provided data and initializes from it.
|
||||||
pub fn new_from_data(data: &[u8]) -> Self {
|
pub fn new_from_data(data: &[u8]) -> Self {
|
||||||
Self::new(&Self::calculate_freq_table(data))
|
let mut table = [0; MAX_SYMBOLS];
|
||||||
|
Self::calculate_freq_table(&mut table, data);
|
||||||
|
Self::new(&table)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculates the frequency table from the provided data.
|
/// Calculates the frequency table from the provided data.
|
||||||
pub fn calculate_freq_table(data: &[u8]) -> HashMap<u8, usize> {
|
pub fn calculate_freq_table(table: &mut [usize; MAX_SYMBOLS], data: &[u8]) {
|
||||||
let mut table: HashMap<u8, usize> = HashMap::with_capacity(256.min(data.len() / 2));
|
table.fill(0);
|
||||||
|
|
||||||
for i in data {
|
for i in data {
|
||||||
if let Some(c) = table.get_mut(i) {
|
table[*i as usize] += 1;
|
||||||
*c += 1;
|
|
||||||
} else {
|
|
||||||
table.insert(*i, 1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
table
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Builds a binary tree, the root is the last node, the leafs are at the start.
|
/// Builds a binary tree, the root is the last node, the leafs are at the start.
|
||||||
fn build_tree(table: &HashMap<u8, usize>) -> Vec<Node> {
|
///
|
||||||
let mut priority_queue: BinaryHeap<Rc<RefCell<Node>>> = table
|
/// Returns the root index.
|
||||||
|
fn build_tree(tree: &mut [Node; TREE_SIZE], table: &[usize; MAX_SYMBOLS]) -> usize {
|
||||||
|
let mut priority_queue: BinaryHeap<Node> = table
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(c, v)| Rc::new(RefCell::new(Node::new(*c, *v))))
|
.enumerate()
|
||||||
|
.map(|(index, v)| Node::new(index, *v))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let mut tree: Vec<Rc<RefCell<Node>>> = Vec::with_capacity(priority_queue.len() * 2);
|
let mut tree_index = 256;
|
||||||
|
|
||||||
// Handle case where the frequency table has only 1 value.
|
|
||||||
if priority_queue.len() == 1 {
|
|
||||||
let shared_node = priority_queue.pop().unwrap();
|
|
||||||
let mut node = shared_node.borrow_mut();
|
|
||||||
node.index = Some(tree.len());
|
|
||||||
|
|
||||||
tree.push(shared_node.clone());
|
|
||||||
|
|
||||||
let parent = Node {
|
|
||||||
data: None,
|
|
||||||
count: node.count,
|
|
||||||
left: node.index,
|
|
||||||
right: None,
|
|
||||||
parent: None,
|
|
||||||
index: Some(tree.len()),
|
|
||||||
};
|
|
||||||
|
|
||||||
node.parent = parent.index;
|
|
||||||
|
|
||||||
let parent = Rc::new(RefCell::new(parent));
|
|
||||||
tree.push(parent);
|
|
||||||
}
|
|
||||||
|
|
||||||
while priority_queue.len() > 1 {
|
while priority_queue.len() > 1 {
|
||||||
let shared_node1 = priority_queue.pop().unwrap();
|
let node1 = priority_queue.pop().unwrap();
|
||||||
let shared_node2 = priority_queue.pop().unwrap();
|
let node2 = priority_queue.pop().unwrap();
|
||||||
|
|
||||||
let mut node1 = shared_node1.borrow_mut();
|
tree[node1.index] = node1;
|
||||||
if node1.index.is_none() {
|
tree[node2.index] = node2;
|
||||||
node1.index = Some(tree.len());
|
tree[node1.index].parent = Some(tree_index);
|
||||||
tree.push(shared_node1.clone());
|
tree[node2.index].parent = Some(tree_index);
|
||||||
}
|
|
||||||
|
|
||||||
let mut node2 = shared_node2.borrow_mut();
|
|
||||||
if node2.index.is_none() {
|
|
||||||
node2.index = Some(tree.len());
|
|
||||||
tree.push(shared_node2.clone());
|
|
||||||
}
|
|
||||||
|
|
||||||
let parent_index = tree.len();
|
|
||||||
|
|
||||||
node1.parent = Some(parent_index);
|
|
||||||
node2.parent = Some(parent_index);
|
|
||||||
|
|
||||||
let parent = Node {
|
let parent = Node {
|
||||||
data: None,
|
|
||||||
count: node1.count + node2.count,
|
count: node1.count + node2.count,
|
||||||
left: node1.index,
|
left: Some(node1.index),
|
||||||
right: node2.index,
|
right: Some(node2.index),
|
||||||
parent: None,
|
parent: None,
|
||||||
index: Some(parent_index),
|
index: tree_index,
|
||||||
};
|
};
|
||||||
|
tree[tree_index] = parent;
|
||||||
|
tree_index += 1;
|
||||||
|
|
||||||
let parent = Rc::new(RefCell::new(parent));
|
|
||||||
tree.push(parent.clone());
|
|
||||||
priority_queue.push(parent);
|
priority_queue.push(parent);
|
||||||
}
|
}
|
||||||
|
|
||||||
tree.into_iter().map(|x| *x.borrow()).collect()
|
priority_queue.pop().unwrap().index
|
||||||
}
|
}
|
||||||
|
|
||||||
// Recursively walk to the root and back to calculate the bits.
|
// Recursively walk to the root and back to calculate the bits.
|
||||||
|
@ -223,9 +181,7 @@ impl Huffman {
|
||||||
for b in data.iter() {
|
for b in data.iter() {
|
||||||
self.traverse(
|
self.traverse(
|
||||||
&mut bits,
|
&mut bits,
|
||||||
*self.indexes.get(b).unwrap_or_else(|| {
|
*b as usize,
|
||||||
panic!("frequency table did not contain this byte: {:?}", b)
|
|
||||||
}),
|
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -241,13 +197,12 @@ impl Huffman {
|
||||||
|
|
||||||
let bits = BitVec::from_bytes(data);
|
let bits = BitVec::from_bytes(data);
|
||||||
let mut decompressed = Vec::with_capacity(bits.len() * 2);
|
let mut decompressed = Vec::with_capacity(bits.len() * 2);
|
||||||
let root_index = self.tree.len() - 1;
|
let byte_count = self.tree[self.root_index].count;
|
||||||
let byte_count = self.tree[root_index].count;
|
|
||||||
|
|
||||||
let mut bits_iter = bits.iter();
|
let mut bits_iter = bits.iter();
|
||||||
|
|
||||||
for _ in 0..byte_count {
|
for _ in 0..byte_count {
|
||||||
let mut index = root_index;
|
let mut index = self.root_index;
|
||||||
|
|
||||||
while self.tree[index].left.is_some() || self.tree[index].right.is_some() {
|
while self.tree[index].left.is_some() || self.tree[index].right.is_some() {
|
||||||
let bit = bits_iter.next().expect("missing data");
|
let bit = bits_iter.next().expect("missing data");
|
||||||
|
@ -258,7 +213,7 @@ impl Huffman {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
decompressed.push(self.tree[index].data.expect("should have data"));
|
decompressed.push(index as u8);
|
||||||
}
|
}
|
||||||
|
|
||||||
decompressed
|
decompressed
|
||||||
|
@ -296,22 +251,25 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn create_freq_table() {
|
fn create_freq_table() {
|
||||||
let table = Huffman::calculate_freq_table(&[0]);
|
let mut table = [0; MAX_SYMBOLS];
|
||||||
|
Huffman::calculate_freq_table(&mut table, &[0u8]);
|
||||||
|
|
||||||
assert_eq!(table.len(), 1);
|
assert_eq!(table[0], 1);
|
||||||
assert_eq!(*table.get(&0).unwrap(), 1);
|
|
||||||
|
|
||||||
let table = Huffman::calculate_freq_table(&[0, 1, 2, 2, 3, 3, 3]);
|
|
||||||
|
|
||||||
assert_eq!(table.len(), 4);
|
|
||||||
assert_eq!(*table.get(&0).unwrap(), 1);
|
|
||||||
assert_eq!(*table.get(&1).unwrap(), 1);
|
|
||||||
assert_eq!(*table.get(&2).unwrap(), 2);
|
|
||||||
assert_eq!(*table.get(&3).unwrap(), 3);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_payload_size_1() {
|
fn payload_size_0() {
|
||||||
|
let payload = &[];
|
||||||
|
|
||||||
|
let huffman = Huffman::new_from_data(payload);
|
||||||
|
let compressed = huffman.compress(payload);
|
||||||
|
let decompressed = huffman.decompress(&compressed);
|
||||||
|
|
||||||
|
assert_eq!(&payload[..], decompressed)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn payload_size_1() {
|
||||||
let payload = &[0u8];
|
let payload = &[0u8];
|
||||||
|
|
||||||
let huffman = Huffman::new_from_data(payload);
|
let huffman = Huffman::new_from_data(payload);
|
||||||
|
@ -334,10 +292,12 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn proptest_freq_table(data: Vec<u8>) {
|
fn proptest_freq_table(data: Vec<u8>) {
|
||||||
let table = Huffman::calculate_freq_table(&data);
|
let mut table = [0; MAX_SYMBOLS];
|
||||||
|
Huffman::calculate_freq_table(&mut table, &data);
|
||||||
|
|
||||||
for b in &data {
|
for b in data {
|
||||||
prop_assert!(table.get(b).is_some());
|
prop_assert!(table.get(b as usize).is_some());
|
||||||
|
prop_assert!(table[b as usize] > 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue