use the stack instead of heap in more places

This commit is contained in:
Edgar 2022-10-26 15:23:54 +02:00
parent 40badf70c4
commit 7bbc325eb7
No known key found for this signature in database

View file

@ -27,9 +27,7 @@
#![deny(rustdoc::missing_doc_code_examples)] #![deny(rustdoc::missing_doc_code_examples)]
use std::{ use std::{
cell::RefCell, collections::{BinaryHeap},
collections::{BinaryHeap, HashMap},
rc::Rc,
}; };
use bit_vec::BitVec; use bit_vec::BitVec;
@ -38,22 +36,25 @@ use bit_vec::BitVec;
// - https://en.wikipedia.org/wiki/Huffman_coding // - https://en.wikipedia.org/wiki/Huffman_coding
// - https://aquarchitect.github.io/swift-algorithm-club/Huffman%20Coding/ // - https://aquarchitect.github.io/swift-algorithm-club/Huffman%20Coding/
/// Max symbols in the frequency table. Covers all possible u8 values.
pub const MAX_SYMBOLS: usize = u8::MAX as usize + 1;
const TREE_SIZE: usize = MAX_SYMBOLS * 2 - 1;
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
struct Node { struct Node {
pub data: Option<u8>, pub index: usize,
pub count: usize, pub count: usize,
pub index: Option<usize>,
pub parent: Option<usize>, pub parent: Option<usize>,
pub left: Option<usize>, pub left: Option<usize>,
pub right: Option<usize>, pub right: Option<usize>,
} }
impl Node { impl Node {
fn new(data: u8, count: usize) -> Self { fn new(index: usize, count: usize) -> Self {
Self { Self {
data: Some(data), index,
count, count,
index: None,
parent: None, parent: None,
left: None, left: None,
right: None, right: None,
@ -87,112 +88,69 @@ impl Ord for Node {
/// - Decompress with [Self::decompress] /// - Decompress with [Self::decompress]
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct Huffman { pub struct Huffman {
tree: Vec<Node>, tree: [Node; TREE_SIZE],
// index lookup table for the leaf nodes. root_index: usize,
indexes: HashMap<u8, usize>,
} }
impl Huffman { impl Huffman {
/// Initializes the huffman interface using the provided frequency table. /// Initializes the huffman interface using the provided frequency table.
pub fn new(frequency_table: &HashMap<u8, usize>) -> Self { pub fn new(frequency_table: &[usize; MAX_SYMBOLS]) -> Self {
let tree = Self::build_tree(frequency_table); let mut tree = std::array::from_fn(|i| Node::new(i, 0));
let indexes = tree let root_index = Self::build_tree(&mut tree, frequency_table);
.iter()
.filter(|x| x.data.is_some())
.map(|n| (n.data.unwrap(), n.index.expect("should have index")))
.collect();
Self { tree, indexes } Self { tree, root_index }
} }
/// Creates the Huffman frequency table from the provided data and initializes from it. /// Creates the Huffman frequency table from the provided data and initializes from it.
pub fn new_from_data(data: &[u8]) -> Self { pub fn new_from_data(data: &[u8]) -> Self {
Self::new(&Self::calculate_freq_table(data)) let mut table = [0; MAX_SYMBOLS];
Self::calculate_freq_table(&mut table, data);
Self::new(&table)
} }
/// Calculates the frequency table from the provided data. /// Calculates the frequency table from the provided data.
pub fn calculate_freq_table(data: &[u8]) -> HashMap<u8, usize> { pub fn calculate_freq_table(table: &mut [usize; MAX_SYMBOLS], data: &[u8]) {
let mut table: HashMap<u8, usize> = HashMap::with_capacity(256.min(data.len() / 2)); table.fill(0);
for i in data { for i in data {
if let Some(c) = table.get_mut(i) { table[*i as usize] += 1;
*c += 1;
} else {
table.insert(*i, 1);
} }
} }
table
}
/// Builds a binary tree, the root is the last node, the leafs are at the start. /// Builds a binary tree, the root is the last node, the leafs are at the start.
fn build_tree(table: &HashMap<u8, usize>) -> Vec<Node> { ///
let mut priority_queue: BinaryHeap<Rc<RefCell<Node>>> = table /// Returns the root index.
fn build_tree(tree: &mut [Node; TREE_SIZE], table: &[usize; MAX_SYMBOLS]) -> usize {
let mut priority_queue: BinaryHeap<Node> = table
.iter() .iter()
.map(|(c, v)| Rc::new(RefCell::new(Node::new(*c, *v)))) .enumerate()
.map(|(index, v)| Node::new(index, *v))
.collect(); .collect();
let mut tree: Vec<Rc<RefCell<Node>>> = Vec::with_capacity(priority_queue.len() * 2); let mut tree_index = 256;
// Handle case where the frequency table has only 1 value.
if priority_queue.len() == 1 {
let shared_node = priority_queue.pop().unwrap();
let mut node = shared_node.borrow_mut();
node.index = Some(tree.len());
tree.push(shared_node.clone());
let parent = Node {
data: None,
count: node.count,
left: node.index,
right: None,
parent: None,
index: Some(tree.len()),
};
node.parent = parent.index;
let parent = Rc::new(RefCell::new(parent));
tree.push(parent);
}
while priority_queue.len() > 1 { while priority_queue.len() > 1 {
let shared_node1 = priority_queue.pop().unwrap(); let node1 = priority_queue.pop().unwrap();
let shared_node2 = priority_queue.pop().unwrap(); let node2 = priority_queue.pop().unwrap();
let mut node1 = shared_node1.borrow_mut(); tree[node1.index] = node1;
if node1.index.is_none() { tree[node2.index] = node2;
node1.index = Some(tree.len()); tree[node1.index].parent = Some(tree_index);
tree.push(shared_node1.clone()); tree[node2.index].parent = Some(tree_index);
}
let mut node2 = shared_node2.borrow_mut();
if node2.index.is_none() {
node2.index = Some(tree.len());
tree.push(shared_node2.clone());
}
let parent_index = tree.len();
node1.parent = Some(parent_index);
node2.parent = Some(parent_index);
let parent = Node { let parent = Node {
data: None,
count: node1.count + node2.count, count: node1.count + node2.count,
left: node1.index, left: Some(node1.index),
right: node2.index, right: Some(node2.index),
parent: None, parent: None,
index: Some(parent_index), index: tree_index,
}; };
tree[tree_index] = parent;
tree_index += 1;
let parent = Rc::new(RefCell::new(parent));
tree.push(parent.clone());
priority_queue.push(parent); priority_queue.push(parent);
} }
tree.into_iter().map(|x| *x.borrow()).collect() priority_queue.pop().unwrap().index
} }
// Recursively walk to the root and back to calculate the bits. // Recursively walk to the root and back to calculate the bits.
@ -223,9 +181,7 @@ impl Huffman {
for b in data.iter() { for b in data.iter() {
self.traverse( self.traverse(
&mut bits, &mut bits,
*self.indexes.get(b).unwrap_or_else(|| { *b as usize,
panic!("frequency table did not contain this byte: {:?}", b)
}),
None, None,
) )
} }
@ -241,13 +197,12 @@ impl Huffman {
let bits = BitVec::from_bytes(data); let bits = BitVec::from_bytes(data);
let mut decompressed = Vec::with_capacity(bits.len() * 2); let mut decompressed = Vec::with_capacity(bits.len() * 2);
let root_index = self.tree.len() - 1; let byte_count = self.tree[self.root_index].count;
let byte_count = self.tree[root_index].count;
let mut bits_iter = bits.iter(); let mut bits_iter = bits.iter();
for _ in 0..byte_count { for _ in 0..byte_count {
let mut index = root_index; let mut index = self.root_index;
while self.tree[index].left.is_some() || self.tree[index].right.is_some() { while self.tree[index].left.is_some() || self.tree[index].right.is_some() {
let bit = bits_iter.next().expect("missing data"); let bit = bits_iter.next().expect("missing data");
@ -258,7 +213,7 @@ impl Huffman {
} }
} }
decompressed.push(self.tree[index].data.expect("should have data")); decompressed.push(index as u8);
} }
decompressed decompressed
@ -296,22 +251,25 @@ mod tests {
#[test] #[test]
fn create_freq_table() { fn create_freq_table() {
let table = Huffman::calculate_freq_table(&[0]); let mut table = [0; MAX_SYMBOLS];
Huffman::calculate_freq_table(&mut table, &[0u8]);
assert_eq!(table.len(), 1); assert_eq!(table[0], 1);
assert_eq!(*table.get(&0).unwrap(), 1);
let table = Huffman::calculate_freq_table(&[0, 1, 2, 2, 3, 3, 3]);
assert_eq!(table.len(), 4);
assert_eq!(*table.get(&0).unwrap(), 1);
assert_eq!(*table.get(&1).unwrap(), 1);
assert_eq!(*table.get(&2).unwrap(), 2);
assert_eq!(*table.get(&3).unwrap(), 3);
} }
#[test] #[test]
fn test_payload_size_1() { fn payload_size_0() {
let payload = &[];
let huffman = Huffman::new_from_data(payload);
let compressed = huffman.compress(payload);
let decompressed = huffman.decompress(&compressed);
assert_eq!(&payload[..], decompressed)
}
#[test]
fn payload_size_1() {
let payload = &[0u8]; let payload = &[0u8];
let huffman = Huffman::new_from_data(payload); let huffman = Huffman::new_from_data(payload);
@ -334,10 +292,12 @@ mod tests {
#[test] #[test]
fn proptest_freq_table(data: Vec<u8>) { fn proptest_freq_table(data: Vec<u8>) {
let table = Huffman::calculate_freq_table(&data); let mut table = [0; MAX_SYMBOLS];
Huffman::calculate_freq_table(&mut table, &data);
for b in &data { for b in data {
prop_assert!(table.get(b).is_some()); prop_assert!(table.get(b as usize).is_some());
prop_assert!(table[b as usize] > 0);
} }
} }
} }