InMemoryIndex: use default trait

pull/3/head
Horki 3 years ago
parent 1bd7691d47
commit e1f5214930

@@ -4,8 +4,8 @@
//! `InMemoryIndex` can be used to do that, up to the size of the machine's
//! memory.
use std::collections::HashMap;
use byteorder::{LittleEndian, WriteBytesExt};
use std::collections::HashMap;
/// Break a string into words.
fn tokenize(text: &str) -> Vec<&str> {
@@ -21,6 +21,7 @@ fn tokenize(text: &str) -> Vec<&str> {
/// answer simple search queries. And you can use the `read`, `write`, and
/// `merge` modules to save an in-memory index to disk and merge it with other
/// indices, producing a large index.
#[derive(Default)]
pub struct InMemoryIndex {
/// The total number of words in the indexed documents.
pub word_count: usize,
@@ -34,7 +35,7 @@ pub struct InMemoryIndex {
/// document id in increasing order. This is handy for some algorithms you
/// might want to run on the index, so we preserve this property wherever
/// possible.
pub map: HashMap<String, Vec<Hit>>
pub map: HashMap<String, Vec<Hit>>,
}
/// A `Hit` indicates that a particular document contains some term, how many
@@ -48,10 +49,7 @@ pub type Hit = Vec<u8>;
impl InMemoryIndex {
/// Create a new, empty index.
///
/// Delegates to the `#[derive(Default)]` impl, which zero-initializes
/// `word_count` and creates an empty `map` — identical to the former
/// hand-written struct literal, but kept in sync automatically if
/// fields are added. (The stripped diff showed both the old literal
/// body and this one; only the post-commit body is valid code.)
pub fn new() -> InMemoryIndex {
    InMemoryIndex::default()
}
/// Index a single document.
@@ -64,20 +62,22 @@ impl InMemoryIndex {
let text = text.to_lowercase();
let tokens = tokenize(&text);
for (i, token) in tokens.iter().enumerate() {
let hits =
index.map
.entry(token.to_string())
.or_insert_with(|| {
let mut hits = Vec::with_capacity(4 + 4);
hits.write_u32::<LittleEndian>(document_id).unwrap();
vec![hits]
});
let hits = index.map.entry(token.to_string()).or_insert_with(|| {
let mut hits = Vec::with_capacity(4 + 4);
hits.write_u32::<LittleEndian>(document_id).unwrap();
vec![hits]
});
hits[0].write_u32::<LittleEndian>(i as u32).unwrap();
index.word_count += 1;
}
if document_id % 100 == 0 {
println!("indexed document {}, {} bytes, {} words", document_id, text.len(), index.word_count);
println!(
"indexed document {}, {} bytes, {} words",
document_id,
text.len(),
index.word_count
);
}
index
@@ -90,9 +90,7 @@ impl InMemoryIndex {
/// `*self` remains sorted by document id after merging.
///
/// Moves every posting list out of `other` and appends it to the
/// corresponding entry in `self.map`, creating an empty `Vec` for
/// terms `self` has not seen yet. `other` is consumed, so hits are
/// moved rather than cloned. The total word count is accumulated.
pub fn merge(&mut self, other: InMemoryIndex) {
    for (term, hits) in other.map {
        // entry API: one hash lookup whether the term is new or not;
        // or_default() builds the empty Vec only on a miss.
        self.map.entry(term).or_default().extend(hits)
    }
    self.word_count += other.word_count;
}

Loading…
Cancel
Save