fix: clippy lints

Signed-off-by: simonsan <14062932+simonsan@users.noreply.github.com>
pull/5/head
simonsan 3 months ago
parent 604ac59393
commit e538dfe98b

@@ -4,8 +4,8 @@
 //! `InMemoryIndex` can be used to do that, up to the size of the machine's
 //! memory.
 
-use std::collections::HashMap;
 use byteorder::{LittleEndian, WriteBytesExt};
+use std::collections::HashMap;
 
 /// Break a string into words.
 fn tokenize(text: &str) -> Vec<&str> {
@@ -34,7 +34,7 @@ pub struct InMemoryIndex {
     /// document id in increasing order. This is handy for some algorithms you
     /// might want to run on the index, so we preserve this property wherever
     /// possible.
-    pub map: HashMap<String, Vec<Hit>>
+    pub map: HashMap<String, Vec<Hit>>,
 }
 
 /// A `Hit` indicates that a particular document contains some term, how many
@@ -47,37 +47,39 @@ pub type Hit = Vec<u8>;
 impl InMemoryIndex {
     /// Create a new, empty index.
-    pub fn new() -> InMemoryIndex {
-        InMemoryIndex {
+    pub fn new() -> Self {
+        Self {
             word_count: 0,
-            map: HashMap::new()
+            map: HashMap::new(),
         }
     }
 
     /// Index a single document.
     ///
    /// The resulting index contains exactly one `Hit` per term.
-    pub fn from_single_document(document_id: usize, text: String) -> InMemoryIndex {
+    pub fn from_single_document(document_id: usize, text: String) -> Self {
         let document_id = document_id as u32;
-        let mut index = InMemoryIndex::new();
+        let mut index = Self::new();
         let text = text.to_lowercase();
         let tokens = tokenize(&text);
         for (i, token) in tokens.iter().enumerate() {
-            let hits =
-                index.map
-                .entry(token.to_string())
-                .or_insert_with(|| {
-                    let mut hits = Vec::with_capacity(4 + 4);
-                    hits.write_u32::<LittleEndian>(document_id).unwrap();
-                    vec![hits]
-                });
+            let hits = index.map.entry((*token).to_string()).or_insert_with(|| {
+                let mut hits = Vec::with_capacity(4 + 4);
+                hits.write_u32::<LittleEndian>(document_id).unwrap();
+                vec![hits]
+            });
             hits[0].write_u32::<LittleEndian>(i as u32).unwrap();
             index.word_count += 1;
         }
         if document_id % 100 == 0 {
-            println!("indexed document {}, {} bytes, {} words", document_id, text.len(), index.word_count);
+            println!(
+                "indexed document {}, {} bytes, {} words",
+                document_id,
+                text.len(),
+                index.word_count
+            );
         }
 
         index
@@ -88,11 +90,9 @@ impl InMemoryIndex {
     /// If both `*self` and `other` are sorted by document id, and all document
     /// ids in `other` are greater than every document id in `*self`, then
     /// `*self` remains sorted by document id after merging.
-    pub fn merge(&mut self, other: InMemoryIndex) {
+    pub fn merge(&mut self, other: Self) {
         for (term, hits) in other.map {
-            self.map.entry(term)
-                .or_insert_with(|| vec![])
-                .extend(hits)
+            self.map.entry(term).or_default().extend(hits);
         }
         self.word_count += other.word_count;
     }

@@ -3,27 +3,27 @@ use std::io::{self, BufWriter};
 use std::mem;
 use std::path::{Path, PathBuf};
 
-use crate::tmp::TmpDir;
 use crate::read::IndexFileReader;
+use crate::tmp::TmpDir;
 use crate::write::IndexFileWriter;
 
 pub struct FileMerge {
     output_dir: PathBuf,
     tmp_dir: TmpDir,
-    stacks: Vec<Vec<PathBuf>>
+    stacks: Vec<Vec<PathBuf>>,
 }
 
 // How many files to merge at a time, at most.
 const NSTREAMS: usize = 8;
 
-const MERGED_FILENAME: &'static str = "index.dat";
+const MERGED_FILENAME: &str = "index.dat";
 
 impl FileMerge {
-    pub fn new(output_dir: &Path) -> FileMerge {
-        FileMerge {
+    pub fn new(output_dir: &Path) -> Self {
+        Self {
             output_dir: output_dir.to_owned(),
-            tmp_dir: TmpDir::new(output_dir.to_owned()),
-            stacks: vec![]
+            tmp_dir: TmpDir::new(output_dir),
+            stacks: vec![],
         }
     }
@@ -63,22 +63,20 @@ impl FileMerge {
         }
         assert!(tmp.len() <= 1);
         match tmp.pop() {
-            Some(last_file) =>
-                fs::rename(last_file, self.output_dir.join(MERGED_FILENAME)),
-            None =>
-                Err(io::Error::new(io::ErrorKind::Other,
-                                   "no documents were parsed or none contained any words"))
+            Some(last_file) => fs::rename(last_file, self.output_dir.join(MERGED_FILENAME)),
+            None => Err(io::Error::new(
+                io::ErrorKind::Other,
+                "no documents were parsed or none contained any words",
+            )),
         }
     }
 }
 
-fn merge_streams(files: Vec<PathBuf>, out: BufWriter<File>)
-    -> io::Result<()>
-{
-    let mut streams: Vec<IndexFileReader> =
-        files.into_iter()
-            .map(IndexFileReader::open_and_delete)
-            .collect::<io::Result<_>>()?;
+fn merge_streams(files: Vec<PathBuf>, out: BufWriter<File>) -> io::Result<()> {
+    let mut streams: Vec<IndexFileReader> = files
+        .into_iter()
+        .map(IndexFileReader::open_and_delete)
+        .collect::<io::Result<_>>()?;
 
     let mut output = IndexFileWriter::new(out)?;
@@ -113,8 +111,8 @@ fn merge_streams(files: Vec<PathBuf>, out: BufWriter<File>)
                 }
             }
         }
-        output.write_contents_entry(term, df, point, nbytes as u64);
-        point += nbytes as u64;
+        output.write_contents_entry(term, df, point, nbytes);
+        point += nbytes;
     }
 
     assert!(streams.iter().all(|s| s.peek().is_none()));

@@ -1,12 +1,12 @@
 //! Reading index files linearly from disk, a capability needed for merging
 //! index files.
 
+use crate::write::IndexFileWriter;
+use byteorder::{LittleEndian, ReadBytesExt};
 use std::fs::{self, File};
 use std::io::prelude::*;
 use std::io::{self, BufReader, SeekFrom};
 use std::path::Path;
-use byteorder::{LittleEndian, ReadBytesExt};
-use crate::write::IndexFileWriter;
 
 /// A `IndexFileReader` does a single linear pass over an index file from
 /// beginning to end. Needless to say, this is not how an index is normally
@@ -30,7 +30,7 @@ pub struct IndexFileReader {
     /// The next entry in the table of contents, if any; or `None` if we've
     /// reached the end of the table. `IndexFileReader` always reads ahead one
     /// entry in the contents and stores it here.
-    next: Option<Entry>
+    next: Option<Entry>,
 }
 
 /// An entry in the table of contents of an index file.
@@ -51,7 +51,7 @@ pub struct Entry {
     pub offset: u64,
 
     /// Length of the index data for this term, in bytes.
-    pub nbytes: u64
+    pub nbytes: u64,
 }
 
 impl IndexFileReader {
@@ -62,31 +62,35 @@ impl IndexFileReader {
     /// from its directory, but it'll still take up space on disk until the
     /// file is closed, which normally happens when the `IndexFileReader` is
     /// dropped.
-    pub fn open_and_delete<P: AsRef<Path>>(filename: P) -> io::Result<IndexFileReader> {
+    pub fn open_and_delete<P: AsRef<Path>>(filename: P) -> io::Result<Self> {
         let filename = filename.as_ref();
         let mut main_raw = File::open(filename)?;
 
         // Read the file header.
         let contents_offset = main_raw.read_u64::<LittleEndian>()?;
-        println!("opened {}, table of contents starts at {}", filename.display(), contents_offset);
+        println!(
+            "opened {}, table of contents starts at {}",
+            filename.display(),
+            contents_offset
+        );
 
         // Open again so we have two read heads;
         // move the contents read head to its starting position.
         // Set up buffering.
         let mut contents_raw = File::open(filename)?;
-        contents_raw.seek(SeekFrom::Start(contents_offset))?;
+        let _start = contents_raw.seek(SeekFrom::Start(contents_offset))?;
         let main = BufReader::new(main_raw);
         let mut contents = BufReader::new(contents_raw);
 
         // We always read ahead one entry, so load the first entry right away.
-        let first = IndexFileReader::read_entry(&mut contents)?;
+        let first = Self::read_entry(&mut contents)?;
 
-        fs::remove_file(filename)?; // YOLO
+        fs::remove_file(filename)?; // YOLO
 
-        Ok(IndexFileReader {
-            main: main,
-            contents: contents,
-            next: first
+        Ok(Self {
+            main,
+            contents,
+            next: first,
         })
     }
@@ -98,30 +102,30 @@ impl IndexFileReader {
         // that's considered a success, with no entry read.
         let offset = match f.read_u64::<LittleEndian>() {
             Ok(value) => value,
-            Err(err) =>
+            Err(err) => {
                 if err.kind() == io::ErrorKind::UnexpectedEof {
-                    return Ok(None)
+                    return Ok(None);
                 } else {
-                    return Err(err)
+                    return Err(err);
                 }
+            }
         };
 
         let nbytes = f.read_u64::<LittleEndian>()?;
         let df = f.read_u32::<LittleEndian>()?;
         let term_len = f.read_u32::<LittleEndian>()? as usize;
-        let mut bytes = Vec::with_capacity(term_len);
-        bytes.resize(term_len, 0);
+        let mut bytes = vec![0; term_len];
         f.read_exact(&mut bytes)?;
 
         let term = match String::from_utf8(bytes) {
             Ok(s) => s,
-            Err(_) => return Err(io::Error::new(io::ErrorKind::Other, "unicode fail"))
+            Err(_) => return Err(io::Error::new(io::ErrorKind::Other, "unicode fail")),
         };
 
         Ok(Some(Entry {
-            term: term,
-            df: df,
-            offset: offset,
-            nbytes: nbytes
+            term,
+            df,
+            offset,
+            nbytes,
         }))
     }
@@ -129,13 +133,15 @@ impl IndexFileReader {
     /// (Since we always read ahead one entry, this method can't fail.)
     ///
     /// Returns `None` if we've reached the end of the file.
-    pub fn peek(&self) -> Option<&Entry> { self.next.as_ref() }
+    pub fn peek(&self) -> Option<&Entry> {
+        self.next.as_ref()
+    }
 
     /// True if the next entry is for the given term.
     pub fn is_at(&self, term: &str) -> bool {
         match self.next {
             Some(ref e) => e.term == term,
-            None => false
+            None => false,
         }
     }
@@ -148,11 +154,12 @@ impl IndexFileReader {
         let e = self.next.as_ref().expect("no entry to move");
         if e.nbytes > usize::max_value() as u64 {
             // This can only happen on 32-bit platforms.
-            return Err(io::Error::new(io::ErrorKind::Other,
-                "computer not big enough to hold index entry"));
+            return Err(io::Error::new(
+                io::ErrorKind::Other,
+                "computer not big enough to hold index entry",
+            ));
         }
-        let mut buf = Vec::with_capacity(e.nbytes as usize);
-        buf.resize(e.nbytes as usize, 0);
+        let mut buf = vec![0; e.nbytes as usize];
         self.main.read_exact(&mut buf)?;
         out.write_main(&buf)?;
     }

@@ -9,8 +9,8 @@ pub struct TmpDir {
 }
 
 impl TmpDir {
-    pub fn new<P: AsRef<Path>>(dir: P) -> TmpDir {
-        TmpDir {
+    pub fn new<P: AsRef<Path>>(dir: P) -> Self {
+        Self {
             dir: dir.as_ref().to_owned(),
             n: 1
         }

@@ -1,10 +1,10 @@
-use std::fs::File;
-use std::io::{self, BufWriter, SeekFrom};
-use std::io::prelude::*;
-use std::path::PathBuf;
 use crate::index::InMemoryIndex;
 use crate::tmp::TmpDir;
 use byteorder::{LittleEndian, WriteBytesExt};
+use std::fs::File;
+use std::io::prelude::*;
+use std::io::{self, BufWriter, SeekFrom};
+use std::path::PathBuf;
 
 /// Writer for saving an index to a binary file.
 ///
@@ -23,17 +23,17 @@ pub struct IndexFileWriter {
     writer: BufWriter<File>,
 
     /// The table of contents for this file.
-    contents_buf: Vec<u8>
+    contents_buf: Vec<u8>,
 }
 
 impl IndexFileWriter {
-    pub fn new(mut f: BufWriter<File>) -> io::Result<IndexFileWriter> {
+    pub fn new(mut f: BufWriter<File>) -> io::Result<Self> {
         const HEADER_SIZE: u64 = 8;
         f.write_u64::<LittleEndian>(0)?;
-        Ok(IndexFileWriter {
+        Ok(Self {
             offset: HEADER_SIZE,
             writer: f,
-            contents_buf: vec![]
+            contents_buf: vec![],
         })
     }
@@ -48,7 +48,9 @@ impl IndexFileWriter {
         self.contents_buf.write_u64::<LittleEndian>(nbytes).unwrap();
         self.contents_buf.write_u32::<LittleEndian>(df).unwrap();
         let bytes = term.bytes();
-        self.contents_buf.write_u32::<LittleEndian>(bytes.len() as u32).unwrap();
+        self.contents_buf
+            .write_u32::<LittleEndian>(bytes.len() as u32)
+            .unwrap();
         self.contents_buf.extend(bytes);
     }
@@ -56,8 +58,12 @@ impl IndexFileWriter {
     pub fn finish(mut self) -> io::Result<()> {
         let contents_start = self.offset;
         self.writer.write_all(&self.contents_buf)?;
-        println!("{} bytes main, {} bytes total", contents_start, contents_start + self.contents_buf.len() as u64);
-        self.writer.seek(SeekFrom::Start(0))?;
+        println!(
+            "{} bytes main, {} bytes total",
+            contents_start,
+            contents_start + self.contents_buf.len() as u64
+        );
+        let _start = self.writer.seek(SeekFrom::Start(0))?;
         self.writer.write_u64::<LittleEndian>(contents_start)?;
         Ok(())
     }
@@ -70,7 +76,7 @@ pub fn write_index_to_tmp_file(index: InMemoryIndex, tmp_dir: &mut TmpDir) -> io
     // The merge algorithm requires the entries within each file to be sorted by term.
     // Sort before writing anything.
     let mut index_as_vec: Vec<_> = index.map.into_iter().collect();
-    index_as_vec.sort_by(|&(ref a, _), &(ref b, _)| a.cmp(b));
+    index_as_vec.sort_by(|(a, _), (b, _)| a.cmp(b));
 
     for (term, hits) in index_as_vec {
         let df = hits.len() as u32;
@@ -83,6 +89,6 @@ pub fn write_index_to_tmp_file(index: InMemoryIndex, tmp_dir: &mut TmpDir) -> io
     }
 
     writer.finish()?;
-    println!("wrote file {:?}", filename);
+    println!("wrote file {filename:?}");
     Ok(filename)
 }
