More comments.

7 years ago · 2bba0ca4c8
parent 470ffe448d
commit 2bba0ca4c8
1 changed files with 48 additions and 3 deletions
--- a/src/read.rs
+++ b/src/read.rs
@ -10,21 +10,58 @@ use write::IndexFileWriter;

 /// A `IndexFileReader` does a single linear pass over an index file from
 /// beginning to end. Needless to say, this is not how an index is normally
-/// used! This is only used when merging multiple index files.
+/// used! This is used only when merging multiple index files.
+///
+/// The only way to advance through the file is to use the `.move_entry_to()`
+/// method.
 pub struct IndexFileReader {
+    /// Reader that reads the actual index data.
+    ///
+    /// We have two readers. The index data is most of the file. There's also a
+    /// table of contents, stored separately at the end. We have to read them
+    /// in tandem, so we open the file twice.
    main: BufReader<File>,
+
+    /// Reader that reads the table of contents. (Since this table is stored at
+    /// the end of the file, we have to begin by `seek`ing to it; see the code
+    /// in `IndexFileReader::open_and_delete`.)
    contents: BufReader<File>,
+
+    /// The next entry in the table of contents, if any; or `None` if we've
+    /// reached the end of the table. `IndexFileReader` always reads ahead one
+    /// entry in the contents and stores it here.
    next: Option<Entry>
 }

+/// An entry in the table of contents of an index file.
+///
+/// Each entry in the table of contents is small. It consists of a string, the
+/// `term`; summary information about that term, as used in the corpus (`df`);
+/// and a pointer to bulkier data that tells more (`offset` and `nbytes`).
 pub struct Entry {
+    /// The term is a word that appears in one or more documents in the corpus.
+    /// The index file contains information about the documents that use this
+    /// word.
    pub term: String,
+
+    /// Total number of documents in the corpus that contain this term.
    pub df: u32,
+
+    /// Offset of the index data for this term from the beginning of the file, in bytes.
    pub offset: u64,
+
+    /// Length of the index data for this term, in bytes.
    pub nbytes: u64
 }

 impl IndexFileReader {
+    /// Open an index file to read it from beginning to end.
+    ///
+    /// This deletes the file, which may not work properly on Windows. Patches
+    /// welcome!  On Unix, it works like this: the file immediately disappears
+    /// from its directory, but it'll still take up space on disk until the
+    /// file is closed, which normally happens when the `IndexFileReader` is
+    /// dropped.
    pub fn open_and_delete<P: AsRef<Path>>(filename: P) -> io::Result<IndexFileReader> {
        let filename = filename.as_ref();
        let mut main_raw = File::open(filename)?;
@ -53,6 +90,9 @@ impl IndexFileReader {
        })
    }

+    /// Read the next entry from the table of contents.
+    ///
+    /// Returns `Ok(None)` if we have reached the end of the file.
    fn read_entry(f: &mut BufReader<File>) -> io::Result<Option<Entry>> {
        // If the first read here fails with `UnexpectedEof`,
        // that's considered a success, with no entry read.
@ -85,8 +125,13 @@ impl IndexFileReader {
        }))
    }

+    /// Borrow a reference to the next entry in the table of contents.
+    /// (Since we always read ahead one entry, this method can't fail.)
+    ///
+    /// Returns `None` if we've reached the end of the file.
    pub fn peek(&self) -> Option<&Entry> { self.next.as_ref() }

+    /// True if the next entry is for the given term.
    pub fn is_at(&self, term: &str) -> bool {
        match self.next {
            Some(ref e) => e.term == term,
@ -94,8 +139,8 @@ impl IndexFileReader {
        }
    }

-    /// Copy the current entry to the specified output stream,
-    /// then read the header for the next entry.
+    /// Copy the current entry to the specified output stream, then read the
+    /// header for the next entry.
    pub fn move_entry_to(&mut self, out: &mut IndexFileWriter) -> io::Result<()> {
        // This block limits the scope of borrowing `self.next` (for `e`),
        // because after this block is over we'll want to assign to `self.next`.