From 9bda0a4835a7225b9823b3f23dcdef5e8a0c2a1f Mon Sep 17 00:00:00 2001 From: Romain Date: Wed, 23 Jan 2019 22:48:54 +0100 Subject: [PATCH] feat(database): new include/exclude arguments to filter the input folder --- bin/options.js | 14 ++ src/components/index/glob.js | 27 ++-- src/components/index/pattern.js | 36 +++++ test/components/index/glob.spec.js | 192 ++++++++++++------------ test/components/index/pattern.spec.js | 206 ++++++++++++++++++++++++++ 5 files changed, 360 insertions(+), 115 deletions(-) create mode 100644 src/components/index/pattern.js create mode 100644 test/components/index/pattern.spec.js diff --git a/bin/options.js b/bin/options.js index bcedef1..aa09133 100644 --- a/bin/options.js +++ b/bin/options.js @@ -43,6 +43,18 @@ const OPTIONS = { type: 'boolean', 'default': false }, + 'include': { + group: 'Input options:', + description: 'Glob pattern of files to include', + type: 'array', + 'default': false + }, + 'exclude': { + group: 'Input options:', + description: 'Glob pattern of files to exclude', + type: 'array', + 'default': false + }, // ------------------------------------ // Output options @@ -324,6 +336,8 @@ exports.get = (args) => { includePhotos: opts['include-photos'], includeVideos: opts['include-videos'], includeRawPhotos: opts['include-raw-photos'], + include: opts['include'], + exclude: opts['exclude'], cleanup: opts['cleanup'], title: opts['title'], thumbSize: opts['thumb-size'], diff --git a/src/components/index/glob.js b/src/components/index/glob.js index 6a56d83..27b8063 100644 --- a/src/components/index/glob.js +++ b/src/components/index/glob.js @@ -1,6 +1,6 @@ -const micromatch = require('micromatch') const readdir = require('readdir-enhanced') const warn = require('debug')('thumbsup:warn') +const GlobPattern = require('./pattern') const PHOTO_EXT = ['bmp', 'gif', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'webp'] const VIDEO_EXT = ['3gp', 'flv', 'm2ts', 'm4v', 'mkv', 'mp4', 'mov', 'mts', 'ogg', 'ogv', 'webm'] @@ -15,10 +15,14 @@ const RAW_PHOTO_EXT = [ */ exports.find = function (rootFolder, options, callback) { const entries = {} - const pattern = exports.globPattern(options) + const pattern = new GlobPattern({ + include: (options.include && options.include.length > 0) ? options.include : '**/**', + exclude: options.exclude || [], + extensions: exports.supportedExtensions(options) + }) const stream = readdir.readdirStreamStat(rootFolder, { - filter: entry => micromatch.match(entry.path, pattern, { nocase: true }).length !== 0, - deep: stats => canTraverse(stats.path), + filter: file => pattern.match(file.path), + deep: dir => pattern.canTraverse(dir.path), basePath: '', sep: '/' }) @@ -27,21 +31,10 @@ exports.find = function (rootFolder, options, callback) { stream.on('end', () => callback(null, entries)) } -exports.globPattern = function (options) { +exports.supportedExtensions = function (options) { const extensions = [] if (options.includePhotos !== false) Array.prototype.push.apply(extensions, PHOTO_EXT) if (options.includeVideos !== false) Array.prototype.push.apply(extensions, VIDEO_EXT) if (options.includeRawPhotos) Array.prototype.push.apply(extensions, RAW_PHOTO_EXT) - return '**/*.{' + extensions.join(',') + '}' -} - -function canTraverse (folder) { - // ignore folders starting with '.' - // and thumbnail folders from Synology NAS - // it's better to skip them in the "traverse phase" than to remove them at the end - const match = micromatch.match(folder, '**/**', { - dot: false, - ignore: ['**/@eaDir', '#recycle'] - }) - return match.length > 0 + return extensions } diff --git a/src/components/index/pattern.js b/src/components/index/pattern.js new file mode 100644 index 0000000..aaa5456 --- /dev/null +++ b/src/components/index/pattern.js @@ -0,0 +1,36 @@ +const micromatch = require('micromatch') + +class GlobPattern { + constructor ({ include, exclude, extensions }) { + this.includeList = include + this.excludeList = exclude + this.directoryExcludeList = exclude.concat(['**/@eaDir/**', '#recycle/**']) + this.extensions = extPattern(extensions) + } + + match (filePath) { + const opts = { nocase: true } + return micromatch.any(filePath, this.includeList, opts) && + micromatch.any(filePath, this.excludeList, opts) === false && + micromatch.isMatch(filePath, this.extensions, opts) + } + + // this is used to cull folders early + // instead of traversing them but discard all their files later + canTraverse (folderPath) { + const opts = { dot: false, nocase: true } + const withSlash = `${folderPath}/` + return micromatch.any(withSlash, this.includeList, opts) && + micromatch.any(withSlash, this.directoryExcludeList, opts) === false + } +} + +function extPattern (extensions) { + if (extensions.length === 1) { + return '**/*.' + extensions[0] + } else { + return '**/*.{' + extensions.join(',') + '}' + } +} + +module.exports = GlobPattern diff --git a/test/components/index/glob.spec.js b/test/components/index/glob.spec.js index 9e8934c..ef44964 100644 --- a/test/components/index/glob.spec.js +++ b/test/components/index/glob.spec.js @@ -26,47 +26,37 @@ describe('Index: glob', function () { require('micromatch').match('file.txt', '**/**') }) - it('uses a valid glob pattern to filter files', () => { - const pattern = glob.globPattern({}) - should(pattern).startWith('**/*.{') - should(pattern).endWith('}') - }) - it('can include photo extensions', () => { - const pattern = glob.globPattern({ includePhotos: true, includeVideos: false, includeRawPhotos: false }) - should(pattern.indexOf('jpg')).above(-1) - should(pattern.indexOf('mp4')).eql(-1) - should(pattern.indexOf('cr2')).eql(-1) + const ext = glob.supportedExtensions({ includePhotos: true, includeVideos: false, includeRawPhotos: false }) + should(ext.indexOf('jpg')).above(-1) + should(ext.indexOf('mp4')).eql(-1) + should(ext.indexOf('cr2')).eql(-1) }) it('can include video extensions', () => { - const pattern = glob.globPattern({ includePhotos: false, includeVideos: true, includeRawPhotos: false }) - should(pattern.indexOf('jpg')).eql(-1) - should(pattern.indexOf('mp4')).above(-1) - should(pattern.indexOf('cr2')).eql(-1) + const ext = glob.supportedExtensions({ includePhotos: false, includeVideos: true, includeRawPhotos: false }) + should(ext.indexOf('jpg')).eql(-1) + should(ext.indexOf('mp4')).above(-1) + should(ext.indexOf('cr2')).eql(-1) }) it('can include raw photo extensions', () => { - const pattern = glob.globPattern({ includePhotos: false, includeVideos: false, includeRawPhotos: true }) - should(pattern.indexOf('jpg')).eql(-1) - should(pattern.indexOf('mp4')).eql(-1) - should(pattern.indexOf('cr2')).above(-1) + const ext = glob.supportedExtensions({ includePhotos: false, includeVideos: false, includeRawPhotos: true }) + should(ext.indexOf('jpg')).eql(-1) + should(ext.indexOf('mp4')).eql(-1) + should(ext.indexOf('cr2')).above(-1) }) - it('can list top-level images', (done) => { + it('lists all images by default', (done) => { mock({ 'media/IMG_0001.jpg': '...', 'media/IMG_0002.jpg': '...' }) - glob.find('media', {}, (err, map) => { - if (err) return done(err) - const keys = Object.keys(map).sort() - should(keys).eql([ - 'IMG_0001.jpg', - 'IMG_0002.jpg' - ]) - done() - }) + const options = {} + assertGlobReturns('media', options, [ + 'IMG_0001.jpg', + 'IMG_0002.jpg' + ], done) }) it('can list nested images', (done) => { @@ -74,15 +64,11 @@ describe('Index: glob', function () { 'media/2016/June/IMG_0001.jpg': '...', 'media/2017/IMG_0002.jpg': '...' }) - glob.find('media', {}, (err, map) => { - if (err) return done(err) - const keys = Object.keys(map).sort() - should(keys).eql([ - '2016/June/IMG_0001.jpg', - '2017/IMG_0002.jpg' - ]) - done() - }) + const options = {} + assertGlobReturns('media', options, [ + '2016/June/IMG_0001.jpg', + '2017/IMG_0002.jpg' + ], done) }) it('includes photos and videos by default', (done) => { @@ -90,28 +76,22 @@ describe('Index: glob', function () { 'media/IMG_0001.jpg': '...', 'media/IMG_0002.mp4': '...' }) - glob.find('media', {}, (err, map) => { - if (err) return done(err) - const keys = Object.keys(map).sort() - should(keys).eql([ - 'IMG_0001.jpg', - 'IMG_0002.mp4' - ]) - done() - }) + const options = {} + assertGlobReturns('media', options, [ + 'IMG_0001.jpg', + 'IMG_0002.mp4' + ], done) }) - it('can excludes photos', (done) => { + it('can exclude photos', (done) => { mock({ 'media/IMG_0001.jpg': '...', 'media/IMG_0002.mp4': '...' }) - glob.find('media', { includePhotos: false }, (err, map) => { - if (err) return done(err) - const keys = Object.keys(map).sort() - should(keys).eql(['IMG_0002.mp4']) - done() - }) + const options = { includePhotos: false } + assertGlobReturns('media', options, [ + 'IMG_0002.mp4' + ], done) }) it('can excludes videos', (done) => { @@ -119,12 +99,10 @@ describe('Index: glob', function () { 'media/IMG_0001.jpg': '...', 'media/IMG_0002.mp4': '...' }) - glob.find('media', { includeVideos: false }, (err, map) => { - if (err) return done(err) - const keys = Object.keys(map).sort() - should(keys).eql(['IMG_0001.jpg']) - done() - }) + const options = { includeVideos: false } + assertGlobReturns('media', options, [ + 'IMG_0001.jpg' + ], done) }) it('can include raw photos', (done) => { @@ -132,26 +110,21 @@ describe('Index: glob', function () { 'media/IMG_0001.jpg': '...', 'media/IMG_0002.cr2': '...' }) - glob.find('media', { includeRawPhotos: true }, (err, map) => { - if (err) return done(err) - const keys = Object.keys(map).sort() - should(keys).eql(['IMG_0001.jpg', 'IMG_0002.cr2']) - done() - }) + const options = { includeRawPhotos: true } + assertGlobReturns('media', options, [ + 'IMG_0001.jpg', + 'IMG_0002.cr2' + ], done) }) - it('is case insensitive', (done) => { + it('is case insensitive for the extension', (done) => { mock({ 'media/IMG_0001.JPG': '...' }) - glob.find('media', {}, (err, map) => { - if (err) return done(err) - const keys = Object.keys(map).sort() - should(keys).eql([ - 'IMG_0001.JPG' - ]) - done() - }) + const options = {} + assertGlobReturns('media', options, [ + 'IMG_0001.JPG' + ], done) }) it('ignores any folder starting with a dot', (done) => { @@ -161,15 +134,11 @@ describe('Index: glob', function () { 'media/nested/.private/IMG_0003.jpg': '...', 'media/just/a.dot/IMG_0004.jpg': '...' }) - glob.find('media', {}, (err, map) => { - if (err) return done(err) - const keys = Object.keys(map).sort() - should(keys).eql([ - 'IMG_0001.jpg', - 'just/a.dot/IMG_0004.jpg' - ]) - done() - }) + const options = {} + assertGlobReturns('media', options, [ + 'IMG_0001.jpg', + 'just/a.dot/IMG_0004.jpg' + ], done) }) it('ignores folders called @eaDir (Synology thumbnail folders)', (done) => { @@ -177,14 +146,10 @@ describe('Index: glob', function () { 'media/holidays/IMG_0001.jpg': '...', 'media/holidays/@eaDir/IMG_0001.jpg': '...' }) - glob.find('media', {}, (err, map) => { - if (err) return done(err) - const keys = Object.keys(map).sort() - should(keys).eql([ - 'holidays/IMG_0001.jpg' - ]) - done() - }) + const options = {} + assertGlobReturns('media', options, [ + 'holidays/IMG_0001.jpg' + ], done) }) it('ignores root folders called #recycle (Synology recycle bin)', (done) => { @@ -192,14 +157,36 @@ describe('Index: glob', function () { 'media/holidays/IMG_0001.jpg': '...', 'media/#recycle/IMG_0002.jpg': '...' }) - glob.find('media', {}, (err, map) => { - if (err) return done(err) - const keys = Object.keys(map).sort() - should(keys).eql([ - 'holidays/IMG_0001.jpg' - ]) - done() + const options = {} + assertGlobReturns('media', options, [ + 'holidays/IMG_0001.jpg' + ], done) + }) + + it('can specify an include pattern', (done) => { + mock({ + 'media/work/IMG_0001.jpg': '...', + 'media/holidays/IMG_0002.jpg': '...' }) + const options = { + include: [ 'holidays/**' ] + } + assertGlobReturns('media', options, [ + 'holidays/IMG_0002.jpg' + ], done) + }) + + it('can specify an exclude pattern', (done) => { + mock({ + 'media/work/IMG_0001.jpg': '...', + 'media/holidays/IMG_0002.jpg': '...' + }) + const options = { + exclude: [ 'work/**' ] + } + assertGlobReturns('media', options, [ + 'holidays/IMG_0002.jpg' + ], done) }) it('ignores invalid file names', function (done) { @@ -257,3 +244,12 @@ describe('Index: glob', function () { }) }) }) + +function assertGlobReturns (root, options, expected, done) { + glob.find(root, options, (err, map) => { + if (err) return done(err) + const keys = Object.keys(map).sort() + should(keys).eql(expected) + done() + }) +} diff --git a/test/components/index/pattern.spec.js b/test/components/index/pattern.spec.js new file mode 100644 index 0000000..631be97 --- /dev/null +++ b/test/components/index/pattern.spec.js @@ -0,0 +1,206 @@ +const GlobPattern = require('../../../src/components/index/pattern') +const should = require('should/as-function') + +describe('Index: pattern', function () { + describe('matching files', () => { + it('matches files with the valid extension', () => { + const pattern = new GlobPattern({ + include: ['**/**'], + exclude: [], + extensions: ['jpg'] + }) + should(pattern.match('holidays/IMG_0001.jpg')).eql(true) + }) + + it('matches files with one of the valid extensions', () => { + const pattern = new GlobPattern({ + include: ['**/**'], + exclude: [], + extensions: ['jpg', 'png'] + }) + should(pattern.match('holidays/IMG_0001.png')).eql(true) + }) + + it('rejects files with an invalid extension', () => { + const pattern = new GlobPattern({ + include: ['**/**'], + exclude: [], + extensions: ['jpg'] + }) + should(pattern.match('holidays/IMG_0001.foo')).eql(false) + }) + + it('matches files that meet the include pattern', () => { + const pattern = new GlobPattern({ + include: ['holidays/**'], + exclude: [], + extensions: ['jpg'] + }) + should(pattern.match('holidays/IMG_0001.jpg')).eql(true) + }) + + it('matches files that meet one of the include patterns', () => { + const pattern = new GlobPattern({ + include: ['work/**', 'holidays/**'], + exclude: [], + extensions: ['jpg'] + }) + should(pattern.match('holidays/IMG_0001.jpg')).eql(true) + }) + + it('can include deep sub-directory patterns', () => { + const pattern = new GlobPattern({ + include: ['**/london/**'], + exclude: [], + extensions: ['jpg'] + }) + should(pattern.match('holidays/london/IMG_0001.jpg')).eql(true) + }) + + it('can use a partial filename as an include', () => { + const pattern = new GlobPattern({ + include: ['**/IMG_*'], + exclude: [], + extensions: ['jpg'] + }) + should(pattern.match('holidays/IMG_0001.jpg')).eql(true) + }) + + it('rejects files that dont meet any of the include patterns', () => { + const pattern = new GlobPattern({ + include: ['work/**', 'home/**'], + exclude: [], + extensions: ['jpg'] + }) + should(pattern.match('holidays/IMG_0001.jpg')).eql(false) + }) + + it('matches files that dont meet any exclude patterns', () => { + const pattern = new GlobPattern({ + include: ['**/**'], + exclude: ['home/**', 'work/**'], + extensions: ['jpg'] + }) + should(pattern.match('holidays/IMG_0001.jpg')).eql(true) + }) + + it('rejects files that meet an exclude pattern', () => { + const pattern = new GlobPattern({ + include: ['**/**'], + exclude: ['home/**', 'work/**'], + extensions: ['jpg'] + }) + should(pattern.match('home/IMG_0001.jpg')).eql(false) + }) + + it('exclude are stronger than include', () => { + const pattern = new GlobPattern({ + include: ['home/**'], + exclude: ['**/2016/**'], + extensions: ['jpg'] + }) + should(pattern.match('home/2016/IMG_0001.jpg')).eql(false) + }) + }) + + describe('traversing folders', () => { + it('traverses folders that meet an include pattern', () => { + const pattern = new GlobPattern({ + include: ['holidays/**', 'home/**'], + exclude: [], + extensions: [] + }) + should(pattern.canTraverse('holidays')).eql(true) + }) + + it('traverses nested folders that meet an include pattern', () => { + const pattern = new GlobPattern({ + include: ['holidays/**', 'home/**'], + exclude: [], + extensions: [] + }) + should(pattern.canTraverse('holidays/2016')).eql(true) + }) + + it('traverses folders that meet an include directory', () => { + const pattern = new GlobPattern({ + include: ['holidays/'], + exclude: [], + extensions: [] + }) + should(pattern.canTraverse('holidays')).eql(true) + }) + + it('ignores folders that meet an exclude pattern', () => { + const pattern = new GlobPattern({ + include: ['**/**'], + exclude: ['holidays/**'], + extensions: [] + }) + should(pattern.canTraverse('holidays')).eql(false) + }) + + it('ignores folders that meet an excluded directory name', () => { + const pattern = new GlobPattern({ + include: ['**/**'], + exclude: ['holidays/'], + extensions: [] + }) + should(pattern.canTraverse('holidays')).eql(false) + }) + + it('ignores folders that meet a nested exclude pattern', () => { + const pattern = new GlobPattern({ + include: ['**/**'], + exclude: ['holidays/2016/**'], + extensions: [] + }) + should(pattern.canTraverse('holidays/2016')).eql(false) + }) + + it('ignores folders that meet a wildcard exclude pattern', () => { + const pattern = new GlobPattern({ + include: ['**/**'], + exclude: ['**/2016/**'], + extensions: [] + }) + should(pattern.canTraverse('holidays/2016')).eql(false) + }) + + it('ignores folders starting with a dot', () => { + const pattern = new GlobPattern({ + include: ['**/**'], + exclude: [], + extensions: [] + }) + should(pattern.canTraverse('.git')).eql(false) + }) + + it('ignores nested folders starting with a dot', () => { + const pattern = new GlobPattern({ + include: ['**/**'], + exclude: [], + extensions: [] + }) + should(pattern.canTraverse('test/.git')).eql(false) + }) + + it('ignores nested Synology @eaDir thumbnail folders', () => { + const pattern = new GlobPattern({ + include: ['**/**'], + exclude: [], + extensions: [] + }) + should(pattern.canTraverse('photos/@eaDir')).eql(false) + }) + + it('ignores the Synology recycle bin', () => { + const pattern = new GlobPattern({ + include: ['**/**'], + exclude: [], + extensions: [] + }) + should(pattern.canTraverse('#recycle')).eql(false) + }) + }) +})