fix(index): don’t crash when trying to parse exiftool output for a badly encoded file

pull/93/head
Romain 7 years ago
parent a2042f3128
commit d0bbf94b1c

@ -0,0 +1,13 @@
<?xpacket begin='' id='BADENCODING'?>
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='Image::ExifTool 10.20'>
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
<rdf:Description rdf:about='' xmlns:dc='http://purl.org/dc/elements/1.1/'>
<dc:title>
<rdf:Alt>
<rdf:li xml:lang='x-default'>?,n+.!??.?<3F><><EFBFBD><EFBFBD>?b?/?5q1V130?-?(????=?D?:s6k?325n6?2.2?,p?.?K??q?z</rdf:li>
</rdf:Alt>
</dc:title>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>
<?xpacket end='w'?>

12
package-lock.json generated

@ -1981,8 +1981,7 @@
"inherits": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz",
"integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=",
"dev": true
"integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4="
},
"ini": {
"version": "1.3.5",
@ -3519,8 +3518,7 @@
"process-nextick-args": {
"version": "1.0.7",
"resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-1.0.7.tgz",
"integrity": "sha1-FQ4gt1ZZCtP5EJPyWk8q2L/zC6M=",
"dev": true
"integrity": "sha1-FQ4gt1ZZCtP5EJPyWk8q2L/zC6M="
},
"progress": {
"version": "1.1.8",
@ -3632,7 +3630,6 @@
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.3.tgz",
"integrity": "sha512-m+qzzcn7KUxEmd1gMbchF+Y2eIUbieUaxkWtptyHywrX0rE8QEYqPC07Vuy4Wm32/xE16NcdBctb8S0Xe/5IeQ==",
"dev": true,
"requires": {
"core-util-is": "1.0.2",
"inherits": "2.0.3",
@ -4333,7 +4330,6 @@
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.0.3.tgz",
"integrity": "sha512-4AH6Z5fzNNBcH+6XDMfA/BTt87skxqJlO0lAh3Dker5zThcAxG6mKz+iGu308UKoPPQ8Dcqx/4JhujzltRa+hQ==",
"dev": true,
"requires": {
"safe-buffer": "5.1.1"
}
@ -4455,7 +4451,6 @@
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/through2/-/through2-2.0.3.tgz",
"integrity": "sha1-AARWmzfHx0ujnEPzzteNGtlBQL4=",
"dev": true,
"requires": {
"readable-stream": "2.3.3",
"xtend": "4.0.1"
@ -4772,8 +4767,7 @@
"util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
"integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=",
"dev": true
"integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8="
},
"uuid": {
"version": "3.1.0",

@ -47,6 +47,7 @@
"micromatch": "^3.1.4",
"moment": "^2.19.2",
"readdir-enhanced": "^2.0.0",
"through2": "^2.0.3",
"thumbsup-downsize": "^1.0.0",
"url-join": "^2.0.2",
"video.js": "^6.2.8",

@ -1,7 +1,8 @@
const _ = require('lodash')
const os = require('os')
const debug = require('debug')('thumbsup:debug')
const es = require('event-stream')
const exiftool = require('./stream.js')
const os = require('os')
/*
Fans out the list of files to multiple exiftool processes (= CPU count)
@ -11,8 +12,10 @@ exports.parse = (rootFolder, filePaths) => {
// create several buckets of work
const workers = os.cpus().length
const buckets = _.chunk(filePaths, Math.ceil(filePaths.length / workers))
debug(`Split files into ${buckets.length} batches for exiftool`)
// create several <exiftool> streams that can work in parallel
const streams = _.range(buckets.length).map(i => {
debug(`Calling exiftool with ${buckets[i].length} files`)
return exiftool.parse(rootFolder, buckets[i])
})
// merge the object streams

@ -3,6 +3,7 @@ const debug = require('debug')('thumbsup:debug')
const error = require('debug')('thumbsup:error')
const es = require('event-stream')
const JSONStream = require('JSONStream')
const { StringDecoder } = require('string_decoder')
const through2 = require('through2')
/*
Spawn a single <exiftool> process and send all the files to be parsed
@ -44,6 +45,13 @@ exports.parse = (rootFolder, filePaths) => {
// parse every top-level object and emit it on the stream
return es.pipeline(
child.stdout,
through2(chunkToString),
JSONStream.parse([true])
)
}
/*
  through2 transform: converts raw <exiftool> output into UTF-8 text
  so that JSONStream only ever receives strings, even for odd encodings.

  Must be invoked with `this` bound to the transform stream (as through2 does).
  - chunk: Buffer of raw bytes read from the child process
  - enc: encoding hint supplied by the stream machinery (unused)
  - callback: called once the chunk has been handled
*/
function chunkToString (chunk, enc, callback) {
  // Chunk boundaries are arbitrary: a multi-byte UTF-8 character can be split
  // across two chunks. Decoding each chunk on its own with chunk.toString()
  // would turn both halves into replacement characters, so keep one stateful
  // decoder per stream that buffers incomplete sequences between calls.
  if (!this._utf8Decoder) {
    this._utf8Decoder = new StringDecoder('utf8')
    // emit whatever is still buffered when the input ends (e.g. truncated output)
    // without overriding a flush handler that the caller may have provided
    if (typeof this._flush !== 'function') {
      this._flush = (done) => {
        const rest = this._utf8Decoder.end()
        if (rest.length > 0) this.push(rest)
        done()
      }
    }
  }
  const text = this._utf8Decoder.write(chunk)
  // a chunk holding only the start of a character decodes to '' for now
  if (text.length > 0) this.push(text)
  callback()
}

@ -6,7 +6,7 @@ const should = require('should/as-function')
// Find all test photos
const folder = path.join(__dirname, '..', '..', '..', 'fixtures')
const files = readdir.sync(folder, {
filter: stats => stats.isFile() && stats.path.charAt(0) !== '.',
filter: stats => stats.isFile() && path.extname(stats.path) === '.jpg',
deep: true
})
@ -26,4 +26,18 @@ describe('exiftool', function () {
done()
})
})
it('can process badly encoded fields', (done) => {
  // An XMP sidecar is used as the fixture because the broken bytes are easy to inspect,
  // although in practice the issue is more likely to come from a badly encoded
  // XMP section embedded in a JPG file.
  // note: edit <bad-encoding.xmp> with <vi> if required, so it doesn't get converted to UTF
  const fixture = 'bad-encoding.xmp'
  const seen = []
  // record the source file of every entry emitted by exiftool
  const collect = entry => {
    seen.push(entry.SourceFile)
  }
  // once the stream ends, the fixture must have been emitted exactly once
  const verify = () => {
    should(seen).eql([fixture])
    done()
  }
  exiftool.parse(folder, [fixture])
    .on('data', collect)
    .on('end', verify)
})
})

Loading…
Cancel
Save