Fix truncation when resuming

There was already code that looked like it was meant to truncate the file, but it calculated the index incorrectly and didn't properly check all lines. The wrong index did no harm, though, because the code never actually called the truncate function (it wrote `fh.truncate` without parentheses, which is a no-op).

Now the file is truncated immediately after the last `</page>` tag. If the XML file already ends with a `</page>` tag, nothing is truncated. The page is then added after that point; if nothing was truncated, this results in the same page being listed twice (which already happened when the truncation was missing), but if truncation did happen, the file should no longer be invalid.
pull/436/head
Pokechu22 2 years ago
parent 43945c467f
commit 9b2c6e40ae

@ -1155,21 +1155,19 @@ def reverse_readline(filename, buf_size=8192, truncate=False):
lines[-1] += segment
else:
if truncate and '</page>' in segment:
pages = buffer.split('</page>')
fh.seek(-offset+buf_size-len(pages[-1]), os.SEEK_END)
fh.truncate
fh.seek(-offset+buffer.rindex('</page>')+len('</page>\n'), os.SEEK_END)
fh.truncate()
raise StopIteration
else:
yield segment.decode('utf-8')
segment = lines[0]
for index in range(len(lines) - 1, 0, -1):
segment = lines[index]
if truncate and '</page>' in segment:
pages = buffer.split('</page>')
fh.seek(-offset-len(pages[-1]), os.SEEK_END)
fh.truncate
fh.seek(-offset+buffer.rindex('</page>\n')+len('</page>\n'), os.SEEK_END)
fh.truncate()
raise StopIteration
else:
yield lines[index].decode('utf-8')
yield segment.decode('utf-8')
yield segment.decode('utf-8')
def saveImageNames(config={}, images=[], session=None):

Loading…
Cancel
Save