|
|
|
# Requires PyCrypto and python-magic
|
|
|
|
import sys
|
|
|
|
from pathlib import Path
|
|
|
|
from urllib.parse import urlparse
|
|
|
|
import json
|
|
|
|
import magic
|
|
|
|
|
|
|
|
import click
|
|
|
|
import requests
|
|
|
|
|
|
|
|
import database_connection # noqa: F401
|
|
|
|
import decrypt_attachment
|
|
|
|
from matrix_connection import get_download_url
|
|
|
|
from schema import Message
|
|
|
|
|
|
|
|
|
|
|
|
def download_stem(message, prefer_thumbnails):
    """Return the URL path of the message's image, without leading slashes.

    The thumbnail URL is used when ``prefer_thumbnails`` is true and the
    message has one; otherwise the full image URL is used. The result is
    the name under which the download is stored (before an extension is
    added).
    """
    chosen_url = message.thumbnail_url if prefer_thumbnails else None
    if not chosen_url:
        # No thumbnail requested or available: fall back to the full image.
        chosen_url = message.image_url
    parsed = urlparse(chosen_url)
    return parsed.path.lstrip('/')
|
|
|
|
|
|
|
|
|
|
|
|
def _require_ok(response, url):
    """Raise ``RuntimeError`` unless ``response`` has HTTP status 200."""
    # An explicit check instead of ``assert``: assertions are stripped when
    # Python runs with -O, which would let error bodies be decrypted/saved.
    if response.status_code != 200:
        raise RuntimeError(
            "unexpected HTTP status %s for %s" % (response.status_code, url))


def run_downloads(messages, download_dir, prefer_thumbnails):
    """Download, decrypt and save the image attached to each message.

    For every message the thumbnail URL (when ``prefer_thumbnails`` is true
    and the message has one) or the full image URL is resolved through
    ``get_download_url``, fetched, decrypted with the key material stored
    under ``msg.content['file']`` and written into ``download_dir``. The
    file extension is the MIME subtype that ``magic`` detects in the
    decrypted bytes.

    A failure while fetching, decrypting or writing one message is reported
    on stdout and the loop continues with the next message; such failures
    are not raised to the caller. ``KeyboardInterrupt`` and ``SystemExit``
    are *not* swallowed, so the run can be interrupted.

    Args:
        messages: iterable of message objects exposing ``thumbnail_url``,
            ``image_url`` and a dict-like ``content``.
        download_dir: ``pathlib.Path`` of the directory to write into.
        prefer_thumbnails: fetch the thumbnail instead of the full image
            when one is available.
    """
    for msg in messages:
        image_url = (msg.thumbnail_url if prefer_thumbnails else None) or msg.image_url
        # Point the request at the matrix host/port instead of the bare domain.
        download_url = get_download_url(image_url).replace(
            '//bitcointechweekly.com', '//matrix.bitcointechweekly.com:8448')
        try:
            # NOTE(review): verify=False disables TLS certificate checks.
            # Presumably the server certificate cannot be validated; confirm,
            # and prefer passing a CA bundle via ``verify=`` if possible.
            res = requests.head(download_url, verify=False)
            _require_ok(res, download_url)
            print(res.status_code)

            res = requests.get(download_url, verify=False)
            _require_ok(res, download_url)

            # Target path without extension; the extension is only known
            # once the payload has been decrypted and sniffed.
            target_stem = download_dir / download_stem(msg, prefer_thumbnails)
            print('Downloading', download_url, '->', target_stem)

            # Encryption details
            file_meta = msg.content.get('file')
            key_meta = file_meta['key']['k']
            iv = file_meta['iv']
            sha256 = file_meta['hashes']['sha256']
            decrypted = decrypt_attachment.decrypt(res.content, key_meta, sha256, iv)

            # maxsplit=1 guarantees at most two parts for the unpacking.
            _, subtype = magic.from_buffer(decrypted, mime=True).split('/', 1)
            filename = target_stem.with_suffix('.' + subtype)

            with open(filename, 'wb') as fp:
                fp.write(decrypted)
        except Exception as exc:
            # Best-effort: report the failure (type and message) and move on.
            print("<p>Error downloading '%s' : %s</p>" % (download_url, repr(exc)))
|
|
|
|
|
|
|
|
|
|
|
|
@click.command()
@click.option('--thumbnails/--no-thumbnails', default=True)
@click.argument('output', required=False)
def download_images(thumbnails, output):
    """Download image attachments (thumbnails by default) into OUTPUT.

    OUTPUT defaults to "thumbnails" or "images", depending on the flag.
    """
    # NOTE: the docstring above doubles as the click --help text, so it is
    # kept short; implementation notes live in comments instead.
    noun = 'thumbnails' if thumbnails else 'images'
    download_dir = Path(output or noun)

    # Only messages whose content is tagged as an image are candidates.
    messages = [msg for msg in Message.objects
                if msg.content.get('msgtype') == 'm.image']

    # parents=True so that a nested OUTPUT (e.g. "archive/thumbs") whose
    # parent directories do not exist yet is created instead of failing.
    download_dir.mkdir(parents=True, exist_ok=True)

    # Files already present are matched by name without extension, which is
    # what download_stem() yields for a message.
    current_stems = {p.stem for p in download_dir.glob('*')}
    new_messages = [msg for msg in messages
                    if download_stem(msg, thumbnails) not in current_stems]

    if new_messages:
        print(f"Downloading {len(new_messages)} new {noun}...")
    else:
        print("Nothing to do")
    run_downloads(new_messages, download_dir, prefer_thumbnails=thumbnails)
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: run the click command only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    download_images()
|