implementing filesystem rescan to clean index

This commit is contained in:
simon 2021-10-08 14:56:07 +07:00
parent c2a6ac6f44
commit 64ccd3830e
5 changed files with 46 additions and 5 deletions

View File

@ -297,6 +297,7 @@ class FilesystemScanner:
"""rename media files as identified by find_bad_media_url""" """rename media files as identified by find_bad_media_url"""
for bad_filename in self.to_rename: for bad_filename in self.to_rename:
channel, filename, expected_filename = bad_filename channel, filename, expected_filename = bad_filename
print(f"renaming [{filename}] to [{expected_filename}]")
old_path = os.path.join(self.VIDEOS, channel, filename) old_path = os.path.join(self.VIDEOS, channel, filename)
new_path = os.path.join(self.VIDEOS, channel, expected_filename) new_path = os.path.join(self.VIDEOS, channel, expected_filename)
os.rename(old_path, new_path) os.rename(old_path, new_path)
@ -306,6 +307,7 @@ class FilesystemScanner:
bulk_list = [] bulk_list = []
for video_mismatch in self.mismatch: for video_mismatch in self.mismatch:
youtube_id, media_url = video_mismatch youtube_id, media_url = video_mismatch
print(f"{youtube_id}: fixing media url {media_url}")
action = {"update": {"_id": youtube_id, "_index": "ta_video"}} action = {"update": {"_id": youtube_id, "_index": "ta_video"}}
source = {"doc": {"media_url": media_url}} source = {"doc": {"media_url": media_url}}
bulk_list.append(json.dumps(action)) bulk_list.append(json.dumps(action))
@ -323,7 +325,8 @@ class FilesystemScanner:
def delete_from_index(self): def delete_from_index(self):
"""find indexed but deleted mediafile""" """find indexed but deleted mediafile"""
for indexed in self.to_delete: for indexed in self.to_delete:
youtube_id, _ = indexed youtube_id = indexed[0]
print(f"deleting {youtube_id} from index")
url = self.ES_URL + "/ta_video/_doc/" + youtube_id url = self.ES_URL + "/ta_video/_doc/" + youtube_id
request = requests.delete(url) request = requests.delete(url)
if not request.ok: if not request.ok:
@ -456,12 +459,16 @@ def scan_filesystem():
filesystem_handler = FilesystemScanner() filesystem_handler = FilesystemScanner()
filesystem_handler.list_comarison() filesystem_handler.list_comarison()
if filesystem_handler.to_rename: if filesystem_handler.to_rename:
print("renaming files")
filesystem_handler.rename_files() filesystem_handler.rename_files()
if filesystem_handler.mismatch: if filesystem_handler.mismatch:
print("fixing media urls in index")
filesystem_handler.send_mismatch_bulk() filesystem_handler.send_mismatch_bulk()
if filesystem_handler.to_delete: if filesystem_handler.to_delete:
print("delete metadata from index")
filesystem_handler.delete_from_index() filesystem_handler.delete_from_index()
if filesystem_handler.to_index: if filesystem_handler.to_index:
print("index new videos")
for missing_vid in filesystem_handler.to_index: for missing_vid in filesystem_handler.to_index:
youtube_id = missing_vid[2] youtube_id = missing_vid[2]
index_new_video(youtube_id, missing_vid=missing_vid) index_new_video(youtube_id, missing_vid=missing_vid)

View File

@ -11,7 +11,11 @@ from home.src.config import AppConfig
from home.src.download import ChannelSubscription, PendingList, VideoDownloader from home.src.download import ChannelSubscription, PendingList, VideoDownloader
from home.src.helper import RedisArchivist, RedisQueue from home.src.helper import RedisArchivist, RedisQueue
from home.src.index_management import backup_all_indexes, restore_from_backup from home.src.index_management import backup_all_indexes, restore_from_backup
from home.src.reindex import ManualImport, reindex_old_documents from home.src.reindex import (
ManualImport,
reindex_old_documents,
scan_filesystem
)
CONFIG = AppConfig().config CONFIG = AppConfig().config
REDIS_HOST = os.environ.get("REDIS_HOST") REDIS_HOST = os.environ.get("REDIS_HOST")
@ -154,3 +158,9 @@ def kill_dl(task_id):
"message": "", "message": "",
} }
RedisArchivist().set_message("progress:download", mess_dict) RedisArchivist().set_message("progress:download", mess_dict)
@shared_task
def rescan_filesystem():
    """Check the media folder for mismatches.

    Task wrapper around scan_filesystem(): takes no arguments and has no
    return value of its own. Registered with @shared_task so callers can
    dispatch it through the task's delay() instead of running the scan
    inline.

    NOTE(review): the scan is not read-only. From what is visible of
    scan_filesystem, it can rename media files, fix media urls in the
    index, delete index metadata and index new videos. Confirm against
    the full function before calling this outside the settings page.
    """
    scan_filesystem()

View File

@ -126,7 +126,11 @@
</div> </div>
</div> </div>
<div class="settings-group"> <div class="settings-group">
<p>Rescan filesystem.</p> <h2>Rescan filesystem</h2>
<i>Coming soon</i> <p><span class="danger-zone">Danger Zone</span>: This will delete the metadata of deleted videos from the filesystem.</p>
<p>Rescan your media folder looking for missing videos and clean up index. More infos on the Github <a href="https://github.com/bbilly1/tubearchivist/wiki/Settings" target="_blank">Wiki</a>.</p>
<div id="fs-rescan">
<button onclick="fsRescan()">Rescan filesystem</button>
</div>
</div> </div>
{% endblock content %} {% endblock content %}

View File

@ -22,6 +22,7 @@ from home.tasks import (
download_single, download_single,
extrac_dl, extrac_dl,
kill_dl, kill_dl,
rescan_filesystem,
run_backup, run_backup,
run_manual_import, run_manual_import,
run_restore_backup, run_restore_backup,
@ -506,6 +507,7 @@ class PostData:
"manual-import": self.manual_import, "manual-import": self.manual_import,
"db-backup": self.db_backup, "db-backup": self.db_backup,
"db-restore": self.db_restore, "db-restore": self.db_restore,
"fs-rescan": self.fs_rescan,
"channel-search": self.channel_search, "channel-search": self.channel_search,
} }
@ -658,6 +660,13 @@ class PostData:
run_restore_backup.delay() run_restore_backup.delay()
return {"success": True} return {"success": True}
@staticmethod
def fs_rescan():
    """Start the file system rescan task.

    Dispatches rescan_filesystem through its delay() call rather than
    invoking the scan directly.

    Returns:
        dict: always {"success": True}. The value is a constant returned
        right after dispatching, so it does not reflect the outcome of
        the scan itself.
    """
    # trace on stdout that a rescan was requested
    print("start filesystem scan")
    # presumably queued on the task worker (rescan_filesystem is a shared_task)
    rescan_filesystem.delay()
    return {"success": True}
def channel_search(self): def channel_search(self):
"""search for channel name as_you_type""" """search for channel name as_you_type"""
search_query = self.exec_val search_query = self.exec_val

View File

@ -156,7 +156,7 @@ function dbBackup() {
function dbRestore() { function dbRestore() {
var payload = JSON.stringify({'db-restore': true}); var payload = JSON.stringify({'db-restore': true});
sendPost(payload) sendPost(payload);
// clear button // clear button
var message = document.createElement('p'); var message = document.createElement('p');
message.innerText = 'restoring from backup'; message.innerText = 'restoring from backup';
@ -165,6 +165,17 @@ function dbRestore() {
toReplace.appendChild(message); toReplace.appendChild(message);
} }
// Post the 'fs-rescan' request through sendPost, then replace the contents
// of the #fs-rescan container (the rescan button) with a progress note.
function fsRescan() {
    sendPost(JSON.stringify({'fs-rescan': true}));
    // build the status paragraph shown in place of the button
    var notice = document.createElement('p');
    notice.innerText = 'File system scan in progress';
    // empty the container before inserting the note
    var container = document.getElementById('fs-rescan');
    container.innerHTML = '';
    container.appendChild(notice);
}
// player // player
function createPlayer(button) { function createPlayer(button) {
var mediaUrl = button.getAttribute('data-src'); var mediaUrl = button.getAttribute('data-src');