mirror of
https://github.com/tubearchivist/tubearchivist
synced 2024-11-02 09:41:07 +00:00
implement os.listdir sanitizer for hidden files, #30
This commit is contained in:
parent
2de99d7e37
commit
11067094b2
@ -18,6 +18,7 @@ from home.src.helper import (
|
||||
DurationConverter,
|
||||
RedisQueue,
|
||||
clean_string,
|
||||
ignore_filelist,
|
||||
set_message,
|
||||
)
|
||||
from home.src.index import YoutubeChannel, index_new_video
|
||||
@ -219,11 +220,13 @@ class PendingList:
|
||||
|
||||
def get_all_downloaded(self):
|
||||
"""get a list of all videos in archive"""
|
||||
all_channel_folders = os.listdir(self.VIDEOS)
|
||||
channel_folders = os.listdir(self.VIDEOS)
|
||||
all_channel_folders = ignore_filelist(channel_folders)
|
||||
all_downloaded = []
|
||||
for channel_folder in all_channel_folders:
|
||||
channel_path = os.path.join(self.VIDEOS, channel_folder)
|
||||
all_videos = os.listdir(channel_path)
|
||||
videos = os.listdir(channel_path)
|
||||
all_videos = ignore_filelist(videos)
|
||||
youtube_vids = [i[9:20] for i in all_videos]
|
||||
for youtube_id in youtube_vids:
|
||||
all_downloaded.append(youtube_id)
|
||||
@ -506,7 +509,8 @@ class VideoDownloader:
|
||||
|
||||
# check if already in cache to continue from there
|
||||
cache_dir = self.config["application"]["cache_dir"]
|
||||
all_cached = os.listdir(cache_dir + "/download/")
|
||||
cached = os.listdir(cache_dir + "/download/")
|
||||
all_cached = ignore_filelist(cached)
|
||||
for file_name in all_cached:
|
||||
if youtube_id in file_name:
|
||||
obs["outtmpl"] = cache_dir + "/download/" + file_name
|
||||
@ -531,7 +535,9 @@ class VideoDownloader:
|
||||
os.makedirs(new_folder, exist_ok=True)
|
||||
# find real filename
|
||||
cache_dir = self.config["application"]["cache_dir"]
|
||||
for file_str in os.listdir(cache_dir + "/download"):
|
||||
cached = os.listdir(cache_dir + "/download/")
|
||||
all_cached = ignore_filelist(cached)
|
||||
for file_str in all_cached:
|
||||
if youtube_id in file_str:
|
||||
old_file = file_str
|
||||
old_file_path = os.path.join(cache_dir, "download", old_file)
|
||||
|
@ -40,6 +40,19 @@ def clean_string(file_name):
|
||||
return cleaned
|
||||
|
||||
|
||||
def ignore_filelist(filelist):
    """ignore temp files for os.listdir sanitizer

    Drops every name that starts with a dot as well as a fixed set of
    OS-generated housekeeping names, keeping the remaining names in
    their original order.

    filelist: iterable of file or folder names (str)
    returns: new list with the ignored names removed, input is untouched
    """
    # Finder stores a custom folder icon in a file named "Icon\r" (a single
    # carriage return). "Icon\r\r" is the gitignore-template spelling and is
    # kept so every name that was ignored before is still ignored.
    to_ignore = {
        "Icon\r",
        "Icon\r\r",
        "Temporary Items",
        "Network Trash Folder",
    }

    return [
        file_name
        for file_name in filelist
        if not file_name.startswith(".") and file_name not in to_ignore
    ]
|
||||
|
||||
|
||||
def process_url_list(url_str):
|
||||
"""parse url_list to find valid youtube video or channel ids"""
|
||||
to_replace = ["watch?v=", "playlist?list="]
|
||||
@ -118,7 +131,8 @@ def monitor_cache_dir(cache_dir):
|
||||
look at download cache dir directly as alternative progress info
|
||||
"""
|
||||
dl_cache = os.path.join(cache_dir, "download")
|
||||
cache_file = os.listdir(dl_cache)
|
||||
all_cache_file = os.listdir(dl_cache)
|
||||
cache_file = ignore_filelist(all_cache_file)
|
||||
if cache_file:
|
||||
filename = cache_file[0][12:].replace("_", " ").split(".")[0]
|
||||
mess_dict = {
|
||||
|
@ -13,6 +13,7 @@ from datetime import datetime
|
||||
|
||||
import requests
|
||||
from home.src.config import AppConfig
|
||||
from home.src.helper import ignore_filelist
|
||||
|
||||
# expected mapping and settings
|
||||
INDEX_CONFIG = [
|
||||
@ -433,9 +434,11 @@ class ElasticBackup:
|
||||
"""extract backup zip and return filelist"""
|
||||
cache_dir = self.config["application"]["cache_dir"]
|
||||
backup_dir = os.path.join(cache_dir, "backup")
|
||||
backup_files = os.listdir(backup_dir)
|
||||
all_backup_files = ignore_filelist(backup_files)
|
||||
all_available_backups = [
|
||||
i
|
||||
for i in os.listdir(backup_dir)
|
||||
for i in all_backup_files
|
||||
if i.startswith("ta_") and i.endswith(".zip")
|
||||
]
|
||||
all_available_backups.sort()
|
||||
|
@ -21,6 +21,7 @@ from home.src.helper import (
|
||||
clean_string,
|
||||
get_message,
|
||||
get_total_hits,
|
||||
ignore_filelist,
|
||||
set_message,
|
||||
)
|
||||
from home.src.index import YoutubeChannel, YoutubeVideo, index_new_video
|
||||
@ -209,12 +210,15 @@ class FilesystemScanner:
|
||||
|
||||
def get_all_downloaded(self):
|
||||
"""get a list of all video files downloaded"""
|
||||
all_channels = os.listdir(self.VIDEOS)
|
||||
channels = os.listdir(self.VIDEOS)
|
||||
all_channels = ignore_filelist(channels)
|
||||
all_channels.sort()
|
||||
all_downloaded = []
|
||||
for channel_name in all_channels:
|
||||
channel_path = os.path.join(self.VIDEOS, channel_name)
|
||||
for video in os.listdir(channel_path):
|
||||
videos = os.listdir(channel_path)
|
||||
all_videos = ignore_filelist(videos)
|
||||
for video in all_videos:
|
||||
youtube_id = video[9:20]
|
||||
all_downloaded.append((channel_name, video, youtube_id))
|
||||
|
||||
@ -339,8 +343,8 @@ class ManualImport:
|
||||
|
||||
def import_folder_parser(self):
|
||||
"""detect files in import folder"""
|
||||
|
||||
to_import = os.listdir(self.IMPORT_DIR)
|
||||
import_files = os.listdir(self.IMPORT_DIR)
|
||||
to_import = ignore_filelist(import_files)
|
||||
to_import.sort()
|
||||
video_files = [i for i in to_import if not i.endswith(".json")]
|
||||
|
||||
|
@ -13,6 +13,7 @@ from datetime import datetime
|
||||
|
||||
import requests
|
||||
from home.src.config import AppConfig
|
||||
from home.src.helper import ignore_filelist
|
||||
from PIL import Image
|
||||
|
||||
|
||||
@ -105,7 +106,8 @@ class SearchHandler:
|
||||
def cache_dl_vids(self, all_videos):
|
||||
"""video thumbs links for cache"""
|
||||
vid_cache = os.path.join(self.CACHE_DIR, "videos")
|
||||
all_vid_cached = os.listdir(vid_cache)
|
||||
vid_cached = os.listdir(vid_cache)
|
||||
all_vid_cached = ignore_filelist(vid_cached)
|
||||
# videos
|
||||
for video_dict in all_videos:
|
||||
youtube_id = video_dict["youtube_id"]
|
||||
@ -124,7 +126,8 @@ class SearchHandler:
|
||||
def cache_dl_chan(self, all_channels):
|
||||
"""download channel thumbs"""
|
||||
chan_cache = os.path.join(self.CACHE_DIR, "channels")
|
||||
all_chan_cached = os.listdir(chan_cache)
|
||||
chan_cached = os.listdir(chan_cache)
|
||||
all_chan_cached = ignore_filelist(chan_cached)
|
||||
for channel_dict in all_channels:
|
||||
channel_id_cache = channel_dict["channel_id"]
|
||||
channel_banner_url = channel_dict["chan_banner"]
|
||||
|
Loading…
Reference in New Issue
Block a user