Use the new IndexPaginate class in get_all_indexed() to fetch all indexed videos

This commit is contained in:
simon 2021-11-18 12:16:21 +07:00
parent 509b0097fe
commit f371a03cc7
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
4 changed files with 14 additions and 47 deletions

View File

@ -191,47 +191,16 @@ class PendingList:
return all_pending, all_ignore return all_pending, all_ignore
def get_all_indexed(self): @staticmethod
def get_all_indexed():
"""get a list of all videos indexed""" """get a list of all videos indexed"""
headers = {"Content-type": "application/json"}
# get PIT ID
url = self.ES_URL + "/ta_video/_pit?keep_alive=1m"
response = requests.post(url, auth=self.ES_AUTH)
json_data = json.loads(response.text)
pit_id = json_data["id"]
# query
data = { data = {
"size": 500,
"query": {"match_all": {}}, "query": {"match_all": {}},
"pit": {"id": pit_id, "keep_alive": "1m"},
"sort": [{"published": {"order": "desc"}}], "sort": [{"published": {"order": "desc"}}],
} }
query_str = json.dumps(data) all_indexed = IndexPaginate("ta_video", data).get_results()
url = self.ES_URL + "/_search"
all_indexed = []
while True:
response = requests.get(
url, data=query_str, headers=headers, auth=self.ES_AUTH
)
json_data = json.loads(response.text)
all_hits = json_data["hits"]["hits"]
if all_hits:
for hit in all_hits:
all_indexed.append(hit)
search_after = hit["sort"]
# update search_after with last hit data
data["search_after"] = search_after
query_str = json.dumps(data)
else:
break
# clean up PIT
query_str = json.dumps({"id": pit_id})
requests.delete(
self.ES_URL + "/_pit",
data=query_str,
headers=headers,
auth=self.ES_AUTH,
)
return all_indexed return all_indexed
def get_all_downloaded(self): def get_all_downloaded(self):

View File

@ -249,10 +249,10 @@ class FilesystemScanner:
all_indexed_raw = index_handler.get_all_indexed() all_indexed_raw = index_handler.get_all_indexed()
all_indexed = [] all_indexed = []
for video in all_indexed_raw: for video in all_indexed_raw:
youtube_id = video["_id"] youtube_id = video["youtube_id"]
media_url = video["_source"]["media_url"] media_url = video["media_url"]
published = video["_source"]["published"] published = video["published"]
title = video["_source"]["title"] title = video["title"]
all_indexed.append((youtube_id, media_url, published, title)) all_indexed.append((youtube_id, media_url, published, title))
return all_indexed return all_indexed

View File

@ -60,8 +60,8 @@ class ThumbManager:
needed_thumbs = [] needed_thumbs = []
for video in all_indexed: for video in all_indexed:
youtube_id = video["_source"]["youtube_id"] youtube_id = video["youtube_id"]
thumb_url = video["_source"]["vid_thumb_url"] thumb_url = video["vid_thumb_url"]
if missing_only: if missing_only:
if youtube_id + ".jpg" not in all_thumbs: if youtube_id + ".jpg" not in all_thumbs:
needed_thumbs.append((youtube_id, thumb_url)) needed_thumbs.append((youtube_id, thumb_url))
@ -277,10 +277,8 @@ class ThumbManager:
all_indexed = download.PendingList().get_all_indexed() all_indexed = download.PendingList().get_all_indexed()
video_list = [] video_list = []
for video in all_indexed: for video in all_indexed:
youtube_id = video["_source"]["youtube_id"] youtube_id = video["youtube_id"]
media_url = os.path.join( media_url = os.path.join(self.MEDIA_DIR, video["media_url"])
self.MEDIA_DIR, video["_source"]["media_url"]
)
thumb_path = os.path.join( thumb_path = os.path.join(
self.CACHE_DIR, self.vid_thumb_path(youtube_id) self.CACHE_DIR, self.vid_thumb_path(youtube_id)
) )

View File

@ -209,7 +209,7 @@ def index_channel_playlists(channel_id):
all_playlists = channel_handler.get_all_playlists() all_playlists = channel_handler.get_all_playlists()
all_indexed = PendingList().get_all_indexed() all_indexed = PendingList().get_all_indexed()
all_youtube_ids = [i["_source"]["youtube_id"] for i in all_indexed] all_youtube_ids = [i["youtube_id"] for i in all_indexed]
for playlist_id, playlist_title in all_playlists: for playlist_id, playlist_title in all_playlists:
print("add playlist: " + playlist_title) print("add playlist: " + playlist_title)