#!/bin/bash
#
# scrape-radio_guide.fm-manual.sh
#
# Scrape every genre listed on https://www.radioguide.fm/genre, collect the
# stream URL of each station in that genre, write one <genre>.m3u playlist per
# genre and publish the playlists to the m3u-radio-music-playlists checkout
# (git add / commit / push).
#
# Provenance (from the patch that introduced this script):
#   commit  fb78e1d9ae1aeb519504dfc55b848bfe518b43fe
#   From:   "U-DESKTOP-3VIS1T9\\junguler"
#   Date:   Sun, 12 Feb 2023 04:34:43 +0330
#   Subject: [PATCH] add scrape-radio_guide.fm-manual script
#   File:   stuff/scrape-radio_guide.fm-manual.sh (new file, mode 100755)
#
# Requirements: curl, htmlq (on PATH or as ./htmlq), awk, sed, sort, git.
#
# Environment:
#   M3U_REPO_DIR  path of the playlist repository checkout
#                 (default: c:/git/m3u-radio-music-playlists)

set -euo pipefail

readonly BASE_URL='https://www.radioguide.fm'
readonly MAX_PAGE=15      # listing pages requested per genre (1..15)
readonly PAGE_LIMIT=250   # value of the "limit" query parameter
readonly REPO_DIR="${M3U_REPO_DIR:-c:/git/m3u-radio-music-playlists}"
readonly OUT_DIR="${REPO_DIR}/radio_guide.fm"

HTMLQ=''      # resolved htmlq command, set in main
WORK_DIR=''   # private scratch directory, removed on exit

die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

warn() {
  printf 'warning: %s\n' "$*" >&2
}

# Remove the scratch directory; installed as the EXIT trap.
cleanup() {
  if [[ -n "$WORK_DIR" && -d "$WORK_DIR" ]]; then
    rm -rf -- "$WORK_DIR"
  fi
}

#######################################
# Download a URL to stdout.
# -f makes HTTP errors a non-zero exit instead of an error page on stdout.
# Arguments: $1 - URL
#######################################
fetch() {
  curl -fsS -- "$1"
}

#######################################
# Print the sorted, unique genre identifiers linked from the genre index.
# Globals: BASE_URL, HTMLQ (read)
# Outputs: one genre query value per line
#######################################
list_genres() {
  fetch "${BASE_URL}/genre" \
    | "$HTMLQ" -a href a \
    | sed -n 's|^.*/search/index?genre=||p' \
    | sort -u
}

#######################################
# Filter a list of hrefs (stdin) down to station page paths (stdout).
# Keeps hrefs containing "/internet-radio-", drops the first character
# (the leading "/"), requires a further "/" in what is left, and strips
# any ";" characters. Duplicate hrefs are emitted once.
#######################################
extract_station_paths() {
  LC_ALL=C awk '
    seen[$0]++ { next }
    /\/internet-radio-/ {
      path = substr($0, 2)
      if (index(path, "/") == 0) next
      gsub(/;/, "", path)
      if (path != "") print path
    }
  '
}

#######################################
# Pull the stream URL out of a station page (stdin -> stdout).
# NOTE(review): the extraction is positional - it keeps everything from
# column 52 of each "var stream" line and drops the last 4 characters.
# It depends on the exact page layout; verify if the site changes.
#######################################
extract_stream_url() {
  LC_ALL=C awk '
    /var stream/ {
      url = substr($0, 52)
      url = substr(url, 1, length(url) - 4)
      sub(/^[ \t]+/, "", url)
      gsub(/;/, "", url)
      if (url != "") print url
    }
  '
}

#######################################
# Collect the unique station page paths of one genre.
# Arguments: $1 - genre query value, $2 - output file
# Outputs:   progress lines "<genre> - <page suffix>" on stdout
#######################################
collect_station_paths() {
  local genre=$1 out=$2
  local raw="${out}.raw"
  local page suffix links

  : > "$raw"
  for (( page = 1; page <= MAX_PAGE; page++ )); do
    # The first page is requested without a page parameter.
    suffix=''
    if (( page > 1 )); then
      suffix="&page=${page}"
    fi

    if ! links=$(fetch "${BASE_URL}/search/index?genre=${genre}${suffix}&limit=${PAGE_LIMIT}" \
        | "$HTMLQ" -a href a \
        | extract_station_paths); then
      warn "could not fetch page ${page} of genre ${genre}"
      continue
    fi
    printf '%s - %s\n' "$genre" "$suffix"

    # A page without station links means the listing is exhausted.
    if [[ -z "$links" ]]; then
      break
    fi
    printf '%s\n' "$links" >> "$raw"
  done

  # The same station can show up on several pages; fetch each one only once.
  awk '!seen[$0]++' "$raw" > "$out"
  rm -f -- "$raw"
}

#######################################
# Fetch every station page listed in a file and collect the stream URLs.
# Arguments: $1 - genre (for progress output), $2 - station list file,
#            $3 - output file
# Outputs:   progress lines "<genre> - <station path>" on stdout
#######################################
collect_streams() {
  local genre=$1 stations=$2 out=$3
  local path url

  : > "$out"
  # Read on fd 4 so nothing inside the loop can swallow the station list.
  while IFS= read -r path <&4; do
    if url=$(fetch "${BASE_URL}/${path}" | extract_stream_url); then
      if [[ -n "$url" ]]; then
        printf '%s\n' "$url" >> "$out"
      fi
    else
      warn "could not fetch station page ${path}"
    fi
    printf '%s - %s\n' "$genre" "$path"
  done 4< "$stations"
}

#######################################
# Turn a list of stream URLs into an extended M3U playlist.
# Arguments: $1 - stream list file, $2 - playlist file to create
#######################################
write_playlist() {
  local streams=$1 playlist=$2
  {
    printf '#EXTM3U\n'
    awk '{ print "#EXTINF:-1"; print }' "$streams"
  } > "$playlist"
}

#######################################
# Move the generated playlists into the repository, then add, commit, push.
# Arguments: $1 - directory holding the generated *.m3u files
#######################################
publish() {
  local playlists=$1

  mkdir -p -- "$OUT_DIR"
  mv -- "$playlists"/*.m3u "$OUT_DIR/"

  git -C "$REPO_DIR" add .
  # "git commit" fails when nothing is staged; skip it in that case.
  if git -C "$REPO_DIR" diff --cached --quiet; then
    printf 'no playlist changes to commit\n'
  else
    git -C "$REPO_DIR" commit -m "$(date +'%b/%d - %I:%M %p')"
  fi
  git -C "$REPO_DIR" push
}

main() {
  local tool
  for tool in curl awk sed sort git mktemp; do
    command -v "$tool" > /dev/null 2>&1 || die "required tool not found: ${tool}"
  done
  # Accept htmlq from PATH or from the current directory.
  HTMLQ=$(command -v htmlq || command -v ./htmlq) \
    || die "htmlq not found on PATH or in the current directory"
  [[ -d "$REPO_DIR" ]] || die "repository directory not found: ${REPO_DIR}"

  # All intermediate files live in a private directory that is always removed,
  # so reruns never append to stale data or touch unrelated files.
  WORK_DIR=$(mktemp -d) || die "could not create a temporary directory"
  trap cleanup EXIT

  local genres_file="${WORK_DIR}/genres.txt"
  local playlists="${WORK_DIR}/playlists"
  mkdir -- "$playlists"

  list_genres > "$genres_file" || die "could not download the genre index"
  [[ -s "$genres_file" ]] || die "no genres found on ${BASE_URL}/genre"

  local genre stations streams
  local count=0
  while IFS= read -r genre <&3; do
    # The genre becomes part of file names; reject anything path-like.
    if [[ -z "$genre" || "$genre" == */* ]]; then
      warn "skipping unusable genre name: '${genre}'"
      continue
    fi
    stations="${WORK_DIR}/stations-${genre}.txt"
    streams="${WORK_DIR}/streams-${genre}.txt"

    collect_station_paths "$genre" "$stations"
    collect_streams "$genre" "$stations" "$streams"

    if [[ ! -s "$streams" ]]; then
      warn "no streams found for genre ${genre}; no playlist written"
      continue
    fi
    write_playlist "$streams" "${playlists}/${genre}.m3u"
    count=$(( count + 1 ))
  done 3< "$genres_file"

  # Never commit and push when the scrape produced nothing.
  (( count > 0 )) || die "no playlists were generated; nothing to publish"

  publish "$playlists"
}

main "$@"