Update scrape-internet-radio-manual.sh

2 years ago · 5f2cb9efc5
parent 64a016cee8
commit 5f2cb9efc5
1 changed files with 3 additions and 3 deletions
--- a/stuff/scrape-internet-radio-manual.sh
+++ b/stuff/scrape-internet-radio-manual.sh
@@ -7,10 +7,10 @@ lynx --dump --listonly --nonumbers https://www.internet-radio.com/stations/ | gr
 cat links.txt | sed 's!https://www.internet-radio.com/stations/!!' | sed 's/\///g' | sed '/^$/d' | sed -e 's/ /%20/g' | sort | uniq > links2.txt

 # scrape links of the streams
-for i in "" page{2..10} ; do for j in $(cat links2.txt) ; do curl -s https://www.internet-radio.com/stations/$j/$i.html | htmlq --attribute href a | grep '.m3u' | cut -b 37- | awk -F '\\listen' '{print $1""}' | awk -F '\\.m3u' '{print $1""}' | awk -F '\\&t=' '{print $1""}' | awk '!seen[$0]++' | sed '/^$/d' | awk 'length>10' >> $j.txt ; echo "$j - $i scraped" ; done ; done
+for i in "" page{2..16} ; do for j in $(cat links2.txt) ; do curl -s https://www.internet-radio.com/stations/$j/$i.html | htmlq --attribute href a | grep '.m3u' | cut -b 37- | awk -F '\\listen' '{print $1""}' | awk -F '\\.m3u' '{print $1""}' | awk -F '\\&t=' '{print $1""}' | awk '!seen[$0]++' | sed '/^$/d' | awk 'length>10' >> $j.txt ; echo "$j - $i scraped" ; done ; done

-# a few links have more than 10 pages, the longest page is pop with 50 pages, so if you abseloutly need all of them you can do those with a longer loop
-# here is the list of the bigger links = Country Talk 80s Oldies Dance Gospel Christian Rock Pop
+# a few links have more than 16 pages (the longest, pop, has 50 pages), so if you need all of them you have to run another for loop for those links
+# these streams need up to 34 further pages scraped (for a total of up to 50 pages) = "Glam Rock" "Rock" "Classic" "Rock" "Pop"

 # convert links to m3u stream files
 for i in $(cat links2.txt) ; do sed "s/^/#EXTINF:-1\n/" $i.txt | sed '1s/^/#EXTM3U\n/' > $i.m3u ; done