Update scrape-internet-radio-manual.sh

pull/20/head
hossein s. borhani 2 years ago committed by GitHub
parent 64a016cee8
commit 5f2cb9efc5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -7,10 +7,10 @@ lynx --dump --listonly --nonumbers https://www.internet-radio.com/stations/ | gr
cat links.txt | sed 's!https://www.internet-radio.com/stations/!!' | sed 's/\///g' | sed '/^$/d' | sed -e 's/ /%20/g' | sort | uniq > links2.txt
# scrape links of the streams
for i in "" page{2..10} ; do for j in $(cat links2.txt) ; do curl -s https://www.internet-radio.com/stations/$j/$i.html | htmlq --attribute href a | grep '.m3u' | cut -b 37- | awk -F '\\listen' '{print $1""}' | awk -F '\\.m3u' '{print $1""}' | awk -F '\\&t=' '{print $1""}' | awk '!seen[$0]++' | sed '/^$/d' | awk 'length>10' >> $j.txt ; echo "$j - $i scraped" ; done ; done
for i in "" page{2..16} ; do for j in $(cat links2.txt) ; do curl -s https://www.internet-radio.com/stations/$j/$i.html | htmlq --attribute href a | grep '.m3u' | cut -b 37- | awk -F '\\listen' '{print $1""}' | awk -F '\\.m3u' '{print $1""}' | awk -F '\\&t=' '{print $1""}' | awk '!seen[$0]++' | sed '/^$/d' | awk 'length>10' >> $j.txt ; echo "$j - $i scraped" ; done ; done
# a few links have more than 10 pages, the longest page is pop with 50 pages, so if you abseloutly need all of them you can do those with a longer loop
# here is the list of the bigger links = Country Talk 80s Oldies Dance Gospel Christian Rock Pop
# a few links have more than 16 pages, the longest page is pop with 50 pages, so if you need all of them you have to run another for loop for them
# these streams need to be scraped for 34 pages or less furthur totaling of 50 pages = "Glam Rock" "Rock" "Classic" "Rock" "Pop"
# convert links to m3u stream files
for i in $(cat links2.txt) ; do sed "s/^/#EXTINF:-1\n/" $i.txt | sed '1s/^/#EXTM3U\n/' > $i.m3u ; done

Loading…
Cancel
Save