#!/bin/bash

# scrape the links from internet radio
#lynx --dump --listonly --nonumbers https://onlineradiobox.com/genres/ | grep "https://onlineradiobox.com/genre" | grep -v "genres" | sort | uniq > links.txt
curl -s https://onlineradiobox.com/genres/ | htmlq -a href a | grep "genre" | grep -v "genres" | sort | uniq | awk -F "/" '{print $3}' > genres.txt

# strip unnessery part of links (we'll add them later in the for loop) 
#cat links.txt | rev | cut -c2- | rev | cut -c34- > pages.txt

# scrape links of the streams
#for i in $(cat pages.txt) ; do for j in "" \?p={1..200} ; do curl https://onlineradiobox.com/genre/$i/$j | grep -oP 'stream="\K[^"]+' | grep -v "playerservices\|.m3u\|onlineradiobox" | sed 's/\;//g' | awk '!seen[$0]++' | sed '/^$/d' | awk 'length>10' >> $i.txt ; echo "$i - $j scraped" ; done ; done
for i in $(cat genres.txt) ; do curl -s https://onlineradiobox.com/genre/$i/ | htmlq .stations-list button '.station_play, .b-play' | grep -oP 'radioname="\K[^"]+|stream="\K[^"]+' | sed 's/\;//g' >> A-$i.txt ; echo -e "$i" ; done 
for i in $(cat genres.txt) ; do for j in \?p={1..200} ; do curl -s https://onlineradiobox.com/genre/$i/$j | htmlq .stations-list button '.station_play, .b-play' | grep -oP 'radioname="\K[^"]+|stream="\K[^"]+' | sed 's/\;//g' >> A-$i.txt ; echo -e "$i - $j" ; done ; done

# convert temp files to proper format
for i in A-*.txt ; do cat $i | awk '!seen[$0]++' | awk '{print "#EXTINF:-1,"$0}' | sed 's/\;//g' | sed 's/#EXTINF:-1 , http/http/g' > A$i ; done

# convert files to m3u extension
for i in AA-*.txt ; do sed '1s/^/#EXTM3U\n/' $i > $i.m3u ; done

# remove AA and double extensions from files
for i in *.m3u ; do mv "$i" "`echo $i | sed -e 's/AA-//' -e 's/.txt//'`" ; done