mirror of
https://github.com/namecoin/ncdns
synced 2024-11-16 00:13:01 +00:00
ncdumpzone: Add mode for a URL list.
This can be used as input for a YaCy crawl job.
This commit is contained in:
parent
3b7ffcbc45
commit
39fbbc0ec5
@ -11,6 +11,7 @@ import (
|
||||
extratypes "github.com/hlandau/ncbtcjsontypes"
|
||||
"github.com/namecoin/ncdns/namecoin"
|
||||
"github.com/namecoin/ncdns/ncdomain"
|
||||
"github.com/namecoin/ncdns/rrtourl"
|
||||
"github.com/namecoin/ncdns/tlsoverridefirefox"
|
||||
"github.com/namecoin/ncdns/util"
|
||||
)
|
||||
@ -29,6 +30,12 @@ func dumpRR(rr dns.RR, dest io.Writer, format string) error {
|
||||
return err
|
||||
}
|
||||
fmt.Fprint(dest, result)
|
||||
case "url-list":
|
||||
result, err := rrtourl.URLsFromRR(rr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Fprint(dest, result)
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -77,7 +84,8 @@ func dumpName(item *extratypes.NameFilterItem, conn namecoin.Conn,
|
||||
// Dump extracts all domain names from conn, formats them according to the
|
||||
// specified format, and writes the result to dest.
|
||||
func Dump(conn namecoin.Conn, dest io.Writer, format string) error {
|
||||
if format != "zonefile" && format != "firefox-override" {
|
||||
if format != "zonefile" && format != "firefox-override" &&
|
||||
format != "url-list" {
|
||||
return fmt.Errorf("Invalid \"format\" argument: %s", format)
|
||||
}
|
||||
|
||||
|
@ -23,7 +23,8 @@ var (
|
||||
"Namecoin RPC password")
|
||||
formatFlag = cflag.String(flagGroup, "format", "zonefile", "Output "+
|
||||
"format. \"zonefile\" = DNS zone file. "+
|
||||
"\"firefox-override\" = Firefox cert_override.txt format.")
|
||||
"\"firefox-override\" = Firefox cert_override.txt format. "+
|
||||
"\"url-list\" = URL list.")
|
||||
)
|
||||
|
||||
var conn namecoin.Conn
|
||||
|
41
rrtourl/rrtourl.go
Normal file
41
rrtourl/rrtourl.go
Normal file
@ -0,0 +1,41 @@
|
||||
package rrtourl
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
"github.com/namecoin/ncdns/util"
|
||||
)
|
||||
|
||||
// URLsFromRR returns a list of URL's derived from rr, which is suitable for
|
||||
// passing to a search engine crawler like YaCy. If no such list can be
|
||||
// derived, returns an empty string.
|
||||
func URLsFromRR(rr dns.RR) (string, error) {
|
||||
header := rr.Header()
|
||||
if header == nil {
|
||||
return "", fmt.Errorf("Nil RR header")
|
||||
}
|
||||
|
||||
hostFQDN := header.Name
|
||||
|
||||
// Remove things like "_443._tcp" in TLSA records
|
||||
for strings.HasPrefix(hostFQDN, "_") {
|
||||
_, hostFQDN = util.SplitDomainTail(hostFQDN)
|
||||
}
|
||||
|
||||
// Remove the trailing period from FQDN's
|
||||
host := strings.TrimSuffix(hostFQDN, ".")
|
||||
|
||||
// Remove wildcard subdomains (later we assume that they might be "www.")
|
||||
host = strings.TrimPrefix(host, "*.")
|
||||
|
||||
return "http://" + host + "/" + "\n" +
|
||||
"http://www." + host + "/" + "\n" +
|
||||
"https://" + host + "/" + "\n" +
|
||||
"https://www." + host + "/" + "\n" +
|
||||
"ftp://" + host + "/" + "\n" +
|
||||
"ftp://www." + host + "/" + "\n" +
|
||||
"ftps://" + host + "/" + "\n" +
|
||||
"ftps://www." + host + "/" + "\n", nil
|
||||
}
|
Loading…
Reference in New Issue
Block a user