forked from ayang64/adstxt
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadstxt.go
179 lines (150 loc) · 3.9 KB
/
adstxt.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
package adstxt
import (
"bufio"
"bytes"
"context"
"fmt"
"net/http"
"strings"
)
// AdsTxt stores all of the data associated with an ads.txt file: the buyer
// records it lists and the variables it declares.
type AdsTxt struct {
Source string // URL of ads.txt file this represents.
// Partner holds one Buyer per accepted buyer record, in the order the
// records were appended.
Partner []Buyer
// Variable maps a variable name to every value recorded for it, in the
// order the values were appended. parseVariable stores values under the
// upper-case names "CONTACT" and "SUBDOMAIN".
Variable map[string][]string
}
// Buyer encodes data associated with one of the partners found in an ads.txt
// file. parseBuyerRecord fills the fields positionally from a comma-separated
// record and trims surrounding whitespace from each one; the values are not
// validated beyond that.
type Buyer struct {
// Domain is the first field of the record.
Domain string
// PublisherID is the second field of the record.
PublisherID string
// AccountType is the third field of the record.
AccountType string
// CertificationAuthority is the optional fourth field of the record; it is
// the empty string when the record has only three fields.
CertificationAuthority string
}
// parseBuyerRecord treats line as a comma-separated buyer record and, when it
// has the expected shape, appends the resulting Buyer to a.Partner.
//
// A record must have exactly three fields (domain, publisher ID, account
// type) or four (the same plus a certification authority). Every field is
// stored with surrounding whitespace removed, and a missing fourth field is
// stored as the empty string. Any other field count yields an error and leaves
// a.Partner untouched.
func (a *AdsTxt) parseBuyerRecord(line string) error {
	fields := strings.Split(line, ",")

	// The certification authority is optional, so it starts out empty and is
	// only filled in when a fourth column is present.
	var authority string
	switch len(fields) {
	case 3:
		// No fourth column; keep the empty default.
	case 4:
		authority = strings.TrimSpace(fields[3])
	default:
		// Too few or too many columns to be a buyer record.
		return fmt.Errorf("could not extract buyer records")
	}

	buyer := Buyer{
		Domain:                 strings.TrimSpace(fields[0]),
		PublisherID:            strings.TrimSpace(fields[1]),
		AccountType:            strings.TrimSpace(fields[2]),
		CertificationAuthority: authority,
	}
	a.Partner = append(a.Partner, buyer)
	return nil
}
// parseVariable recognises a variable declaration of the form NAME=VALUE and
// records VALUE under NAME in a.Variable.
//
// Only two variable names are recognised: CONTACT and SUBDOMAIN. A generic
// NAME=VALUE parser is deliberately avoided because the spec is ambiguous
// about whether '=' may appear inside a comma-separated buyer record, so a
// generic parser could mistake a valid buyer record for a variable. Instead
// the line must literally begin with "CONTACT=" or "SUBDOMAIN=".
//
// The name is matched case-insensitively and stored under its upper-case
// spelling. The value is stored with surrounding whitespace removed, and
// repeated declarations accumulate in order. a.Variable is allocated on first
// use, so the method is safe on a zero-value AdsTxt.
//
// It returns nil when line was recorded as a variable, and an error when line
// does not start with one of the recognised names.
func (a *AdsTxt) parseVariable(line string) error {
	for _, name := range []string{"CONTACT", "SUBDOMAIN"} {
		prefix := name + "="
		if len(line) < len(prefix) || !strings.EqualFold(line[:len(prefix)], prefix) {
			continue
		}

		// Writing to a nil map panics, so make sure one exists first.
		if a.Variable == nil {
			a.Variable = make(map[string][]string)
		}

		value := strings.TrimSpace(line[len(prefix):])
		a.Variable[name] = append(a.Variable[name], value)
		return nil
	}

	return fmt.Errorf("line does not contain a variable")
}
// Parse parses the supplied ads.txt data and returns a populated AdsTxt.
//
// srcurl is recorded verbatim in the result's Source field; txt is the body of
// the ads.txt file. The text is processed line by line:
//
//   - everything from the first '#' to the end of a line is a comment and is
//     discarded, whether the '#' starts the line or follows a record;
//   - lines that are blank after comment removal and trimming are skipped;
//   - lines accepted by parseVariable are recorded as variables;
//   - every other line is offered to parseBuyerRecord, and lines it rejects
//     are silently dropped.
//
// An error is returned when txt is empty or when scanning fails. In both cases
// the returned AdsTxt still carries Source (and whatever was parsed before the
// failure), so callers that ignore the error get a usable, possibly empty,
// value.
func Parse(srcurl, txt string) (AdsTxt, error) {
	// Build the result first so that every return path, including the early
	// error below, hands back a value with Source and Variable initialised.
	rc := AdsTxt{
		Source:   srcurl,
		Variable: make(map[string][]string),
	}

	if txt == "" {
		return rc, fmt.Errorf("given an empty string; nothing to parse")
	}

	scanner := bufio.NewScanner(strings.NewReader(txt))
	// The whole document is already in memory, so allow a single line to be as
	// long as the document. With the default 64 KiB token limit one over-long
	// line would stop the scan and silently drop every record after it.
	scanner.Buffer(nil, len(txt)+1)

	for scanner.Scan() {
		line := scanner.Text()

		// Drop a full-line or trailing comment.
		if i := strings.IndexByte(line, '#'); i >= 0 {
			line = line[:i]
		}

		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}

		if rc.parseVariable(line) == nil {
			continue
		}

		// A line that is neither a variable nor a well-formed buyer record is
		// an invalid record; skipping it is intentional, so the error is
		// deliberately discarded.
		_ = rc.parseBuyerRecord(line)
	}

	if err := scanner.Err(); err != nil {
		return rc, fmt.Errorf("scanning ads.txt data from %q: %v", srcurl, err)
	}

	return rc, nil
}
// fetchresult groups a URL with a string of contents.
//
// NOTE(review): nothing in the visible part of this file references this
// type; presumably it was meant to pair a fetched URL with the raw response
// body. Confirm it is unused elsewhere in the package before relying on or
// removing it.
type fetchresult struct {
URL string
Contents string
}
// fetch downloads the ads.txt file at url, parses it, and sends exactly one
// AdsTxt on rc.
//
// Fetch counts one result per URL, so a value is sent on every path. When the
// request fails, the server answers with a non-2xx status, the body cannot be
// read, or the body cannot be parsed, the value sent is an otherwise empty
// AdsTxt whose Source is url.
//
// The download completes and the response body is closed before the send, so
// a slow or absent receiver does not pin an open connection.
//
// NOTE(review): the request uses http.Get, which carries no timeout and no
// context, so a stalled server keeps this goroutine alive until the
// transport gives up.
func fetch(url string, rc chan<- AdsTxt) {
	rc <- fetchAdsTxt(url)
}

// fetchAdsTxt performs the download and parse for fetch and returns the
// result, falling back to AdsTxt{Source: url} on any failure.
func fetchAdsTxt(url string) AdsTxt {
	failed := AdsTxt{Source: url}

	resp, err := http.Get(url)
	if err != nil {
		return failed
	}
	defer resp.Body.Close()

	// Only a successful response carries an ads.txt file; an error page must
	// not be parsed as if it were one.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return failed
	}

	var buf bytes.Buffer
	if _, err := buf.ReadFrom(resp.Body); err != nil {
		// A truncated download would yield a misleading partial record set.
		return failed
	}

	parsed, err := Parse(url, buf.String())
	if err != nil {
		return failed
	}
	return parsed
}
// Fetch downloads and parses ads.txt files at each of the supplied URLs.
//
// Every URL is fetched concurrently in its own goroutine. Results are
// returned in the order the downloads complete, which is not necessarily the
// order of urls. Each URL contributes one AdsTxt.
//
// If ctx is done before every download has reported, Fetch stops waiting and
// returns the results gathered so far with a nil error; callers that need to
// distinguish a complete run from a cut-short one should compare the result
// length with len(urls) or inspect ctx.Err().
//
// An error is returned only when no URLs are supplied.
//
// NOTE(review): fetch takes no context, so downloads still in flight when ctx
// is done are not cancelled; they run to completion in the background.
func Fetch(ctx context.Context, urls ...string) ([]AdsTxt, error) {
	if len(urls) == 0 {
		return nil, fmt.Errorf("no URLs supplied; nothing to do")
	}

	// One buffer slot per URL: every fetch goroutine can deliver its result
	// and exit even after we have stopped listening because ctx is done. With
	// an unbuffered channel those goroutines would block on the send forever.
	results := make(chan AdsTxt, len(urls))
	for _, url := range urls {
		go fetch(url, results)
	}

	// Left nil (not pre-sized) so that an early cancellation with nothing
	// collected still returns a nil slice.
	var rc []AdsTxt
	for range urls {
		select {
		case r := <-results:
			rc = append(rc, r)
		case <-ctx.Done():
			// Deadline reached or caller cancelled: hand back what we have.
			return rc, nil
		}
	}

	return rc, nil
}