Skip to content

Commit

Permalink
Add: extract-urls processor
Browse files Browse the repository at this point in the history
  • Loading branch information
abhimanyu003 committed Apr 19, 2024
1 parent 7b6f4de commit 5099bc9
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 0 deletions.
53 changes: 53 additions & 0 deletions cmd/processor_extract-url.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,5 @@ require (
golang.org/x/text v0.14.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
mvdan.cc/xurls/v2 v2.5.0 // indirect
)
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,5 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
mvdan.cc/xurls/v2 v2.5.0 h1:lyBNOm8Wo71UknhUs4QTFUNNMyxy2JEIaKKo0RWOh+8=
mvdan.cc/xurls/v2 v2.5.0/go.mod h1:yQgaGQ1rFtJUzkmKiHYSSfuQxqfYmd//X6PxvholpeE=
1 change: 1 addition & 0 deletions processors/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ var List = []list.Item{
CountWords{},
EscapeQuotes{},
ExtractEmails{},
ExtractURLs{},
ExtractIPs{},
FormatJSON{},
HexDecode{},
Expand Down
45 changes: 45 additions & 0 deletions processors/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package processors

import (
"fmt"
"mvdan.cc/xurls/v2"
"net/url"
"strings"
)

// URLEncode encode url string.
Expand Down Expand Up @@ -69,3 +71,46 @@ func (p URLDecode) Description() string {
// FilterValue returns the value used when filtering this processor, which is
// the same string that Title reports.
func (p URLDecode) FilterValue() string {
	title := p.Title()
	return title
}

// ExtractURLs extracts URLs from text.
type ExtractURLs struct{}

// Name returns the processor's name, "extract-url".
func (p ExtractURLs) Name() string {
	const name = "extract-url"
	return name
}

// Alias returns the alternative names for this processor. A fresh slice is
// returned on every call.
func (p ExtractURLs) Alias() []string {
	aliases := []string{"url-ext", "extract-urls", "ext-url"}
	return aliases
}

// Transform finds every URL in data and returns the matches joined by
// newlines, in the order they occur in the input. Matching uses the relaxed
// xurls pattern. Any flags passed are ignored.
//
// The result is the empty string when nothing matches, and the returned
// error is always nil.
func (p ExtractURLs) Transform(data []byte, _ ...Flag) (string, error) {
	urls := xurls.Relaxed().FindAllString(string(data), -1)

	// strings.Join builds the result in a single pass (no quadratic
	// concatenation) and never emits a trailing newline, so nothing needs
	// trimming afterwards. A nil or empty match list yields "".
	return strings.Join(urls, "\n"), nil
}

// Flags returns nil; this processor defines no flags.
func (p ExtractURLs) Flags() []Flag {
	var none []Flag
	return none
}

// Title returns the display title followed by the processor name in
// parentheses, e.g. "Extract URLs (extract-url)".
func (p ExtractURLs) Title() string {
	return fmt.Sprintf("%s (%s)", "Extract URLs", p.Name())
}

// Description returns a one-line summary of what the processor does.
func (p ExtractURLs) Description() string {
	const description = "Extract URLs from text"
	return description
}

// FilterValue returns the value used when filtering this processor, which is
// the same string that Title reports.
func (p ExtractURLs) FilterValue() string {
	title := p.Title()
	return title
}
79 changes: 79 additions & 0 deletions processors/url_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,85 @@ func TestURLDecode_Command(t *testing.T) {
}
}

// TestExtractURL_Command checks the static metadata reported by ExtractURLs:
// aliases, description, filter value, flags, name and title.
func TestExtractURL_Command(t *testing.T) {
	wantAlias := []string{"url-ext", "extract-urls", "ext-url"}
	wantDescription := "Extract URLs from text"
	wantFilterValue := "Extract URLs (extract-url)"
	var wantFlags []Flag // nil: the processor exposes no flags
	wantName := "extract-url"
	wantTitle := "Extract URLs (extract-url)"

	proc := ExtractURLs{}

	if alias := proc.Alias(); !reflect.DeepEqual(alias, wantAlias) {
		t.Errorf("Alias() = %v, want %v", alias, wantAlias)
	}
	if description := proc.Description(); description != wantDescription {
		t.Errorf("Description() = %v, want %v", description, wantDescription)
	}
	if filterValue := proc.FilterValue(); filterValue != wantFilterValue {
		t.Errorf("FilterValue() = %v, want %v", filterValue, wantFilterValue)
	}
	if flags := proc.Flags(); !reflect.DeepEqual(flags, wantFlags) {
		t.Errorf("Flags() = %v, want %v", flags, wantFlags)
	}
	if name := proc.Name(); name != wantName {
		t.Errorf("Name() = %v, want %v", name, wantName)
	}
	if title := proc.Title(); title != wantTitle {
		t.Errorf("Title() = %v, want %v", title, wantTitle)
	}
}

// TestExtractURL_Transform runs ExtractURLs.Transform over a table of inputs
// and compares the newline-joined matches with the expected output.
func TestExtractURL_Transform(t *testing.T) {
	cases := []struct {
		name    string
		input   []byte
		flags   []Flag
		want    string
		wantErr bool
	}{
		{
			name:  "Should extract http://foo.com/",
			input: []byte("must have scheme: http://foo.com/."),
			want:  "http://foo.com/",
		},
		{
			name:  "Should extract foo.com",
			input: []byte("must have scheme: foo.com/."),
			want:  "foo.com/",
		},
		{
			name:  "multiple urls foo.com example.com",
			input: []byte("multiple urls foo.com example.com"),
			want:  "foo.com\nexample.com",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			proc := ExtractURLs{}
			out, err := proc.Transform(tc.input, tc.flags...)
			// An unexpected error state makes the output comparison
			// meaningless, so stop after reporting it.
			if failed := err != nil; failed != tc.wantErr {
				t.Errorf("Transform() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if out != tc.want {
				t.Errorf("Transform() got = %v, want %v", out, tc.want)
			}
		})
	}
}

func TestURLDecode_Transform(t *testing.T) {
type args struct {
data []byte
Expand Down

0 comments on commit 5099bc9

Please sign in to comment.