From 5099bc9f9178466348219defb3bb25b316bb38be Mon Sep 17 00:00:00 2001 From: Abhimanyu Sharma Date: Sat, 20 Apr 2024 04:41:13 +0530 Subject: [PATCH] Add: extract-urls processor --- cmd/processor_extract-url.go | 53 ++++++++++++++++++++++++ go.mod | 1 + go.sum | 2 + processors/processor.go | 1 + processors/url.go | 45 ++++++++++++++++++++ processors/url_test.go | 79 ++++++++++++++++++++++++++++++++++++ 6 files changed, 181 insertions(+) create mode 100644 cmd/processor_extract-url.go diff --git a/cmd/processor_extract-url.go b/cmd/processor_extract-url.go new file mode 100644 index 0000000..e5bb907 --- /dev/null +++ b/cmd/processor_extract-url.go @@ -0,0 +1,53 @@ +// Code generated by github.com/abhimanyu003/sttr/cmd/generate.go. DO NOT EDIT + +package cmd + +import ( + "fmt" + "os" + + "github.com/abhimanyu003/sttr/processors" + "github.com/abhimanyu003/sttr/utils" + "github.com/spf13/cobra" +) + +func init() { + rootCmd.AddCommand(extractUrlCmd) +} + +var extractUrlCmd = &cobra.Command{ + Use: "extract-url [string]", + Short: "Extract URLs from text", + Aliases: []string{"url-ext", "extract-urls", "ext-url"}, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + var err error + var in []byte + var out string + + if len(args) == 0 { + in = []byte(utils.ReadMultilineInput()) + } else { + if fi, err := os.Stat(args[0]); err == nil && !fi.IsDir() { + d, err := os.ReadFile(args[0]) + if err != nil { + return err + } + in = d + } else { + in = []byte(args[0]) + } + } + + flags := make([]processors.Flag, 0) + p := processors.ExtractURLs{} + + out, err = p.Transform(in, flags...) 
+ if err != nil { + return err + } + + _, err = fmt.Fprint(os.Stdout, out) + return err + }, +} diff --git a/go.mod b/go.mod index a22ffe0..4529127 100644 --- a/go.mod +++ b/go.mod @@ -43,4 +43,5 @@ require ( golang.org/x/text v0.14.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + mvdan.cc/xurls/v2 v2.5.0 // indirect ) diff --git a/go.sum b/go.sum index 8127216..a6a0447 100644 --- a/go.sum +++ b/go.sum @@ -106,3 +106,5 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +mvdan.cc/xurls/v2 v2.5.0 h1:lyBNOm8Wo71UknhUs4QTFUNNMyxy2JEIaKKo0RWOh+8= +mvdan.cc/xurls/v2 v2.5.0/go.mod h1:yQgaGQ1rFtJUzkmKiHYSSfuQxqfYmd//X6PxvholpeE= diff --git a/processors/processor.go b/processors/processor.go index 6b75b7f..f3b1900 100644 --- a/processors/processor.go +++ b/processors/processor.go @@ -24,6 +24,7 @@ var List = []list.Item{ CountWords{}, EscapeQuotes{}, ExtractEmails{}, + ExtractURLs{}, ExtractIPs{}, FormatJSON{}, HexDecode{}, diff --git a/processors/url.go b/processors/url.go index 1b27000..6ef8bbe 100644 --- a/processors/url.go +++ b/processors/url.go @@ -2,7 +2,9 @@ package processors import ( "fmt" + "mvdan.cc/xurls/v2" "net/url" + "strings" ) // URLEncode encode url string. @@ -69,3 +71,46 @@ func (p URLDecode) Description() string { func (p URLDecode) FilterValue() string { return p.Title() } + +// ExtractURLs extracts URLs from text. 
+type ExtractURLs struct{} + +func (p ExtractURLs) Name() string { + return "extract-url" +} + +func (p ExtractURLs) Alias() []string { + return []string{"url-ext", "extract-urls", "ext-url"} +} + +func (p ExtractURLs) Transform(data []byte, _ ...Flag) (string, error) { + rxRelaxed := xurls.Relaxed() + urls := rxRelaxed.FindAllString(string(data), -1) + + var output string + + for _, u := range urls { + output = output + u + "\n" + } + + output = strings.TrimSuffix(output, "\n") + + return output, nil +} + +func (p ExtractURLs) Flags() []Flag { + return nil +} + +func (p ExtractURLs) Title() string { + title := "Extract URLs" + return fmt.Sprintf("%s (%s)", title, p.Name()) +} + +func (p ExtractURLs) Description() string { + return "Extract URLs from text" +} + +func (p ExtractURLs) FilterValue() string { + return p.Title() +} diff --git a/processors/url_test.go b/processors/url_test.go index 1fc13d8..801211b 100644 --- a/processors/url_test.go +++ b/processors/url_test.go @@ -116,6 +116,85 @@ func TestURLDecode_Command(t *testing.T) { } } +func TestExtractURL_Command(t *testing.T) { + test := struct { + alias []string + description string + filterValue string + flags []Flag + name string + title string + }{ + alias: []string{"url-ext", "extract-urls", "ext-url"}, + description: "Extract URLs from text", + filterValue: "Extract URLs (extract-url)", + flags: nil, + name: "extract-url", + title: "Extract URLs (extract-url)", + } + p := ExtractURLs{} + if got := p.Alias(); !reflect.DeepEqual(got, test.alias) { + t.Errorf("Alias() = %v, want %v", got, test.alias) + } + if got := p.Description(); got != test.description { + t.Errorf("Description() = %v, want %v", got, test.description) + } + if got := p.FilterValue(); got != test.filterValue { + t.Errorf("FilterValue() = %v, want %v", got, test.filterValue) + } + if got := p.Flags(); !reflect.DeepEqual(got, test.flags) { + t.Errorf("Flags() = %v, want %v", got, test.flags) + } + if got := p.Name(); got != test.name { + 
t.Errorf("Name() = %v, want %v", got, test.name) + } + if got := p.Title(); got != test.title { + t.Errorf("Title() = %v, want %v", got, test.title) + } +} + +func TestExtractURL_Transform(t *testing.T) { + type args struct { + data []byte + in1 []Flag + } + tests := []struct { + name string + args args + want string + wantErr bool + }{ + { + name: "Should extract http://foo.com/", + args: args{data: []byte("must have scheme: http://foo.com/.")}, + want: "http://foo.com/", + }, + { + name: "Should extract foo.com", + args: args{data: []byte("must have scheme: foo.com/.")}, + want: "foo.com/", + }, + { + name: "multiple urls foo.com example.com", + args: args{data: []byte("multiple urls foo.com example.com")}, + want: "foo.com\nexample.com", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := ExtractURLs{} + got, err := p.Transform(tt.args.data, tt.args.in1...) + if (err != nil) != tt.wantErr { + t.Errorf("Transform() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("Transform() got = %v, want %v", got, tt.want) + } + }) + } +} + func TestURLDecode_Transform(t *testing.T) { type args struct { data []byte