diff --git a/endpoints/events/test/ow_failed.txt b/endpoints/events/test/base64_quoted_vast.txt similarity index 100% rename from endpoints/events/test/ow_failed.txt rename to endpoints/events/test/base64_quoted_vast.txt diff --git a/endpoints/events/test/base64_vast.txt b/endpoints/events/test/base64_vast.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/endpoints/events/test/raw_vast.txt b/endpoints/events/test/raw_vast.txt index 2bd4a7e0d79..f245a2fdd0a 100644 --- a/endpoints/events/test/raw_vast.txt +++ b/endpoints/events/test/raw_vast.txt @@ -98,4 +98,5 @@ creative_url RhythmXchange Adserver ebdr adnxs -adnxs \ No newline at end of file +adnxs +Yahoo Ad Manager Plus [ini:PDC][fmt:Video][crs:3682][csz:15s] VAST 2.0 Linear Ad00:00:15 \ No newline at end of file diff --git a/endpoints/events/vtrack_ow.go b/endpoints/events/vtrack_ow.go index 28b35eb8b0c..abcd86bac4e 100644 --- a/endpoints/events/vtrack_ow.go +++ b/endpoints/events/vtrack_ow.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/base64" "errors" + "regexp" "strings" "time" @@ -16,6 +17,7 @@ import ( var ( errEventURLNotConfigured = errors.New("event urls not configured") + tmpWSRemoverRegex = regexp.MustCompile(`>\s+<`) ) // InjectVideoEventTrackers injects the video tracking events @@ -52,16 +54,18 @@ func InjectVideoEventTrackers( etreeParserTime := time.Since(_startTime) if fastXMLExperiment && err == nil { + etreeXMLResponse := response + _startTime = time.Now() fastXMLResponse, _ := injectVideoEventsFastXML(vastXML, eventURLMap, nurlPresent, imp.Video.Linearity) fastXMLParserTime := time.Since(_startTime) //temporary if fastXMLResponse != vastXML { - fastXMLResponse = tmpFastXMLProcessing(fastXMLResponse) + fastXMLResponse, etreeXMLResponse = tmpFastXMLProcessing(fastXMLResponse, response) } - isResponseMismatch := (response != fastXMLResponse) + isResponseMismatch := (etreeXMLResponse != fastXMLResponse) if isResponseMismatch { openrtb_ext.FastXMLLogf("\n[XML_PARSER_TEST] method:[vcr] creative:[%s]", base64.StdEncoding.EncodeToString([]byte(vastXML))) @@ -77,17 +81,6 @@ func InjectVideoEventTrackers( return response, metrics, err } -func tmpFastXMLProcessing(vast string) string { - //replace only if trackers are injected - vast = strings.ReplaceAll(vast, " >", ">") - // if strings.Contains(vast, "'") { - // if index := strings.Index(vast, "<") //step2: remove inbetween whitespaces + fastXML = strings.ReplaceAll(fastXML, " ><", "><") //step3: remove attribute endtag whitespace (this should be always before step2) + fastXML = strings.ReplaceAll(fastXML, "'", "\"") //step4: convert single quote to double quote + + etreeXML = tmpWSRemoverRegex.ReplaceAllString(etreeXML, "><") //step2: remove inbetween whitespaces + etreeXML = strings.ReplaceAll(etreeXML, " ><", "><") //step3: remove attribute endtag whitespace (this should be always before step2) + etreeXML = strings.ReplaceAll(etreeXML, "'", "\"") + return fastXML, etreeXML +} diff --git a/endpoints/events/vtrack_ow_test.go b/endpoints/events/vtrack_ow_test.go index afa7d95e0ec..d984c568045 100644 --- a/endpoints/events/vtrack_ow_test.go +++ b/endpoints/events/vtrack_ow_test.go @@ -10,12 +10,36 @@ import ( "strings" "testing" + "github.com/beevik/etree" "github.com/prebid/openrtb/v20/adcom1" "github.com/prebid/openrtb/v20/openrtb2" "github.com/prebid/prebid-server/v2/openrtb_ext" "github.com/stretchr/testify/assert" ) +func search(arr []int, value int) bool { + idx := sort.SearchInts(arr, value) + return idx < len(arr) && arr[idx] == value +} + +func quoteUnescape[T []byte | string](s T) string { + buf := bytes.Buffer{} + for i := 0; i < len(s); i++ { + ch := s[i] + if ch == '\\' { + if i+1 < len(s) { + nextCh := s[i+1] + if nextCh == '\\' || nextCh == '"' || nextCh == '\'' { + i++ + ch = nextCh + } + } + } + buf.WriteByte(ch) + } + return buf.String() +} + func TestInjectVideoEventTrackers(t *testing.T) { type args struct { externalURL string @@ -431,115 +455,119 @@ func TestInjectVideoEventTrackers(t *testing.T) { } } -func quoteUnescape[T []byte | string](s T) string { - buf := bytes.Buffer{} - for i := 0; i < len(s); i++ { - ch := s[i] - if ch == '\\' { - if i+1 < len(s) { - nextCh := s[i+1] - if nextCh == '\\' || nextCh == '"' || nextCh == '\'' { - i++ - ch = nextCh - } - } - } - buf.WriteByte(ch) +func TestETreeBehaviour(t *testing.T) { + // vast1 := `Appreciate00:00:30` + tests := []struct { + name string + in string + out string + }{ + { + name: "test", + in: " [ini:PDC][fmt:Video][crs:3682][csz:15s] ", + out: "", + }} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + doc := etree.NewDocument() + doc.WriteSettings.CanonicalEndTags = true + + err := doc.ReadFromString(tt.in) + assert.Nil(t, err) + + out, err := doc.WriteToString() + assert.Nil(t, err) + assert.Equal(t, tt.out, out) + }) } - return buf.String() } func TestCompareXMLParsers(t *testing.T) { - fileName := `./test/raw_vast.txt` - //fileName = `../../base64_vast.txt` + //$ cat *-prod.txt | sed -n 's/.*creative:\[\(.*\)\].*/\1/p' > $GOPATH/src/github.com/PubMatic-OpenWrap/prebid-server/endpoints/events/test/base64_vast.txt + type stats struct { + valid []int + generalMismatch []int + singleQuote []int + } - base64Decode := strings.Contains(fileName, "base64") + var ( + //fileName = `./test/base64_vast.txt` + //fileName = `./test/base64_quoted_vast.txt` + fileName = `./test/raw_vast.txt` + quoted = strings.Contains(fileName, "quoted") //xml files retrived from prod vast unwrapper + base64Decode = strings.Contains(fileName, "base64") + debugLines = []int{} + st = stats{} + currentLine, xmlCount = 0, 0 + ) file, err := os.Open(fileName) if !assert.Nil(t, err) { return } - defer file.Close() - var mismatched, debugLines []int - line := 0 + scanner := bufio.NewScanner(file) scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) - - //debugLines = []int{19, 24, 25, 29, 58, 80, 83, 84, 86, 93, 128, 147, 151, 155, 159, 168, 184, 190, 199, 200, 225, 226, 243, 249, 254, 261, 272, 281, 291, 298, 310, 312, 320, 323, 328, 340, 350, 358, 362, 373, 376, 384} sort.Ints(debugLines) for scanner.Scan() { - line++ + currentLine++ vast := scanner.Text() - if len(debugLines) > 0 && sort.SearchInts(debugLines, line) == len(debugLines) { - continue - } - if base64Decode { - data, err := base64.StdEncoding.DecodeString(vast) - if !assert.Nil(t, err) { - continue + //presetup + { + //debug + if len(debugLines) > 0 { + if found := search(debugLines, currentLine); !found { + continue + } + } + + //base64decode + if base64Decode { + data, err := base64.StdEncoding.DecodeString(vast) + if !assert.Nil(t, err) { + continue + } + vast = string(data) + if quoted { + vast = quoteUnescape(data) + } } - vast = quoteUnescape(data) } - t.Run(fmt.Sprintf("vast_%d", line), func(t *testing.T) { + + t.Run(fmt.Sprintf("vast_%d", currentLine), func(t *testing.T) { + xmlCount++ + etreeXML, _ := injectVideoEventsETree(vast, eventURLMap, false, adcom1.LinearityLinear) fastXML, _ := injectVideoEventsFastXML(vast, eventURLMap, false, adcom1.LinearityLinear) + if vast != fastXML { - fastXML = tmpFastXMLProcessing(fastXML) + fastXML, etreeXML = tmpFastXMLProcessing(fastXML, etreeXML) } - if !assert.Equal(t, etreeXML, fastXML) { - mismatched = append(mismatched, line) + if len(debugLines) > 0 { + assert.Equal(t, etreeXML, fastXML, vast) } - }) - } - t.Logf("\ntotal:[%v] mismatched:[%v] lines:[%v]", line, len(mismatched), mismatched) - assert.Equal(t, 0, len(mismatched)) - assert.Nil(t, scanner.Err()) -} - -func TestBase64(t *testing.T) { - fileName := `./test/ow_failed.txt` - file, err := os.Open(fileName) - if !assert.Nil(t, err) { - return + if etreeXML != fastXML { + if idx := strings.Index(etreeXML, "'"); idx != -1 && + (strings.HasPrefix(fastXML[idx:], "'") || strings.HasPrefix(fastXML[idx:], "\"")) { + st.singleQuote = append(st.singleQuote, currentLine) + } else { + st.generalMismatch = append(st.generalMismatch, currentLine) + } + return + } + st.valid = append(st.valid, currentLine) + }) } - defer file.Close() - var mismatched, errored, debugLines []int - var line, singleQuotePresent, maxLength int - - maxLength = 14884 - scanner := bufio.NewScanner(file) - scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) - - //debugLines = []int{19, 24, 25, 29, 58, 80, 83, 84, 86, 93, 128, 147, 151, 155, 159, 168, 184, 190, 199, 200, 225, 226, 243, 249, 254, 261, 272, 281, 291, 298, 310, 312, 320, 323, 328, 340, 350, 358, 362, 373, 376, 384} - sort.Ints(debugLines) - - for scanner.Scan() { - line++ - value := scanner.Text() - - if len(debugLines) > 0 && sort.SearchInts(debugLines, line) == len(debugLines) { - continue - } - - vast, err := base64.RawStdEncoding.DecodeString(value[0:maxLength]) - - if err != nil { - errored = append(errored, line) - continue - } - - if bytes.Contains(vast, []byte("'")) { - singleQuotePresent++ - } else { - mismatched = append(mismatched, line) - } - } - assert.Empty(t, mismatched) - assert.Empty(t, errored) + t.Logf("\nTotal:[%v] validCount:[%v] generalMismatch:[%v] singleQuote:[%v]", xmlCount, st.valid, st.generalMismatch, st.singleQuote) + assert.NotZero(t, xmlCount) + assert.Equal(t, xmlCount, len(st.valid), "validXMLCount") + assert.Equal(t, 0, len(st.generalMismatch), "generalMismatch") + assert.Equal(t, 0, len(st.singleQuote), "singleQuote") + assert.Nil(t, scanner.Err()) } diff --git a/go.mod b/go.mod index fdf70090dc8..95df10bfc77 100644 --- a/go.mod +++ b/go.mod @@ -51,7 +51,8 @@ require ( git.pubmatic.com/PubMatic/go-common v0.0.0-20240313090142-97ff3d63b7c3 git.pubmatic.com/PubMatic/go-netacuity-client v0.0.0-20240104092757-5d6f15e25fe3 git.pubmatic.com/vastunwrap v0.0.0-00010101000000-000000000000 - github.com/PubMatic-OpenWrap/fastxml v0.0.0-20240826060652-d9d5d05fdad2 + github.com/PubMatic-OpenWrap/fastxml v0.0.0-20241125102315-0d8f851a6e52 + github.com/beevik/etree/110 v0.0.0-00010101000000-000000000000 github.com/diegoholiveira/jsonlogic/v3 v3.5.3 github.com/go-sql-driver/mysql v1.7.1 github.com/golang/mock v1.6.0 @@ -88,7 +89,6 @@ require ( github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82 // indirect - github.com/yudai/pp v2.0.1+incompatible // indirect golang.org/x/crypto v0.21.0 // indirect golang.org/x/sys v0.18.0 // indirect google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect @@ -100,6 +100,6 @@ replace github.com/prebid/prebid-server/v2 => ./ replace github.com/prebid/openrtb/v20 => github.com/PubMatic-OpenWrap/prebid-openrtb/v20 v20.0.0-20240222072752-2d647d1707ef -replace github.com/beevik/etree v1.0.2 => github.com/PubMatic-OpenWrap/etree v1.0.2-0.20240914050009-a916f68552f5 +replace github.com/beevik/etree v1.0.2 => github.com/PubMatic-OpenWrap/etree v1.0.2-0.20241125102329-0b5c47d99ad5 replace github.com/beevik/etree/110 => github.com/beevik/etree v1.1.0 diff --git a/go.sum b/go.sum index 4451dc12c04..b5d85f1ecb8 100644 --- a/go.sum +++ b/go.sum @@ -55,7 +55,6 @@ git.pubmatic.com/PubMatic/go-netacuity-client v0.0.0-20240104092757-5d6f15e25fe3 git.pubmatic.com/PubMatic/go-netacuity-client v0.0.0-20240104092757-5d6f15e25fe3/go.mod h1:w733mqJnHt0hLR9mIFMzyDR0D94qzc7mFHsuE0tFQho= git.pubmatic.com/PubMatic/vastunwrap v0.0.0-20240827084017-0e392d3beb8b h1:7AsXylZJDwq514L8KE0Id079VNfUsDEMUIYMlRYH+0Y= git.pubmatic.com/PubMatic/vastunwrap v0.0.0-20240827084017-0e392d3beb8b/go.mod h1:kcoJf7k+xug8X8fLWmsiKhPnYP+k7RZkfUoUo5QF+KA= -github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/DATA-DOG/go-sqlmock v1.5.0 h1:Shsta01QNfFxHCfpW6YH2STWB0MudeXXEWMr20OEh60= @@ -66,10 +65,10 @@ github.com/IABTechLab/adscert v0.34.0/go.mod h1:pCLd3Up1kfTrH6kYFUGGeavxIc1f6Tvv github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= -github.com/PubMatic-OpenWrap/etree v1.0.2-0.20240914050009-a916f68552f5 h1:qNRDZVW/TJI0O4hPVdk5YCl+WxD6alYdaCG0im73lNo= -github.com/PubMatic-OpenWrap/etree v1.0.2-0.20240914050009-a916f68552f5/go.mod h1:5Y8qgcuDoy3XXG907UXkGnVTwihF16rXyJa4zRT7hOE= -github.com/PubMatic-OpenWrap/fastxml v0.0.0-20240826060652-d9d5d05fdad2 h1:4zaGImZVnKCJudxKfsVNJAqGhCPxbjApAo0QvEThwpw= -github.com/PubMatic-OpenWrap/fastxml v0.0.0-20240826060652-d9d5d05fdad2/go.mod h1:TGGzSA5ziWpfLsKvqOzgdPGEZ7SJIQjHbcJw6lWoyHU= +github.com/PubMatic-OpenWrap/etree v1.0.2-0.20241125102329-0b5c47d99ad5 h1:uNJ9lOn3q677J2PbR9wbnHec8452lHYvUZCfqMUxk0s= +github.com/PubMatic-OpenWrap/etree v1.0.2-0.20241125102329-0b5c47d99ad5/go.mod h1:5Y8qgcuDoy3XXG907UXkGnVTwihF16rXyJa4zRT7hOE= +github.com/PubMatic-OpenWrap/fastxml v0.0.0-20241125102315-0d8f851a6e52 h1:n1pLyiO0A0dUwvYjktIIRd6Kikm//msfvV6JO7m3lIE= +github.com/PubMatic-OpenWrap/fastxml v0.0.0-20241125102315-0d8f851a6e52/go.mod h1:TGGzSA5ziWpfLsKvqOzgdPGEZ7SJIQjHbcJw6lWoyHU= github.com/PubMatic-OpenWrap/prebid-openrtb/v20 v20.0.0-20240222072752-2d647d1707ef h1:CXsyYtEgZz/0++fiug6QZXrRYG6BBrl9IGveCxsnhiE= github.com/PubMatic-OpenWrap/prebid-openrtb/v20 v20.0.0-20240222072752-2d647d1707ef/go.mod h1:hLBrA/APkSrxs5MaW639l+y/EAHivDfRagO2TX/wbSc= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= @@ -520,7 +519,6 @@ github.com/yudai/gojsondiff v1.0.0/go.mod h1:AY32+k2cwILAkW1fbgxQ5mUmMiZFgLIV+FB github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82 h1:BHyfKlQyqbsFN5p3IfnEUduWvb9is428/nNb5L3U01M= github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82/go.mod h1:lgjkn3NuSvDfVJdfcVVdX+jpBxNmX4rDAzaS45IcYoM= github.com/yudai/pp v2.0.1+incompatible h1:Q4//iY4pNF6yPLZIigmvcl7k/bPgrcTPIFIcmawg5bI= -github.com/yudai/pp v2.0.1+incompatible/go.mod h1:PuxR/8QJ7cyCkFp/aUDS+JY727OFEZkTdatxwunjIkc= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= diff --git a/modules/pubmatic/openwrap/tracker/video_test.go b/modules/pubmatic/openwrap/tracker/video_test.go index 4ee35615e4d..61368cd6112 100644 --- a/modules/pubmatic/openwrap/tracker/video_test.go +++ b/modules/pubmatic/openwrap/tracker/video_test.go @@ -289,7 +289,7 @@ func TestInjectVideoCreativeTrackers(t *testing.T) { }, }, }, - wantAdm: ` `, + wantAdm: ``, wantErr: false, }, { @@ -449,7 +449,7 @@ func TestInjectVideoCreativeTrackers(t *testing.T) { }, }, wantBurl: "Tracker URL&owsspburl=https%3A%2F%2Fburl.com", - wantAdm: ` `, + wantAdm: ``, wantErr: false, }, }