From 9174411b867c64d3a8479f5f409a3c8253ad9d24 Mon Sep 17 00:00:00 2001 From: Bill Pugh Date: Tue, 31 May 2022 10:15:49 -0400 Subject: [PATCH] Add estimate of total number of notifications for all users Smooth estimate of total number of users and ENPA opt-in using median --- AnalyticsAnalyzer.xcodeproj/project.pbxproj | 16 ++-- AnalyticsAnalyzer/TabularData.swift | 90 ++++++++++++++++++- GAEN Analytics/AnalysisState.swift | 25 ++++-- .../Est. scaled notifications per day.md | 5 ++ .../ENPA charts/Est. scaled notifications.md | 5 -- .../docs/ENPA charts/Estimated users.md | 5 ++ 6 files changed, 123 insertions(+), 23 deletions(-) create mode 100644 GAEN Analytics/docs/ENPA charts/Est. scaled notifications per day.md delete mode 100644 GAEN Analytics/docs/ENPA charts/Est. scaled notifications.md diff --git a/AnalyticsAnalyzer.xcodeproj/project.pbxproj b/AnalyticsAnalyzer.xcodeproj/project.pbxproj index e84b9aa..129197b 100644 --- a/AnalyticsAnalyzer.xcodeproj/project.pbxproj +++ b/AnalyticsAnalyzer.xcodeproj/project.pbxproj @@ -78,7 +78,7 @@ 16856EB027C2F58200F44960 /* functionality.swift in Sources */ = {isa = PBXBuildFile; fileRef = 16515AA525C9939200C97BB8 /* functionality.swift */; }; 16856EB127C2F5BB00F44960 /* configuration.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1673F54027B002B80037D340 /* configuration.swift */; }; 16856EB227C2F70100F44960 /* TabularData.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1673F56427B0A7C80037D340 /* TabularData.swift */; }; - 168933ED282952E600A3FDC2 /* Est. scaled notifications.md in Resources */ = {isa = PBXBuildFile; fileRef = 168933EC282952D900A3FDC2 /* Est. scaled notifications.md */; }; + 168933ED282952E600A3FDC2 /* Est. scaled notifications per day.md in Resources */ = {isa = PBXBuildFile; fileRef = 168933EC282952D900A3FDC2 /* Est. 
scaled notifications per day.md */; }; 169FB8592642F0C10082DDD2 /* main.swift in Sources */ = {isa = PBXBuildFile; fileRef = 169FB8582642F0C10082DDD2 /* main.swift */; }; 169FB85E2642F1300082DDD2 /* CSV in Frameworks */ = {isa = PBXBuildFile; productRef = 169FB85D2642F1300082DDD2 /* CSV */; }; 169FB8602642F1360082DDD2 /* ArgumentParser in Frameworks */ = {isa = PBXBuildFile; productRef = 169FB85F2642F1360082DDD2 /* ArgumentParser */; }; @@ -220,7 +220,7 @@ 16856E9827BFEFFA00F44960 /* main.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = main.swift; sourceTree = ""; }; 16856EA027C04EBE00F44960 /* SmokeTest.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = SmokeTest.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 16856EA227C04EBE00F44960 /* SmokeTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SmokeTest.swift; sourceTree = ""; }; - 168933EC282952D900A3FDC2 /* Est. scaled notifications.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = "Est. scaled notifications.md"; sourceTree = ""; }; + 168933EC282952D900A3FDC2 /* Est. scaled notifications per day.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = "Est. 
scaled notifications per day.md"; sourceTree = ""; }; 169FB8562642F0C10082DDD2 /* AnalyticsTool */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = AnalyticsTool; sourceTree = BUILT_PRODUCTS_DIR; }; 169FB8582642F0C10082DDD2 /* main.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = main.swift; sourceTree = ""; }; 16A292E7282062F40068F29B /* ENCV API key.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = "ENCV API key.md"; sourceTree = ""; }; @@ -471,7 +471,7 @@ 1615FA5627EE09B10089F0B6 /* Excess secondary attack rate 3.md */, 1615FA5827EE09CB0089F0B6 /* Excess secondary attack rate 4.md */, 16AF2DB127E39D59000B1529 /* Users with notifications.md */, - 168933EC282952D900A3FDC2 /* Est. scaled notifications.md */, + 168933EC282952D900A3FDC2 /* Est. scaled notifications per day.md */, ); path = "ENPA charts"; sourceTree = ""; @@ -730,7 +730,7 @@ 16B2B5E327D40F86000F763A /* combined analysis.md in Resources */, 16856E6827BC021900F44960 /* tokens claimed.md in Resources */, 16A292EB2821C4920068F29B /* privacy policy.md in Resources */, - 168933ED282952E600A3FDC2 /* Est. scaled notifications.md in Resources */, + 168933ED282952E600A3FDC2 /* Est. 
scaled notifications per day.md in Resources */, 161A123527DA3D650026C53D /* Secondary attack rate.md in Resources */, 16B2B5E127D40F80000F763A /* ENCV data.md in Resources */, 16856E6627BC021000F44960 /* ENPA opt in.md in Resources */, @@ -1064,7 +1064,7 @@ CLANG_CXX_LANGUAGE_STANDARD = "gnu++17"; CODE_SIGN_ENTITLEMENTS = "GAEN Analytics/GAEN Analytics.entitlements"; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 2; + CURRENT_PROJECT_VERSION = 1; DEVELOPMENT_ASSET_PATHS = "\"GAEN Analytics/Preview Content\""; DEVELOPMENT_TEAM = H2Z73245NN; ENABLE_PREVIEWS = YES; @@ -1084,7 +1084,7 @@ "$(inherited)", "@executable_path/Frameworks", ); - MARKETING_VERSION = 0.3.1; + MARKETING_VERSION = 0.3.2; PRODUCT_BUNDLE_IDENTIFIER = com.ninjamonkeycoders.GAENAnalytics; PRODUCT_NAME = "$(TARGET_NAME)"; SDKROOT = iphoneos; @@ -1103,7 +1103,7 @@ CLANG_CXX_LANGUAGE_STANDARD = "gnu++17"; CODE_SIGN_ENTITLEMENTS = "GAEN Analytics/GAEN Analytics.entitlements"; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 2; + CURRENT_PROJECT_VERSION = 1; DEVELOPMENT_ASSET_PATHS = "\"GAEN Analytics/Preview Content\""; DEVELOPMENT_TEAM = H2Z73245NN; ENABLE_PREVIEWS = YES; @@ -1123,7 +1123,7 @@ "$(inherited)", "@executable_path/Frameworks", ); - MARKETING_VERSION = 0.3.1; + MARKETING_VERSION = 0.3.2; PRODUCT_BUNDLE_IDENTIFIER = com.ninjamonkeycoders.GAENAnalytics; PRODUCT_NAME = "$(TARGET_NAME)"; SDKROOT = iphoneos; diff --git a/AnalyticsAnalyzer/TabularData.swift b/AnalyticsAnalyzer/TabularData.swift index 45dbd95..59734f9 100644 --- a/AnalyticsAnalyzer/TabularData.swift +++ b/AnalyticsAnalyzer/TabularData.swift @@ -405,7 +405,46 @@ extension DataFrame { append(column: Column(name: giving, contents: resultData)) logger.info("added column \(giving, privacy: .public)") } - + mutating func addRollingMedianInt(_ name1: String, giving: String, days: Int) { + logger.info("addRollingMedianInt(\(name1, privacy: .public), giving \(giving, privacy: .public))") + guard requireColumn(name1, 
Int.self) else { + return + } + let column1 : [Int?] = self[name1, Int.self] + let resultData = rollingMedian(column1, length: days) + append(column: Column(name: giving, contents: resultData)) + logger.info("added column \(giving, privacy: .public)") + } + mutating func addRollingMedianDouble(_ name1: String, giving: String, days: Int) { + logger.info("addRollingMedianDouble(\(name1, privacy: .public), giving \(giving, privacy: .public))") + guard requireColumn(name1, Double.self) else { + return + } + let column1 : [Double?] = self[name1, Double.self] + let resultData = rollingMedian(column1, length: days) + append(column: Column(name: giving, contents: resultData)) + logger.info("added column \(giving, privacy: .public)") + } + mutating func addRollingSumInt(_ name1: String, giving: String) { + logger.info("addRollingMedianInt(\(name1, privacy: .public), giving \(giving, privacy: .public))") + guard requireColumn(name1, Int.self) else { + return + } + let column1 : [Int?] = self[name1, Int.self] + let resultData = rollingSum(column1) + append(column: Column(name: giving, contents: resultData)) + logger.info("added column \(giving, privacy: .public)") + } + mutating func addRollingSumDouble(_ name1: String, giving: String) { + logger.info("addRollingMedianInt(\(name1, privacy: .public), giving \(giving, privacy: .public))") + guard requireColumn(name1, Double.self) else { + return + } + let column1 : [Double?] 
= self[name1, Double.self] + let resultData = rollingSum(column1) + append(column: Column(name: giving, contents: resultData)) + logger.info("added column \(giving, privacy: .public)") + } mutating func addColumnPercentage(_ name1: String, _ name2: String, giving: String) { logger.info("addColumnPercentage(\(name1, privacy: .public), \(name2, privacy: .public), giving \(giving, privacy: .public))") guard requireColumn(name1, Int.self), requireColumn(name2, Int.self) else { @@ -451,6 +490,55 @@ func makeColumn(_ name: String, _ value: T) -> AnyColumn { return Column(name: name, contents: [value]).eraseToAnyColumn() } +func rollingSum(_ a: [Int?]) -> [Int] { + var total = 0 + return a.map { total += ($0 ?? 0); return total} + +} +func rollingSum(_ a: [Double?]) -> [Double] { + var total = 0.0 + return a.map { total += ($0 ?? 0); return total} + +} + +func median(_ a: ArraySlice) -> T? where T: Numeric, T : Comparable { + + if a.isEmpty { + return nil + } + let sorted = a.sorted() + let count = sorted.count + // 0 - nil + // 1 - 0 + // 2 - 0, 1 + // 3 - 1 + // 4 - 1,2 + if count % 2 == 0 { + // Even number of items - return the mean of two middle values + let leftIndex = count / 2 - 1 + let leftValue = sorted[leftIndex] + return leftValue + } else { + // Odd number of items - take the middle item. + return sorted[count/2] + } +} + + + + +func median(_ a: [T?], ending: Int, count: Int) -> T? where T : Numeric, T : Comparable { + let values = a[0...ending].compactMap( { $0 }).suffix(count) + + let result = median(values) + return result +} + + +func rollingMedian(_ a: [T?], length: Int) -> [T?] 
where T : Numeric, T : Comparable { + return (0 ..< a.count).map( { median(a, ending: $0, count: length) } ) +} + class TextBuffer { var text: [String] = [] diff --git a/GAEN Analytics/AnalysisState.swift b/GAEN Analytics/AnalysisState.swift index ac5308e..802ccdd 100644 --- a/GAEN Analytics/AnalysisState.swift +++ b/GAEN Analytics/AnalysisState.swift @@ -415,8 +415,9 @@ class AnalysisState: NSObject, ObservableObject { } } -func computeEstimatedDevices(_ codesClaimed: Int?, _ cv: Double?) -> Int? { - guard let codesClaimed = codesClaimed, let cv = cv, cv >= 1 else { +func computeEstimatedDevices(_ codesClaimed: Int?, _ cvData: (Double?, Double?)) -> Int? { + let (cv, cvstd) = cvData + guard let codesClaimed = codesClaimed, let cv = cv, let cvstd = cvstd, cv >= 3.0*cvstd else { return nil } return Int((Double(codesClaimed * 100_000) / cv).rounded()) @@ -460,7 +461,8 @@ func computeEstimatedUsers(platform: String, encv: DataFrame, _ encvColumn: Stri logger.log("added encv data to enpa data") let vc = result[enpaColumn, Double.self] - let estUsers = zip(newEncvColumn, vc).map { computeEstimatedDevices($0.0, $0.1) } + let vcstd = result[enpaColumn+" std", Double.self] + let estUsers = zip(newEncvColumn, zip(vc, vcstd)).map { computeEstimatedDevices($0.0, $0.1) } let estUsersColumnName = "est \(platform)users from \(enpaColumn)" logger.log("\(estUsersColumnName) computed") let c = Column(name: estUsersColumnName, contents: estUsers) @@ -515,8 +517,15 @@ actor AnalysisTask { var worksheet: DataFrame if let encv = encvAverage { combinedDataFrame = computeEstimatedUsers(platform: "", encv: encv, "codes claimed", enpa: combinedDataFrame, "vc") + combinedDataFrame = computeEstimatedUsers(platform: "", encv: encv, "publish requests", enpa: combinedDataFrame, "ku") - combinedDataFrame.addColumnComputation("nt", "est users from vc", giving: "est scaled notifications", estimatedNotifications) + combinedDataFrame.addRollingMedianInt("est users from vc", giving: "est users", 
days: 14) + combinedDataFrame.addRollingMedianDouble("vc ENPA %", giving: "ENPA %", days: 14) + combinedDataFrame.addColumnComputation("nt", "est users", giving: "est scaled notifications/day", estimatedNotifications) + combinedDataFrame.addRollingSumDouble("est scaled notifications/day", giving: "est total notifications") + + + iOSDataFrame = computeEstimatedUsers(platform: "iOS ", encv: encv, "publish requests ios", enpa: iOSDataFrame, "ku") androidDataFrame = computeEstimatedUsers(platform: "Android ", encv: encv, "publish requests android", enpa: androidDataFrame, "ku") combinedDataFrame.requireColumns("date", "vc count", "vc", "ku", "nt", "codes issued", "est users from vc", "vc ENPA %") @@ -668,8 +677,6 @@ struct ChartOptions: Identifiable { } self.data = data.selecting(columnNames: ["date"] + columns) - let dates = self.data["date", Date.self] - print("\(dates.first!!)") self.columns = columns self.maxBound = maxBound } @@ -754,12 +761,12 @@ func arrivingPromptly(enpa: DataFrame, config: Configuration) -> ChartOptions { // est. users func estimatedUsers(enpa: DataFrame, config _: Configuration) -> ChartOptions? { - ChartOptions.maybe(title: "Estimated users", data: enpa, columns: ["est users from vc"]) + ChartOptions.maybe(title: "Estimated users", data: enpa, columns: ["est users"]) } // est. users func scaledNotifications(enpa: DataFrame, config _: Configuration) -> ChartOptions? { - ChartOptions.maybe(title: "Est. scaled notifications", data: enpa, columns: ["est scaled notifications"]) + ChartOptions.maybe(title: "Est. scaled notifications per day", data: enpa, columns: ["est scaled notifications/day"]) } func showingNotifications(enpa: DataFrame, config: Configuration) -> ChartOptions? { @@ -770,7 +777,7 @@ func showingNotifications(enpa: DataFrame, config: Configuration) -> ChartOption // est. users func enpaOptIn(enpa: DataFrame, config _: Configuration) -> ChartOptions? 
{ - ChartOptions.maybe(title: "ENPA opt in", data: enpa, columns: ["vc ENPA %"], maxBound: 1.0) + ChartOptions.maybe(title: "ENPA opt in", data: enpa, columns: ["ENPA %"], maxBound: 1.0) } // codes claimed/consent diff --git a/GAEN Analytics/docs/ENPA charts/Est. scaled notifications per day.md b/GAEN Analytics/docs/ENPA charts/Est. scaled notifications per day.md new file mode 100644 index 0000000..6a1ed9c --- /dev/null +++ b/GAEN Analytics/docs/ENPA charts/Est. scaled notifications per day.md @@ -0,0 +1,5 @@ +The graph shows an estimate of the total number of notifications sent each day to all EN users. + +It is calculated as: + +(# of notifications per 100K ENPA users)/100K * (est users) diff --git a/GAEN Analytics/docs/ENPA charts/Est. scaled notifications.md b/GAEN Analytics/docs/ENPA charts/Est. scaled notifications.md deleted file mode 100644 index b7b8ee4..0000000 --- a/GAEN Analytics/docs/ENPA charts/Est. scaled notifications.md +++ /dev/null @@ -1,5 +0,0 @@ -The graph shows an estimate of the actual number of notifications sent each day to all EN users. - -It is calculated as: - -(# of notifications per 100K ENPA users)/100K * (est users from vc) diff --git a/GAEN Analytics/docs/ENPA charts/Estimated users.md b/GAEN Analytics/docs/ENPA charts/Estimated users.md index 1048cf4..c2abe7d 100644 --- a/GAEN Analytics/docs/ENPA charts/Estimated users.md +++ b/GAEN Analytics/docs/ENPA charts/Estimated users.md @@ -2,5 +2,10 @@ Shows an estimate of the total number of active ENX users (users running ENX for This uses both the ENPA and ENCV. It assumes that users who have enabled ENPA verify codes at the same rate as users who haven't verified codes. The calculation used is: + EPNA opt in = (# of ENPA users who verify codes)/(# of codes claimed on ENCV server) Total users = (# of ENPA users)/(ENPA opt in) + +These calculations can be very noisy when the # of ENPA users who verify codes is close to the standard deviation. 
Thus, we only calculate the opt in rate when the number of ENPA users who verify codes is at least 3 times the standard deviation, and the actual value for both ENPA opt-in and total users is the median of the last 14 days of reported values. + +