From 030439deb42ed267611a1cc5e7d97ab21135b415 Mon Sep 17 00:00:00 2001 From: Stelios Petrakis Date: Wed, 1 May 2024 15:59:35 +0200 Subject: [PATCH 1/2] Full String Catalog support With the introduction of the String Catalogs file format in Xcode 15, developers now have the option to create localizations that vary by device and/or by plural. On top of that, multiple tokens can be added in a localized phrase (also known as substitutions) that can also support the aforementioned variations. In order to support all those new formats as well as the old substitution rules found in the Strings Dictionary format (`.stringsdict`), the advanced rules from the CLI tool are formatted as an intermediate XML structure and pushed to CDS. Once those XML structures are pulled from CDS by the application, they need to be properly parsed and formatted in a way so that they are ready for presentation when needed by the application. During the phase when the SDK populates its cache, the XML structures are either evaluated based on the device on which they currently run (in case of device variations) or they are converted to intermediate ICU rules (if they contain plural variations) so that they are ready to be evaluated when arguments are passed to them during runtime. For that to happen, the logic that drives the plural rule extraction has been heavily refactored to account for all possible cases (simple plural rules, multiple plural rules, plural rules with extra arguments, and so on), while making sure that the right argument is used on the right rule by respecting the positional specifiers on each substitution phrase. More unit tests have been added to test those cases as well as edge-case scenarios and existing unit tests have been improved. 
--- Sources/Transifex/Cache.swift | 29 +- Sources/Transifex/Core.swift | 14 +- Sources/Transifex/Plurals.swift | 661 +++++++++++++++++++++- Sources/Transifex/RenderingStrategy.swift | 132 ++++- Tests/TransifexTests/TransifexTests.swift | 259 ++++++++- 5 files changed, 1039 insertions(+), 56 deletions(-) diff --git a/Sources/Transifex/Cache.swift b/Sources/Transifex/Cache.swift index 43eece9..a5a9c1a 100644 --- a/Sources/Transifex/Cache.swift +++ b/Sources/Transifex/Cache.swift @@ -173,7 +173,34 @@ public final class TXDiskCacheProvider: NSObject, TXCacheProvider { return nil } - return storedTranslations + return filterXMLPlurals(storedTranslations) + } + + // Process XML stored translations (device variations, substitutions, etc). + private static func filterXMLPlurals(_ translations: TXTranslations?) -> TXTranslations? { + guard var translations = translations else { + return nil + } + for (localeKey, localeStrings) in translations { + for (sourceStringKey, stringInfo) in localeStrings { + guard let sourceString = stringInfo[TXDecoratorCache.STRING_KEY] else { + continue + } + // Detect if the string begins with the CDS root XML tag: + // `<cds-root>` + if (!sourceString.hasPrefix("<\(TXNative.CDS_XML_ROOT_TAG_NAME)>")) { + continue + } + // Process it and synthesize the final rule. + guard let processedString = XMLPluralParser.extract(pluralString: sourceString) else { + Logger.error("\(#function) Error attempting to extract source string with key \(sourceStringKey)") + continue + } + // Replace the source string with the processed value + translations[localeKey]?[sourceStringKey]?[TXDecoratorCache.STRING_KEY] = processedString + } + } + return translations } public func getTranslations() -> TXTranslations? 
{ diff --git a/Sources/Transifex/Core.swift b/Sources/Transifex/Core.swift index 4c96e45..34daefe 100644 --- a/Sources/Transifex/Core.swift +++ b/Sources/Transifex/Core.swift @@ -365,7 +365,19 @@ public final class TXNative : NSObject { /// The filename of the file that holds the translated strings and it's bundled inside the app. public static let STRINGS_FILENAME = "txstrings.json" - + + /// XML name to be used for the root XML element when the pluralization rule is not supported by CDS + /// and has to be uploaded as XML instead of the ICU format. + public static let CDS_XML_ROOT_TAG_NAME = "cds-root" + + /// XML name to be used for the child XML elements when the pluralization rule is not supported by + /// CDS and has to be uploaded as XML instead of the ICU format. + public static let CDS_XML_TAG_NAME = "cds-unit" + + /// XML attribute to be used in the CDS_XML_TAG_NAME elements when the pluralization rule is not + /// supported by CDS and has to be uploaded as XML instead of the ICU format. + public static let CDS_XML_ID_ATTRIBUTE = "id" + /// An instance of the core class that handles all the work private static var tx : NativeCore? diff --git a/Sources/Transifex/Plurals.swift b/Sources/Transifex/Plurals.swift index 4bb14b3..94b372e 100644 --- a/Sources/Transifex/Plurals.swift +++ b/Sources/Transifex/Plurals.swift @@ -7,6 +7,9 @@ // import Foundation +#if canImport(UIKit) +import UIKit +#endif enum PluralizationRule : String { case unspecified = "unspecified" @@ -18,40 +21,650 @@ enum PluralizationRule : String { case other = "other" } +struct ICUPluralResult: Equatable { + var extractedPlurals: [PluralizationRule: String] +} + extension String { + private static let ICU_RULE_PATTERN = #"\{([^\s]*?), plural, [^*]*?\}\}"# + + private static let PLURALIZATION_RULE_PATTERN = #"(zero|one|two|few|many|other)\s*(\{[^}]*\})"# - /// Extracts plural rules from strings that follow the ICU Message Format, - /// e.g. 
"one", "few", "other" etc + /// Extracts plural rules from strings that follow the ICU Message Format. /// - /// The strings need to be structured like this: - /// "{cnt, plural, one {There is %d table} other {There are %d tables}}" - /// "{???, plural, one {There is %d table} other {There are %d tables}}" + /// The strings must contain ICU rules that need to be structured like this: + /// ``` + /// {cnt, plural, one {There is %d table} other {There are %d tables}} + /// ``` + /// or + /// ``` + /// {???, plural, one {There is %d table} other {There are %d tables}} + /// ``` /// (the latter is how pluralized strings arrive from CDS). /// - /// Strings that use "{var}" placeholders, like the following, are not supported: - /// {cnt, plural, one {There is {cnt}} table} other {There are {cnt}} tables}} + /// The method can extract multiple ICU rules from the given string. + /// + /// For example, the following string: + /// + /// ``` + /// There %1$#@{term1, plural, one {is %d person} other {are %d people}}@ sitting in %2$#@{term2, plural, one {%d table} two {a couple of tables} other {%d tables}}@ in this restaurant. + /// ``` + /// + /// Produces the following result: + /// ``` + /// [ + /// "{term1, plural, one {is %d person} other {are %d people}}" : ICUPluralResult( + /// extractedPlurals: [ + /// .one: "is %d person", + /// .other: "are %d people" + /// ] + /// ), + /// "{term2, plural, one {%d table} two {a couple of tables} other {%d tables}}": ICUPluralResult( + /// extractedPlurals: [ + /// .one: "%d table", + /// .two: "a couple of tables", + /// .other: "%d tables" + /// ] + /// ) + /// ] + /// ``` /// /// - Parameter string: The pluralized string - /// - Returns: A dictionary that holds all plural strings found in the given string, - /// or nil if the string does not follow the ICU Message Format - /// and the pluralized format in particular - func extractICUPlurals() -> [PluralizationRule: String]? 
{ - guard self.contains(", plural, ") else { + /// - Returns: A dictionary that has a size equal to the number of ICU rules found in the current + /// string. Each element contains the extracted ICU rule as a key and an ICUPluralResult structure with + /// the extracted rules as a value. + func extractICUPlurals() -> [String: ICUPluralResult] { + // Bail fast if the string does not contain a plural rule. + guard contains(", plural, ") else { + return [:] + } + + // Extract the ICU rules from the strings + var regex: NSRegularExpression + + do { + regex = try NSRegularExpression(pattern: Self.ICU_RULE_PATTERN, + options: []) + } + catch { + return [:] + } + + var matchingICURules: [String:ICUPluralResult] = [:] + + regex + .matches(in: self, + options: [], + range: NSRange(location: 0, + length: count)) + .forEach { + guard $0.numberOfRanges == 2 else { + return + } + let icuRuleRange = $0.range(at: 0) + guard !NSEqualRanges(icuRuleRange, NSMakeRange(NSNotFound, 0)) else { + return + } + let icuRule = (self as NSString).substring(with: icuRuleRange) + let results = icuRule.capturedGroups(withRegex: Self.PLURALIZATION_RULE_PATTERN) + var plurals: [PluralizationRule: String] = [:] + + results.forEach { matchedPair in + // Convert strings like "few" to the respective enum + guard let rule = PluralizationRule(rawValue: matchedPair[0]) else { + return + } + // Remove the curly braces from the matched string + // e.g. "{%d tables}" -> "%d tables" + plurals[rule] = matchedPair[1].removeFirstAndLastCharacters() + } + + matchingICURules.updateValue(ICUPluralResult(extractedPlurals: plurals), + forKey: icuRule) + } + + return matchingICURules + } +} + +/// Class responsible for parsing the collection of CDS XML elements, filtering the proper rules for the device +/// and generating (if needed) the final ICU string to be used by the SDK. 
+final class XMLPluralParser: NSObject { + private static let CDS_XML_ID_ATTRIBUTE_DEVICE_TOKEN = "device" + private static let CDS_XML_ID_ATTRIBUTE_SUBSTITUTIONS_TOKEN = "substitutions" + + private static let ICU_RULE_MISSING_TOKEN = "???" + private static let ICU_RULE_PLURAL_TOKEN = "plural" + + private static let CDS_XML_ID_ATTRIBUTE_PLURAL_TOKEN = "plural" + private static let CDS_XML_ID_ATTRIBUTE_DELIMITER = "." + + private static let FIRST_POSITIONAL_SPECIFIER = "%1$" + private static let VARIABLE_PREFIX: Character = "%" + private static let POSITIONAL_SPECIFIER_SUFFIX = "$" + + // Constants that should match the device variation strings of the + // .xcstrings file. + private static let DEVICE_NAME_IPHONE = "iphone" + private static let DEVICE_NAME_IPAD = "ipad" + private static let DEVICE_NAME_IPOD = "ipod" + private static let DEVICE_NAME_MAC = "mac" + private static let DEVICE_NAME_WATCH = "applewatch" + private static let DEVICE_NAME_VISION = "applevision" + private static let DEVICE_NAME_APPLETV = "appletv" + private static let DEVICE_NAME_OTHER = "other" + + private var parser: XMLParser + private var parsedResults: [String: String] = [:] + private var pendingCDSUnitID: String? + private var pendingString: String = "" + + required internal init?(pluralString: String) { + self.parser = XMLParser(data: Data(pluralString.utf8)) + super.init() + self.parser.delegate = self + } + + /// Parses the provided plural string XML and generates the final rule. + /// + /// - Parameter deviceName: The device name. + /// - Returns: The final rule to be used. + private func extract(_ deviceName: String) -> String? { + if !parser.parse() { + return nil + } + + return processParsedResults(deviceName) + } + + /// - Parameter deviceName: The device name, nil for the general device rule `device.` + /// - Returns: The synthesized device rule + private static func deviceRule(with deviceName: String? 
= nil) -> String { + return "\(CDS_XML_ID_ATTRIBUTE_DEVICE_TOKEN)\(CDS_XML_ID_ATTRIBUTE_DELIMITER)\(deviceName ?? "")" + } + + /// - Parameter results: The parsed XML results + /// - Returns: True if the provided results contain at least one device rule, false otherwise. + private static func containsDeviceRules(_ results: [String: String]) -> Bool { + return containsRules(withPrefix: deviceRule(), + results: results) + } + + /// - Parameters: + /// - deviceName: The device name + /// - results: The parsed XML results + /// - Returns: True if the provided results contain at least one device rule for the provided device + /// name, false otherwise. + private static func containsDeviceRules(for deviceName: String, + results: [String: String]) -> Bool { + return containsRules(withPrefix: deviceRule(with: deviceName), + results: results) + } + + /// - Parameters: + /// - prefix: The prefix to search for + /// - results: The parsed XML results + /// - Returns: Looks up the parsed XML results and returns true if the prefix is found at least once, + /// false otherwise. + private static func containsRules(withPrefix prefix: String, + results: [String: String]) -> Bool { + for (key, _) in results { + if key.hasPrefix(prefix) { + return true + } + } + return false + } + + /// Given the extracted results of the XML parser, generate the final rule to be used or return nil if there + /// was an error. + /// + /// - Parameter deviceName: The device name. + /// - Returns: The final rule to be used. + private func processParsedResults(_ deviceName: String) -> String? { + guard parsedResults.count > 0 else { + return nil + } + + var finalResults = parsedResults + var deviceNameRuleFound = false + var finalDeviceName = deviceName + var finalDeviceKey = Self.deviceRule(with: deviceName) + + // If device rules exist in the parsed results, then perform some + // extra processing. 
+ if Self.containsDeviceRules(parsedResults) { + // If the parsed device results include rules for the provided + // deviceName, everything is OK. + if Self.containsDeviceRules(for: deviceName, + results: parsedResults) { + deviceNameRuleFound = true + } + // If the provided deviceName cannot be detected in the parsed + // device results, find a fallback. + else { + // For the iPad deviceName, if not found in the rules, fallback: + // * Firstly to the `iphone` device rules, if found + // * Otherwise to the `other` device rules, if found. + if deviceName == Self.DEVICE_NAME_IPAD, + Self.containsDeviceRules(for: Self.DEVICE_NAME_IPHONE, + results: parsedResults) { + deviceNameRuleFound = true + finalDeviceName = Self.DEVICE_NAME_IPHONE + finalDeviceKey = Self.deviceRule(with: Self.DEVICE_NAME_IPHONE) + } + + // For any other deviceName, fallback to `other` device rules. + if !deviceNameRuleFound, + Self.containsDeviceRules(for: Self.DEVICE_NAME_OTHER, + results: parsedResults) { + deviceNameRuleFound = true + finalDeviceName = Self.DEVICE_NAME_OTHER + finalDeviceKey = Self.deviceRule(with: Self.DEVICE_NAME_OTHER) + } + } + + // Filter the parsed results and keep: + // * The device rules for the found device (provided or fallback). + // * The substitution rules. + finalResults = parsedResults.filter { + // If a device name rule has been found, keep those rules + (deviceNameRuleFound && $0.key.hasPrefix(finalDeviceKey)) + // Do not filter out substitutions as they do not begin with the + // `device*` schema, but they are still needed. + || $0.key.hasPrefix(Self.CDS_XML_ID_ATTRIBUTE_SUBSTITUTIONS_TOKEN) + } + } + + // If there are no items after filtering, then bail. + guard finalResults.count > 0 else { + return nil + } + + // If only one item is left after filtering (typical for device + // variation rules), then just return that immediately. 
+ if finalResults.count == 1 { + return finalResults.first?.value + } + + // The rest of the cases must be two: + // * Substitutions (having a main phrase that contains two or more + // tokens). + // * Device specific plural rules. + + // Case 1: Substitutions. + // The main phrase that contains the substitutions will be found in: + // * The element having a "substitutions" key if there are no device + // variations, or + // * The element having the "device.finalDeviceName" key if there are + // also device variations. + // It should be one or the other, if both elements can be found, + // then something is wrong. + if finalResults[Self.CDS_XML_ID_ATTRIBUTE_SUBSTITUTIONS_TOKEN] != nil + || (deviceNameRuleFound && finalResults[finalDeviceKey] != nil) { + var mainPhrase = finalResults[Self.CDS_XML_ID_ATTRIBUTE_SUBSTITUTIONS_TOKEN] + + if mainPhrase == nil, deviceNameRuleFound { + mainPhrase = finalResults[finalDeviceKey] + } + + guard var mainPhrase = mainPhrase else { + return nil + } + + // Process the main phrase, adding positional specifiers if needed, + // so that they can later be used to locate the position of the rule + // in the argument list. + // + // The main phrase is expected to be either: + // * XCStrings: "This iPhone contains %1$#@token1@ with %2$#@token2@" + // * Strings Dict: "This iPhone contains %#@token1@ with %#@token2@" + // The processPhrase() method normalizes that so that even Strings + // Dict phrases will have positional specifiers (1$, 2$ etc) + mainPhrase = Self.processPhrase(mainPhrase) + + // Extract tokens from the main phrase. + PluralUtils.extractTokens(from: mainPhrase).forEach { processedTokenResult in + // Tokens should be: "%1$#@token1@", "%2$#@token2@", ... + let token = processedTokenResult.0 + // Token prefix should be: "1$", "2$", ... + let tokenPrefix = processedTokenResult.1 + // Cleaned tokens should be: "token1", "token2", ... 
+ let cleanedToken = processedTokenResult.2 + let pluralRules = Self.parsePluralRules(finalResults, + firstExpectedComponent: Self.CDS_XML_ID_ATTRIBUTE_SUBSTITUTIONS_TOKEN, + secondExpectedComponent: cleanedToken, + cleanValueCharacters: tokenPrefix) + + // Generate ICU rule from the plural rules + if let icuRule = Self.generateICURule(with: cleanedToken, + pluralRules: pluralRules) { + // Leave the token prefix and suffix intact, as they will be + // needed when the final string will be rendered in the UI. + let tokenReadyICURule = PluralUtils.buildToken(with: tokenPrefix, + token: icuRule) + mainPhrase = mainPhrase.replacingOccurrences(of: token, + with: tokenReadyICURule) + } + } + + // Return the final synthesized main phrase that now contains ICU + // rules. + return mainPhrase + } + // Case 2: Device specific plural rules + else if deviceNameRuleFound { + // In this case, the plural rules are expected to have the + // following format: + // "device.finalDeviceName.plural.pluralRule" + let pluralRules = Self.parsePluralRules(finalResults, + firstExpectedComponent: Self.CDS_XML_ID_ATTRIBUTE_DEVICE_TOKEN, + secondExpectedComponent: finalDeviceName) + + // Generate ICU rule from the plural rules + return Self.generateICURule(with: nil, + pluralRules: pluralRules) + } + + // Something unexpected happened that the logic could not handle. + // + // This can happen if neither the provided nor a fallback device name + // could be found, but there are still substitution rules for other + // device names. As it is not possible to figure out which rule to use, + // return nil. + return nil + } + + /// Add positional specifiers to a variable / token phrase that does not contain them. + /// + /// - Parameter phrase: The original phrase. + /// - Returns: The phrase with added positional specifiers. + private class func processPhrase(_ phrase: String) -> String { + // If the phrase already contains positional specifiers, bail. 
+ guard !phrase.contains(FIRST_POSITIONAL_SPECIFIER) else { + return phrase + } + + var result = phrase + + // Positional specifiers always start from index 1 + var positionalSpecifier = 1 + var currentIndex = result.startIndex + + // Look for the `%` characters that signify variables and tokens. + while let range = result.range(of: String(VARIABLE_PREFIX), + range: currentIndex.. String? { + guard pluralRules.count > 0 else { return nil } - - let pattern = #"(zero|one|two|few|many|other)\s*(\{[^}]*\})"# - let results = self.capturedGroups(withRegex: pattern) - var plurals: [PluralizationRule: String] = [:] - - results.forEach { matchedPair in - // Convert strings like "few" to the respective enum - let rule = PluralizationRule(rawValue: matchedPair[0])! - // Remove the braces from the matched string, - // e.g. "{%d tables}" -> "%d tables" - plurals[rule] = matchedPair[1].removeFirstAndLastCharacters() + + var icuRules: [String] = [] + + for (pluralRule, value) in pluralRules { + icuRules.append("\(pluralRule) {\(value)}") + } + + return "{\(token ?? Self.ICU_RULE_MISSING_TOKEN), \(Self.ICU_RULE_PLURAL_TOKEN), \(icuRules.joined(separator: " "))}" + } + + /// Validate and parse plural rules. + /// + /// - Parameter parsedResults: The parsed results + /// - Parameter firstExpectedComponent: The first expected component of the key. + /// - Parameter secondExpectedComponent: The second expected component of the key. + /// - Returns: An array containing tuples with the plural rule as the first element and the string as + /// the second one. The array is sorted in respect to the order each key must appear on the final ICU + /// rule. + private class func parsePluralRules(_ parsedResults: [String:String], + firstExpectedComponent: String, + secondExpectedComponent: String, + cleanValueCharacters: String? 
= nil) -> [(PluralizationRule,String)] { + var pluralRules: [PluralizationRule:String] = [:] + + parsedResults.forEach { (key, value) in + let components = key.components(separatedBy: Self.CDS_XML_ID_ATTRIBUTE_DELIMITER) + // Sanity check + guard components.count == 4, + components[0] == firstExpectedComponent, + components[1] == secondExpectedComponent, + components[2] == Self.CDS_XML_ID_ATTRIBUTE_PLURAL_TOKEN else { + return + } + + guard let pluralRule = PluralizationRule(rawValue: components[3]) else { + return + } + + if let cleanValueCharacters = cleanValueCharacters, + cleanValueCharacters.count > 0 { + pluralRules[pluralRule] = value.replacingOccurrences(of: cleanValueCharacters, + with: "") + } + else { + pluralRules[pluralRule] = value + } + } + + // Sort rules as they appear in the PluralizationRule enum + return pluralRules.sorted { $0.key.rawValue < $1.key.rawValue } + } + + /// Returns the current device name in the form used by the `.xcstrings` file type. + /// + /// - Returns: The current device name + private class func currentDeviceName() -> String { +#if os(iOS) + // For iOS, figure out whether the current device is an iPhone, an iPad + // or an iPod. + #if canImport(UIKit) + let currentDevice = UIDevice.current + if currentDevice.userInterfaceIdiom == UIUserInterfaceIdiom.pad { + return DEVICE_NAME_IPAD + } + else { + return currentDevice.model.hasPrefix("iPod") ? DEVICE_NAME_IPOD : DEVICE_NAME_IPHONE + } + #else + return DEVICE_NAME_IPHONE + #endif +#elseif os(macOS) + return DEVICE_NAME_MAC +#elseif os(watchOS) + return DEVICE_NAME_WATCH +#elseif os(visionOS) + return DEVICE_NAME_VISION +#elseif os(tvOS) + return DEVICE_NAME_APPLETV +#else + return DEVICE_NAME_OTHER +#endif + } + + /// Extract and generate (if needed) the rule that the collection of XML plural tags from the plural string + /// contains. + /// + /// - Parameters: + /// - pluralString: The plural string containing a number of XML plural tags. 
+ /// - deviceName: The device name (optional). + /// - Returns: The final rule to be used, nil if there was an error. + public class func extract(pluralString: String, + deviceName: String = currentDeviceName()) -> String? { + return self.init(pluralString: pluralString)?.extract(deviceName) + } +} + +extension XMLPluralParser : XMLParserDelegate { + public func parser(_ parser: XMLParser, didStartElement elementName: String, + namespaceURI: String?, qualifiedName qName: String?, + attributes attributeDict: [String : String] = [:]) { + guard elementName == TXNative.CDS_XML_TAG_NAME, + let id = attributeDict[TXNative.CDS_XML_ID_ATTRIBUTE] else { + return + } + + pendingCDSUnitID = id + pendingString = "" + } + + public func parser(_ parser: XMLParser, didEndElement elementName: String, + namespaceURI: String?, qualifiedName qName: String?) { + guard let cdsUnitID = pendingCDSUnitID else { + return } - return plurals + parsedResults[cdsUnitID] = pendingString + pendingCDSUnitID = nil + pendingString = "" + } + + public func parser(_ parser: XMLParser, foundCharacters string: String) { + guard let _ = pendingCDSUnitID else { + return + } + pendingString += string } } +/// Utility class that allows SDK clients to leverage the logic used for pluralization rules. +public final class PluralUtils { + private static let SUBSTITUTION_TOKEN_PATTERN = #"%\d*\$*#@[^@]+@"# + private static let CDS_XML_TOKEN_DELIMITER = "@" + + /// For a given substitutions phrase, it returns an array with the parsed tokens. 
+ /// + /// ## String Catalogs (`.xcstrings`) + /// ``` + /// This iPhone contains %1$#@token1@ with %2$#@token2@ + /// ``` + /// The extracted tokens will be: + /// * `("%1$#@token1@", "1$", "token1")` + /// * `("%2$#@token2@", "2$", "token2")` + /// + /// ## Strings Dictionary Files (`.stringsdict`) + /// ``` + /// This iPhone contains %#@token1@ with %#@token2@ + /// ``` + /// The extracted tokens will be: + /// * `("%#@token1@", "", "token1")` + /// * `("%#@token2@", "", "token2")` + /// + /// - Parameter substitutionsPhrase: The substitutions phrase + /// - Returns: The array of extracted token tuples. A tuple of three elements: The first one is the + /// original token, the second is the exported prefix (positional specifier) and the cleaned up version of + /// the token without the specifiers and the delimiters. + public class func extractTokens(from substitutionsPhrase: String) -> [(String, String, String)] { + // Bail fast if no token prefix is found. + guard substitutionsPhrase.contains("#@") else { + return [] + } + + // Extract the ICU rules from the strings + var regex: NSRegularExpression + + do { + regex = try NSRegularExpression(pattern: Self.SUBSTITUTION_TOKEN_PATTERN, + options: []) + } + catch { + return [] + } + + var tokens: [(String, String, String)] = [] + + regex + .matches(in: substitutionsPhrase, + options: [], + range: NSRange(location: 0, + length: substitutionsPhrase.count)) + .forEach { + let tokenRange = $0.range(at: 0) + guard !NSEqualRanges(tokenRange, NSMakeRange(NSNotFound, 0)) else { + return + } + let token = (substitutionsPhrase as NSString).substring(with: tokenRange) + if let processedTokenResult = process(token: token) { + tokens.append(processedTokenResult) + } + } + + return tokens + } + + /// Processes a token, exposing certain of its parameters + /// + /// - Parameter token: The input token to be processed. 
+ /// - Returns: A tuple of three elements: The first one is the original token, the second is the + /// exported prefix (positional specifier) and the cleaned up version of the token without the specifiers + /// and the delimiters. + private class func process(token: String) -> (String, String, String)? { + let tokenComponents = token.components(separatedBy: CDS_XML_TOKEN_DELIMITER) + guard tokenComponents.count == 3 else { + return nil + } + // Token prefix should be: + // * XCStrings: "1$", "2$", ... + // * Strings Dict: "" + let tokenPrefix = String(tokenComponents[0].dropFirst().dropLast()) + // Cleaned tokens should be "token1", "token2", etc + let cleanedToken = tokenComponents[1] + return (token, tokenPrefix, cleanedToken) + } + + /// Wraps the provided token with the proper delimiters and with the specified prefix, preparing it for + /// placement in the intermediate ICU rule. + /// + /// - Parameters: + /// - tokenPrefix: The prefix of the token. + /// - token: The actual token to be wrapped. + /// - Returns: The final wrapped token. + fileprivate class func buildToken(with tokenPrefix: String, + token: String) -> String { + return "%\(tokenPrefix)#\(CDS_XML_TOKEN_DELIMITER)\(token)\(CDS_XML_TOKEN_DELIMITER)" + } +} diff --git a/Sources/Transifex/RenderingStrategy.swift b/Sources/Transifex/RenderingStrategy.swift index 3e0fd57..61a766c 100644 --- a/Sources/Transifex/RenderingStrategy.swift +++ b/Sources/Transifex/RenderingStrategy.swift @@ -49,13 +49,13 @@ class PlatformFormat : RenderingStrategyFormatter { /// This way, this method finds the rule without implementing the complex CLDR /// business logic from scratch. 
static func extractPluralizationRule(locale: Locale, - arguments: [CVarArg]) -> PluralizationRule { + argument: CVarArg) -> PluralizationRule { let key = NSLocalizedString("Transifex.StringsDict.TestKey.%d", bundle: Bundle.module, comment: "") let pluralizationRule = String(format: key, locale: locale, - arguments: arguments) + arguments: [argument]) switch pluralizationRule { case "zero": return .zero @@ -81,40 +81,134 @@ class PlatformFormat : RenderingStrategyFormatter { // Check if the provided parameters contain an argument array // and it can be converted to a [CVarArg] array. guard let args = params[Swizzler.PARAM_ARGUMENTS_KEY] as? [Any], - let cArgs = args as? [CVarArg] else { + var arguments = args as? [CVarArg] else { return stringToRender } - + let locale = Locale(identifier: localeCode) // Extract all plurals based on the ICU Message Format - guard let plurals = stringToRender.extractICUPlurals() else { - return String.init(format: stringToRender, locale: locale, - arguments: cArgs) + let plurals = stringToRender.extractICUPlurals() + + // No plural rules were found in the original string, fallback to the + // typical string format. + guard plurals.count > 0 else { + return String.init(format: stringToRender, + locale: locale, + arguments: arguments) } - + + // Find and extract the tokens, if any. + let tokens = PluralUtils.extractTokens(from: stringToRender) + + // If no tokens were found, then expect a single plural rule. + // If there are more than 1 plural rules, pick and process the first one. 
+ if tokens.count == 0 { + guard let icuPluralResult = plurals.first?.value, + let arg = arguments.first, + let resultingString = Self.process(icuPluralResult: icuPluralResult, + locale: locale, + arg: arg) else { + return String.init(format: stringToRender, locale: locale, + arguments: arguments) + } + + return resultingString + } + + // In this case there are multiple tokens and potentially other format + // specifiers, like so: + // "Device contains %1$#@ICU1@ and %2$#@ICU2@ in %3$ld folders" + // + // Ref: https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/Strings/Articles/formatSpecifiers.html + var format = stringToRender + + // Enumerate the extracted tokens + tokens.forEach { processedTokenResult in + // Cleaned tokens should be: "ICU1", "ICU2", ... + let cleanedToken = processedTokenResult.2 + + // If the ICU plural result cannot be found, skip this token. + guard let icuPluralResult = plurals[cleanedToken] else { + return + } + + // Token prefix should be: "1$", "2$", ... + let tokenPrefix = processedTokenResult.1 + + // Extract the positional specifier from the tokenPrefix + guard let positionalSpecifier = Int(String(tokenPrefix.dropLast())) else { + return + } + + // Subtract 1, as positional specifiers always start from 1. + let index = positionalSpecifier - 1 + + // Expect to find the index in the `arguments` array. + guard index >= 0, + index < arguments.count else { + return + } + + // Process the ICU rule with the correct argument, generating the + // final string for that rule. + guard let resultingString = Self.process(icuPluralResult: icuPluralResult, + locale: locale, + arg: arguments[index]) else { + return + } + + // Replace the specifier for that ICU rule transforming it from + // `%1$#@ICU1@` to `%1$@`, respecting the positional specifier and + // adding an Objective-C object format specifier, so that the whole + // token will be replaced by the resultingString in the end. 
+ format = format.replacingOccurrences(of: "#@\(cleanedToken)", + with: "") + + // Replace the original argument for that position with the + // resultingString, so that it will be used instead of the number in + // the final string. + arguments[index] = resultingString + } + + return String.init(format: format, + locale: locale, + arguments: arguments) + } + + /// Given an ICUPluralResult, the locale and the argument, construct the final string for that ICU rule. + /// + /// - Parameters: + /// - icuPluralResult: The ICUPluralResult structure. + /// - locale: The current locale. + /// - arg: The argument to be passed, in order to locate the proper plural rule. + /// - Returns: The final string for that ICU rule. + private static func process(icuPluralResult: ICUPluralResult, + locale: Locale, + arg: CVarArg) -> String? { // Detect which rule to use let rule = extractPluralizationRule(locale: locale, - arguments: cArgs) + argument: arg) + + var chosenFormat: String? - var chosenFormat : String? - // Use the proper format based on the extracted rule - if let formatRule = plurals[rule] { + if let formatRule = icuPluralResult.extractedPlurals[rule] { chosenFormat = formatRule } // Otherwise fallback to the "other" rule else { - chosenFormat = plurals[.other] + chosenFormat = icuPluralResult.extractedPlurals[.other] } - + + // If the chosen format cannot be found, bail. 
guard let format = chosenFormat else { - return String.init(format: stringToRender, locale: locale, - arguments: cArgs) + return nil } - - return String.init(format: format, locale: locale, - arguments: cArgs) + + return String.init(format: format, + locale: locale, + arguments: [arg]) } } diff --git a/Tests/TransifexTests/TransifexTests.swift b/Tests/TransifexTests/TransifexTests.swift index 769d139..38e97b3 100644 --- a/Tests/TransifexTests/TransifexTests.swift +++ b/Tests/TransifexTests/TransifexTests.swift @@ -140,11 +140,13 @@ final class TransifexTests: XCTestCase { comment: "Test comment", characterLimit: 10, tags: ["test"]) - - let jsonData = try! JSONEncoder().encode(sourceStringMeta) + + let encoder = JSONEncoder() + encoder.outputFormatting = .sortedKeys + let jsonData = try! encoder.encode(sourceStringMeta) let jsonString = String(data: jsonData, encoding: .utf8) - let expectedJsonString = "{\"character_limit\":10,\"tags\":[\"test\"],\"developer_comment\":\"Test comment\",\"context\":[\"test\"]}" + let expectedJsonString = "{\"character_limit\":10,\"context\":[\"test\"],\"developer_comment\":\"Test comment\",\"tags\":[\"test\"]}" XCTAssertEqual(jsonString, expectedJsonString) } @@ -170,31 +172,104 @@ final class TransifexTests: XCTestCase { key:"testkey", meta: sourceStringMeta) - let jsonData = try! JSONEncoder().encode(sourceString) + let encoder = JSONEncoder() + encoder.outputFormatting = .sortedKeys + let jsonData = try! 
encoder.encode(sourceString) let jsonString = String(data: jsonData, encoding: .utf8) - let expectedJsonString = "{\"string\":\"test string\",\"meta\":{\"character_limit\":10,\"tags\":[\"test\"],\"developer_comment\":\"Test comment\",\"context\":[\"test\"]}}" + let expectedJsonString = "{\"meta\":{\"character_limit\":10,\"context\":[\"test\"],\"developer_comment\":\"Test comment\",\"tags\":[\"test\"]},\"string\":\"test string\"}" XCTAssertEqual(jsonString, expectedJsonString) } + func testExtractMultipleICUPlurals() { + XCTAssertEqual( + "There {term1, plural, one {is %d person} other {are %d people}} sitting in {term2, plural, one {%d table} two {a couple of tables} other {%d tables}} in this restaurant".extractICUPlurals(), + [ + "{term1, plural, one {is %d person} other {are %d people}}" : ICUPluralResult( + extractedPlurals: [ + .one: "is %d person", + .other: "are %d people" + ]), + "{term2, plural, one {%d table} two {a couple of tables} other {%d tables}}" : ICUPluralResult( + extractedPlurals: [ + .one: "%d table", + .two: "a couple of tables", + .other: "%d tables" + ]) + ] + ) + } + func testExtractICUPlurals() { XCTAssertEqual( "{???, plural, one {One table} two {A couple of tables} other {%d tables}}".extractICUPlurals(), [ - PluralizationRule.one: "One table", - PluralizationRule.two: "A couple of tables", - PluralizationRule.other: "%d tables" + "{???, plural, one {One table} two {A couple of tables} other {%d tables}}" : ICUPluralResult( + extractedPlurals: [ + .one: "One table", + .two: "A couple of tables", + .other: "%d tables" + ]) ] ) XCTAssertEqual( "{cnt, plural, other {%d tables}}".extractICUPlurals(), - [PluralizationRule.other: "%d tables"] + [ + "{cnt, plural, other {%d tables}}" : ICUPluralResult( + extractedPlurals: [ + .other: "%d tables" + ]) + ] ) XCTAssertEqual("{cnt, plural, }".extractICUPlurals(), [:]) - XCTAssertEqual("{something}".extractICUPlurals(), nil) + XCTAssertEqual("{something}".extractICUPlurals(), [:]) } - + + func 
testPlatformFormatMultiple() { + // As per documentation [^1]: + // + // > The meaning of the plural categories is language-dependent, and + // > not all languages have the same categories. + // > For example, the English language only requires the one and other + // > categories to represent plural forms, and zero is optional. + // > Arabic has different plural forms for the zero, one, two, few, + // > many, and other categories. + // > Although Russian also uses the many category, the rules for which + // > numbers are in the many category aren’t the same as the Arabic + // > rules. + // + // [^1]: https://developer.apple.com/documentation/xcode/localizing-strings-that-contain-plurals#Localize-the-strings-dictionary-file-in-the-development-language + + XCTAssertEqual(try PlatformFormat.format(stringToRender: "There %1$#@{term1, plural, one {is %d person} other {are %d people}}@ sitting in %2$#@{term2, plural, one {%d table} two {a couple of tables} other {%d tables}}@ in this restaurant.", + localeCode: "en", + params: [Swizzler.PARAM_ARGUMENTS_KEY: [3,5]]), + "There are 3 people sitting in 5 tables in this restaurant.") + + XCTAssertEqual(try PlatformFormat.format(stringToRender: "There %1$#@{term1, plural, zero {is noone} one {is %d person} other {are %d people}}@ sitting in %2$#@{term2, plural, zero {any tables} one {%d table} other {%d tables}}@ in this restaurant", + localeCode: "en", + params: [Swizzler.PARAM_ARGUMENTS_KEY: [0,0]]), + "There is noone sitting in any tables in this restaurant") + + XCTAssertEqual(try PlatformFormat.format(stringToRender: "There %1$#@{term1, plural, zero {is noone} one {is %d person} other {are %d people}}@ sitting in %2$#@{term2, plural, one {%d table} other {%d tables}}@ in this restaurant", + localeCode: "en", + params: [Swizzler.PARAM_ARGUMENTS_KEY: [0,2]]), + "There is noone sitting in 2 tables in this restaurant") + + // Two rule works in Arabic locale, not in English + XCTAssertEqual(try 
PlatformFormat.format(stringToRender: "There %1$#@{term1, plural, zero {is noone} one {is %d person} other {are %d people}}@ sitting in %2$#@{term2, plural, one {%d table} two {a couple of tables} other {%d tables}}@ in this restaurant", + localeCode: "ar", + params: [Swizzler.PARAM_ARGUMENTS_KEY: [0,2]]), + "There is noone sitting in a couple of tables in this restaurant") + } + + func testPlatformFormat() { + XCTAssertEqual(try PlatformFormat.format(stringToRender: "{cnt, plural, one {One table} other {%d tables}}", + localeCode: "en", + params: [Swizzler.PARAM_ARGUMENTS_KEY: [1]]), + "One table") + } + func testTXNativeFetchTranslationsWithStatus() { let mockResponse1 = MockResponse(url: URL(string: "https://cds.svc.transifex.net/content/en?filter%5Bstatus%5D=translated")!, data: "{\"data\":{\"testkey1\":{\"string\":\"test string 1\"},\"testkey2\":{\"string\":\"test string 2\"}}}".data(using: .utf8)) @@ -872,14 +947,167 @@ final class TransifexTests: XCTestCase { XCTAssertEqual(pluralsResultOther, translatedStringPluralOther) } + + func testXMLPluralParserDeviceVariation() { + let parseResult = XMLPluralParser.extract(pluralString: """ +This is Apple VisionThis is an Apple WatchThis is an iPhoneThis is a MacThis is a device +""", deviceName: "mac") + XCTAssertEqual(parseResult, "This is a Mac") + } + + func testXMLPluralParserDeviceVariationiPadFallbackiPhone() { + let parseResult = XMLPluralParser.extract(pluralString: """ +This is Apple VisionThis is an Apple WatchThis is an iPhoneThis is a MacThis is a device +""", deviceName: "ipad") + XCTAssertEqual(parseResult, "This is an iPhone") + } + + func testXMLPluralParserDeviceVariationiPadFallbackOther() { + let parseResult = XMLPluralParser.extract(pluralString: """ +This is Apple VisionThis is an Apple WatchThis is an iPhoneThis is a MacThis is a device +""", deviceName: "ipad") + XCTAssertEqual(parseResult, "This is a device") + } + + func testXMLPluralParserDevicePluralVariation() { + let parseResult = 
XMLPluralParser.extract(pluralString: """ +iPhone has %d itemiPhone has %d itemsMac has %d itemsWe have %d items +""", + deviceName: "iphone") + XCTAssertEqual(parseResult, "{???, plural, one {iPhone has %d item} other {iPhone has %d items}}") + } + + func testXMLPluralParserSimpleSubstitutions() { + let parseResult = XMLPluralParser.extract(pluralString: """ +Found %1$#@arg1@ having %2$#@arg2@%1$ld user%1$ld users%2$ld device%2$ld devices +""") + XCTAssertEqual(parseResult, "Found %1$#@{arg1, plural, one {%ld user} other {%ld users}}@ having %2$#@{arg2, plural, one {%ld device} other {%ld devices}}@") + + XCTAssertEqual( + parseResult!.extractICUPlurals(), + [ + "{arg1, plural, one {%ld user} other {%ld users}}" : ICUPluralResult( + extractedPlurals: [ + .one: "%ld user", + .other: "%ld users" + ]), + "{arg2, plural, one {%ld device} other {%ld devices}}" : ICUPluralResult( + extractedPlurals: [ + .one: "%ld device", + .other: "%ld devices" + ]) + ] + ) + } + func testXMLPluralParserSimpleSubstitutionsStringsDict() { + let parseResult = XMLPluralParser.extract(pluralString: """ +Found %#@arg1@ having %#@arg2@%ld user%ld users%ld device%ld devices +""") + XCTAssertEqual(parseResult, "Found %1$#@{arg1, plural, one {%ld user} other {%ld users}}@ having %2$#@{arg2, plural, one {%ld device} other {%ld devices}}@") + + XCTAssertEqual( + parseResult!.extractICUPlurals(), + [ + "{arg1, plural, one {%ld user} other {%ld users}}" : ICUPluralResult( + extractedPlurals: [ + .one: "%ld user", + .other: "%ld users" + ]), + "{arg2, plural, one {%ld device} other {%ld devices}}" : ICUPluralResult( + extractedPlurals: [ + .one: "%ld device", + .other: "%ld devices" + ]) + ] + ) + } + + func testXMLPluralParserSimpleSubstitutionsStringsDictAlt() { + let parseResult = XMLPluralParser.extract(pluralString: """ +%#@num_people_in_room@ in %#@room@Only %d personSome peopleNo people%d room%d roomsno room +""") + XCTAssertEqual(parseResult, "%1$#@{num_people_in_room, plural, one {Only %d 
person} other {Some people} zero {No people}}@ in %2$#@{room, plural, one {%d room} other {%d rooms} zero {no room}}@") + + XCTAssertEqual( + parseResult!.extractICUPlurals(), + [ + "{num_people_in_room, plural, one {Only %d person} other {Some people} zero {No people}}" : ICUPluralResult( + extractedPlurals: [ + .one: "Only %d person", + .other: "Some people", + .zero: "No people" + ]), + "{room, plural, one {%d room} other {%d rooms} zero {no room}}" : ICUPluralResult( + extractedPlurals: [ + .one: "%d room", + .other: "%d rooms", + .zero: "no room" + ]) + ] + ) + } + + func testXMLPluralParserDeviceAndSubstitutions() { + let parseResult = XMLPluralParser.extract(pluralString: """ +This iPhone contains %1$#@user_iphone@ with %2$#@folder_iphone@ This Mac contains %1$#@user_mac@ with %2$#@folder_mac@ %2$ld folder%2$ld folders%2$ld folder%2$ld folders%1$ld user%1$ld users%1$ld user%1$ld users +""", + deviceName: "mac") + XCTAssertEqual(parseResult, "This Mac contains %1$#@{user_mac, plural, one {%ld user} other {%ld users}}@ with %2$#@{folder_mac, plural, one {%ld folder} other {%ld folders}}@ ") + + XCTAssertEqual( + parseResult!.extractICUPlurals(), + [ + "{user_mac, plural, one {%ld user} other {%ld users}}" : ICUPluralResult( + extractedPlurals: [ + .one: "%ld user", + .other: "%ld users" + ]), + "{folder_mac, plural, one {%ld folder} other {%ld folders}}" : ICUPluralResult( + extractedPlurals: [ + .one: "%ld folder", + .other: "%ld folders" + ]) + ] + ) + } + + func testXMLDeviceSubstitutionSpecial() { + let parseResult1 = XMLPluralParser.extract(pluralString: """ +Device has %1$#@arg1_iphone@ in %2$ld foldersDevice has %ld users in %ld folders%1$ld user%1$ld users +""", + deviceName: "mac") + XCTAssertEqual(parseResult1, "Device has %1$ld users in %2$ld folders") + + let parseResult2 = XMLPluralParser.extract(pluralString: """ +Device has %1$#@arg1_iphone@ in %2$ld foldersDevice has %ld users in %ld folders%1$ld user%1$ld users +""", + deviceName: "iphone") + 
let expectedResult2 = "Device has %1$#@{arg1_iphone, plural, one {%ld user} other {%ld users}}@ in %2$ld folders" + XCTAssertEqual(parseResult2, expectedResult2) + + XCTAssertEqual( + expectedResult2.extractICUPlurals(), + [ + "{arg1_iphone, plural, one {%ld user} other {%ld users}}" : ICUPluralResult( + extractedPlurals: [ + .one: "%ld user", + .other: "%ld users" + ]) + ] + ) + } + static var allTests = [ ("testDuplicateLocaleFiltering", testDuplicateLocaleFiltering), ("testCurrentLocaleProvider", testCurrentLocaleProvider), ("testEncodingSourceStringMeta", testEncodingSourceStringMeta), ("testEncodingSourceString", testEncodingSourceString), ("testEncodingSourceStringWithMeta", testEncodingSourceStringWithMeta), + ("testExtractMultipleICUPlurals", testExtractMultipleICUPlurals), ("testExtractICUPlurals", testExtractICUPlurals), + ("testPlatformFormatMultiple", testPlatformFormatMultiple), + ("testPlatformFormat", testPlatformFormat), ("testTXNativeFetchTranslationsWithStatus", testTXNativeFetchTranslationsWithStatus), ("testTXNativeFetchTranslationsWithTags", testTXNativeFetchTranslationsWithTags), ("testCDSHandlerFetchTranslationsWithStatus", testCDSHandlerFetchTranslationsWithStatus), @@ -898,5 +1126,14 @@ final class TransifexTests: XCTestCase { ("testCurrentLocaleNotAnyPreference", testCurrentLocaleNotAnyPreference), ("testSourceLocalePosition", testSourceLocalePosition), ("testTranslateWithSourceStringsInCache", testTranslateWithSourceStringsInCache), + ("testXMLPluralParserDeviceVariation", testXMLPluralParserDeviceVariation), + ("testXMLPluralParserDeviceVariationiPadFallbackiPhone", testXMLPluralParserDeviceVariationiPadFallbackiPhone), + ("testXMLPluralParserDeviceVariationiPadFallbackOther", testXMLPluralParserDeviceVariationiPadFallbackOther), + ("testXMLPluralParserDevicePluralVariation", testXMLPluralParserDevicePluralVariation), + ("testXMLPluralParserSimpleSubstitutions", testXMLPluralParserSimpleSubstitutions), + 
("testXMLPluralParserSimpleSubstitutionsStringsDict", testXMLPluralParserSimpleSubstitutionsStringsDict), + ("testXMLPluralParserSimpleSubstitutionsStringsDictAlt", testXMLPluralParserSimpleSubstitutionsStringsDictAlt), + ("testXMLPluralParserDeviceAndSubstitutions", testXMLPluralParserDeviceAndSubstitutions), + ("testXMLDeviceSubstitutionSpecial", testXMLDeviceSubstitutionSpecial), ] } From 4fd48bcab44c8623cb3fdee19ecec9af4e0e2ac4 Mon Sep 17 00:00:00 2001 From: Stelios Petrakis Date: Wed, 29 May 2024 08:42:03 +0200 Subject: [PATCH 2/2] Bump version to 2.0.2 * Bumps `TXNative.version` to 2.0.2. * Updates CHANGELOG with the changes implemented for 2.0.2. --- CHANGELOG.md | 8 ++++++++ Sources/Transifex/Core.swift | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f7f8c2..e3dc07d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -109,3 +109,11 @@ bundled source locale translations, in case the target translations was not found, was trying to access the file by using the format that Transifex uses (e.g. `en_US`) instead of the one that iOS and Xcode use (e.g. `en-US`). The logic now normalizes the locale name to match the format that iOS accepts. + +## Transifex iOS SDK 2.0.2 + +*May 29, 2024* + +- Adds full support for String Catalogs. +- Adds support for substitution phrases on old Strings Dictionary file format. +- Updates unit tests. diff --git a/Sources/Transifex/Core.swift b/Sources/Transifex/Core.swift index 34daefe..a044833 100644 --- a/Sources/Transifex/Core.swift +++ b/Sources/Transifex/Core.swift @@ -361,7 +361,7 @@ render '\(stringToRender)' locale code: \(localeCode) params: \(params). Error: /// A static class that is the main point of entry for all the functionality of Transifex Native throughout the SDK.
public final class TXNative : NSObject { /// The SDK version - internal static let version = "2.0.1" + internal static let version = "2.0.2" /// The filename of the file that holds the translated strings and it's bundled inside the app. public static let STRINGS_FILENAME = "txstrings.json"