Skip to content

Commit

Permalink
Implement swift-bridged html parser in rust
Browse files Browse the repository at this point in the history
  • Loading branch information
tmolitor-stud-tu committed Oct 14, 2024
1 parent 5d2fd80 commit 26081ed
Show file tree
Hide file tree
Showing 12 changed files with 796 additions and 63 deletions.
48 changes: 20 additions & 28 deletions Monal/Classes/MLOgHtmlParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,32 @@
// Copyright © 2022 Monal.im. All rights reserved.
//

import SwiftSoup;

@objc class MLOgHtmlParser: NSObject {
var og_title: String?
var og_image_url: URL?

@objc init(html: String, andBaseUrl baseUrl: URL?) {
super.init()
do {
let parsedSite: Document = try SwiftSoup.parse(html)

self.og_title = try parsedSite.select("meta[property=og:title]").first()?.attr("content")
if self.og_title == nil {
self.og_title = try parsedSite.select("html head title").first()?.text()
}
if self.og_title == nil {
DDLogWarn("Could not find any site title")
}

if let image_url = try parsedSite.select("meta[property=og:image]").first()?.attr("content").removingPercentEncoding {
self.og_image_url = self.parseUrl(image_url, baseUrl)
} else if let image_url = try parsedSite.select("html head link[rel=apple-touch-icon]").first()?.attr("href").removingPercentEncoding {
self.og_image_url = self.parseUrl(image_url, baseUrl)
} else if let image_url = try parsedSite.select("html head link[rel=icon]").first()?.attr("href").removingPercentEncoding {
self.og_image_url = self.parseUrl(image_url, baseUrl)
} else if let image_url = try parsedSite.select("html head link[rel=shortcut icon]").first()?.attr("href").removingPercentEncoding {
self.og_image_url = self.parseUrl(image_url, baseUrl)
} else {
DDLogWarn("Could not find any site image")
}
} catch Exception.Error(let type, let message) {
DDLogWarn("Could not parse html og elements: \(message) type: \(type)")
} catch {
DDLogWarn("Could not parse html og elements: unhandled exception")
let parsedSite = HtmlParserBridge(html:html)

self.og_title = try? parsedSite.select("meta[property=og:title]", attribute:"content").first
if self.og_title == nil {
self.og_title = try? parsedSite.select("html head title").first
}
if self.og_title == nil {
DDLogWarn("Could not find any site title")
}

if let image_url = try? parsedSite.select("meta[property=og:image]", attribute:"content").first?.removingPercentEncoding {
self.og_image_url = self.parseUrl(image_url, baseUrl)
} else if let image_url = try? parsedSite.select("html head link[rel=apple-touch-icon]", attribute:"href").first?.removingPercentEncoding {
self.og_image_url = self.parseUrl(image_url, baseUrl)
} else if let image_url = try? parsedSite.select("html head link[rel=icon]", attribute:"href").first?.removingPercentEncoding {
self.og_image_url = self.parseUrl(image_url, baseUrl)
} else if let image_url = try? parsedSite.select("html head link[rel=shortcut icon]", attribute:"href").first?.removingPercentEncoding {
self.og_image_url = self.parseUrl(image_url, baseUrl)
} else {
DDLogWarn("Could not find any site image in html")
}
}

Expand Down
42 changes: 36 additions & 6 deletions Monal/Classes/SwiftHelpers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,32 @@ public class SwiftHelpers: NSObject {
}
}

//TODO: remove this
extension UIImage {
    /// Renders this image into a freshly created bitmap context of the requested size.
    ///
    /// The image is drawn into a rect that starts at the origin and spans the whole
    /// `size`, so the result always has the requested dimensions.
    ///
    /// - Parameter size: Dimensions of the bitmap context the image is drawn into.
    /// - Returns: The image captured from the context, or `nil` if no image could be obtained.
    public func thumbnail(size: CGSize) -> UIImage? {
        let targetRect = CGRect(origin: .zero, size: size)

        UIGraphicsBeginImageContextWithOptions(size, false, 0.0)
        // Tear the context down on every exit path, after the result was captured.
        defer { UIGraphicsEndImageContext() }

        self.draw(in: targetRect)
        let rendered = UIGraphicsGetImageFromCurrentImageContext()
        return rendered
    }
}

// **********************************************
// **************** rust bridges ****************
// **********************************************

fileprivate extension RustVec {
    /// Drains this vector into a native Swift array, preserving the original element order.
    ///
    /// Elements are moved out one by one with `pop()`, so the vector is empty once
    /// this method returns.
    ///
    /// - Returns: All elements previously held by the vector, first element first.
    func intoArray() -> [T] {
        var array: [T] = []
        // The final element count is known up front, avoid repeated reallocation.
        array.reserveCapacity(Int(self.len()))
        // Pop until the vector reports it is empty instead of force-unwrapping
        // `pop()` a precomputed number of times.
        while let element = self.pop() {
            array.append(element)
        }
        // pop() yields the elements back to front, so restore the original order.
        array.reverse()
        return array
    }
}

// Declares that RustString conforms to Swift's `Error` protocol, so a RustString
// value can be thrown and caught like any other Swift error.
// NOTE(review): presumably needed for bridged Rust functions that report failures as strings — confirm against the generated bridge code.
extension RustString: Error {}

@objcMembers
public class JingleSDPBridge : NSObject {
@objc(getJingleStringForSDPString:withInitiator:)
Expand All @@ -460,11 +486,15 @@ public class JingleSDPBridge : NSObject {
}
}

extension UIImage {
public func thumbnail(size: CGSize) -> UIImage? {
UIGraphicsBeginImageContextWithOptions(size, false, 0.0)
defer { UIGraphicsEndImageContext() }
draw(in: CGRect(origin: .zero, size: size))
return UIGraphicsGetImageFromCurrentImageContext()
/// Thin Swift wrapper around `MonalHtmlParser` that returns query results as plain Swift strings.
@objcMembers
public class HtmlParserBridge : NSObject {
    /// The wrapped parser instance, created once from the html passed to `init(html:)`.
    var document: MonalHtmlParser

    /// Creates a bridge around a `MonalHtmlParser` built from the given html.
    ///
    /// - Parameter html: The html source handed to the parser.
    public init(html: String) {
        self.document = MonalHtmlParser(html)
    }

    /// Runs a selector query against the wrapped document.
    ///
    /// - Parameters:
    ///   - selector: The selector forwarded to the parser
    ///     (NOTE(review): looks like CSS selector syntax, judging by callers — confirm).
    ///   - attribute: Optional attribute name forwarded to the parser; defaults to `nil`
    ///     (NOTE(review): presumably `nil` yields the element text instead of an attribute value — confirm).
    /// - Returns: The parser's results converted to Swift strings, in the order the parser returned them.
    /// - Throws: Declared as throwing, but the body contains no throwing call.
    public func select(_ selector: String, attribute: String? = nil) throws -> [String] {
        let matches = self.document.select(selector, attribute).intoArray()
        var converted: [String] = []
        for match in matches {
            converted.append(match.toString())
        }
        return converted
    }
}
13 changes: 6 additions & 7 deletions Monal/Classes/chatViewController.m
Original file line number Diff line number Diff line change
Expand Up @@ -3010,7 +3010,7 @@ -(void) loadPreviewWithUrlForRow:(NSIndexPath *) indexPath withResultHandler:(mo
return;
}
//limit to 512KB of html
if(contentLength.intValue > 65536)
if(contentLength.intValue > 524288)
{
DDLogWarn(@"Now loading preview HTML for %@ with byte range 0-512k...", row.url);
[self downloadPreviewWithRow:indexPath usingByterange:YES andResultHandler:resultHandler];
Expand Down Expand Up @@ -3050,7 +3050,7 @@ -(void) downloadPreviewWithRow:(NSIndexPath*) indexPath usingByterange:(BOOL) us
request.requiresDNSSECValidation = YES;
[request setValue:@"facebookexternalhit/1.1" forHTTPHeaderField:@"User-Agent"]; //required on some sites for og tags e.g. youtube
if(useByterange)
[request setValue:@"bytes=0-65536" forHTTPHeaderField:@"Range"];
[request setValue:@"bytes=0-524288" forHTTPHeaderField:@"Range"];
request.timeoutInterval = 10;
NSURLSession* session = [HelperTools createEphemeralURLSession];
[[session dataTaskWithRequest:request completionHandler:^(NSData* _Nullable data, NSURLResponse* _Nullable response, NSError* _Nullable error) {
Expand All @@ -3062,11 +3062,10 @@ -(void) downloadPreviewWithRow:(NSIndexPath*) indexPath usingByterange:(BOOL) us
MLOgHtmlParser* ogParser = nil;
NSString* text = nil;
NSURL* image = nil;
if([body length] <= 65536)
{
NSURL* baseURL = [NSURL URLWithString:[NSString stringWithFormat:@"%@://%@%@", row.url.scheme, row.url.host, row.url.path]];
ogParser = [[MLOgHtmlParser alloc] initWithHtml:body andBaseUrl:baseURL];
}
if([body length] > 524288)
body = [body substringToIndex:524288];
NSURL* baseURL = [NSURL URLWithString:[NSString stringWithFormat:@"%@://%@%@", row.url.scheme, row.url.host, row.url.path]];
ogParser = [[MLOgHtmlParser alloc] initWithHtml:body andBaseUrl:baseURL];
if(ogParser != nil)
{
text = [ogParser getOgTitle];
Expand Down
21 changes: 2 additions & 19 deletions Monal/Monal.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,6 @@
C1C839DE24F15DF800BBCF17 /* MLOMEMO.m in Sources */ = {isa = PBXBuildFile; fileRef = C1C839DC24F15DF800BBCF17 /* MLOMEMO.m */; };
C1D7D7AF283FB4E500401389 /* Images.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 26B2A4BA1B73061400272E63 /* Images.xcassets */; };
C1D7D7B0283FB4E700401389 /* Media.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 26470F511835C4080069E3E0 /* Media.xcassets */; };
C1E1EC7B286A025F0097EC74 /* SwiftSoup in Frameworks */ = {isa = PBXBuildFile; productRef = C1E1EC7A286A025F0097EC74 /* SwiftSoup */; };
C1E4654824EE517000CA5AAF /* Localizable.strings in Resources */ = {isa = PBXBuildFile; fileRef = C1E4654624EE517000CA5AAF /* Localizable.strings */; };
C1E8A7F72B8E47C300760220 /* EditGroupSubject.swift in Sources */ = {isa = PBXBuildFile; fileRef = C1E8A7F62B8E47C300760220 /* EditGroupSubject.swift */; };
C1F5C7A92775DA000001F295 /* MLContactSoftwareVersionInfo.h in Headers */ = {isa = PBXBuildFile; fileRef = C1F5C7A72775DA000001F295 /* MLContactSoftwareVersionInfo.h */; };
Expand Down Expand Up @@ -788,7 +787,6 @@
files = (
8418B5672C87E0ED006FAF60 /* ExyteChat in Frameworks */,
261E542523A0A1D300394F59 /* monalxmpp.framework in Frameworks */,
C1E1EC7B286A025F0097EC74 /* SwiftSoup in Frameworks */,
84F194D12C15197200F0A994 /* FrameUp in Frameworks */,
C176F1EC2AF11C31002034E5 /* UserNotifications.framework in Frameworks */,
C1F5C7AF2777638B0001F295 /* OrderedCollections in Frameworks */,
Expand Down Expand Up @@ -1547,7 +1545,6 @@
name = Monal;
packageProductDependencies = (
C1F5C7AE2777638B0001F295 /* OrderedCollections */,
C1E1EC7A286A025F0097EC74 /* SwiftSoup */,
841898A92957712000FEC77D /* ViewExtractor */,
84F194D02C15197200F0A994 /* FrameUp */,
8418B5662C87E0ED006FAF60 /* ExyteChat */,
Expand Down Expand Up @@ -1763,7 +1760,6 @@
mainGroup = 29B97314FDCFA39411CA2CEA /* CustomTemplate */;
packageReferences = (
C1F5C7AD2777638B0001F295 /* XCRemoteSwiftPackageReference "swift-collections" */,
C1E1EC79286A025F0097EC74 /* XCRemoteSwiftPackageReference "SwiftSoup" */,
841898A82957712000FEC77D /* XCRemoteSwiftPackageReference "ViewExtractor" */,
849ADF3D2BACF0360009BCD7 /* XCRemoteSwiftPackageReference "cocoalumberjack" */,
84F194CF2C15197200F0A994 /* XCRemoteSwiftPackageReference "FrameUp" */,
Expand Down Expand Up @@ -4688,16 +4684,8 @@
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/ryanlintott/FrameUp";
requirement = {
kind = upToNextMajorVersion;
minimumVersion = 0.8.0;
};
};
C1E1EC79286A025F0097EC74 /* XCRemoteSwiftPackageReference "SwiftSoup" */ = {
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/scinfu/SwiftSoup.git";
requirement = {
kind = upToNextMajorVersion;
minimumVersion = 2.7.5;
kind = exactVersion;
version = 0.8.0;
};
};
C1F5C7AD2777638B0001F295 /* XCRemoteSwiftPackageReference "swift-collections" */ = {
Expand Down Expand Up @@ -4750,11 +4738,6 @@
package = 84F194CF2C15197200F0A994 /* XCRemoteSwiftPackageReference "FrameUp" */;
productName = FrameUp;
};
C1E1EC7A286A025F0097EC74 /* SwiftSoup */ = {
isa = XCSwiftPackageProductDependency;
package = C1E1EC79286A025F0097EC74 /* XCRemoteSwiftPackageReference "SwiftSoup" */;
productName = SwiftSoup;
};
C1F5C7AE2777638B0001F295 /* OrderedCollections */ = {
isa = XCSwiftPackageProductDependency;
package = C1F5C7AD2777638B0001F295 /* XCRemoteSwiftPackageReference "swift-collections" */;
Expand Down
Loading

0 comments on commit 26081ed

Please sign in to comment.