diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/rumble/RumbleService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/rumble/RumbleService.java
index ba9ac620eb..669a3d4bca 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/rumble/RumbleService.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/rumble/RumbleService.java
@@ -17,11 +17,13 @@
 import org.schabi.newpipe.extractor.search.SearchExtractor;
 import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleChannelExtractor;
 import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleChannelTabExtractor;
+import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleCommentsExtractor;
 import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleSearchExtractor;
 import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleStreamExtractor;
 import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleTrendingExtractor;
 import org.schabi.newpipe.extractor.services.rumble.linkHandler.RumbleChannelLinkHandlerFactory;
 import org.schabi.newpipe.extractor.services.rumble.linkHandler.RumbleChannelTabLinkHandlerFactory;
+import org.schabi.newpipe.extractor.services.rumble.linkHandler.RumbleCommentsLinkHandlerFactory;
 import org.schabi.newpipe.extractor.services.rumble.linkHandler.RumbleSearchQueryHandlerFactory;
 import org.schabi.newpipe.extractor.services.rumble.linkHandler.RumbleStreamLinkHandlerFactory;
 import org.schabi.newpipe.extractor.services.rumble.linkHandler.RumbleTrendingLinkHandlerFactory;
@@ -36,7 +38,8 @@ public class RumbleService extends StreamingService {
 
     public RumbleService(final int id) {
         super(id, "Rumble", asList(ServiceInfo.MediaCapability.VIDEO,
-                ServiceInfo.MediaCapability.AUDIO, ServiceInfo.MediaCapability.LIVE));
+                ServiceInfo.MediaCapability.AUDIO, ServiceInfo.MediaCapability.COMMENTS,
+                ServiceInfo.MediaCapability.LIVE));
     }
 
     @Override
@@ -138,12 +141,12 @@ public SubscriptionExtractor getSubscriptionExtractor() {
 
     @Override
     public ListLinkHandlerFactory getCommentsLHFactory() {
-        return null;
+        return RumbleCommentsLinkHandlerFactory.getInstance();
     }
 
     @Override
     public CommentsExtractor getCommentsExtractor(final ListLinkHandler urlIdHandler)
             throws ExtractionException {
-        return null;
+        return new RumbleCommentsExtractor(this, urlIdHandler);
     }
 }
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/rumble/extractors/RumbleCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/rumble/extractors/RumbleCommentsExtractor.java
new file mode 100644
index 0000000000..3467bc0930
--- /dev/null
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/rumble/extractors/RumbleCommentsExtractor.java
@@ -0,0 +1,261 @@
+package org.schabi.newpipe.extractor.services.rumble.extractors;
+
+import com.grack.nanojson.JsonObject;
+import com.grack.nanojson.JsonParser;
+import com.grack.nanojson.JsonParserException;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.TextNode;
+import org.jsoup.select.Elements;
+
+import org.schabi.newpipe.extractor.NewPipe;
+import org.schabi.newpipe.extractor.Page;
+import org.schabi.newpipe.extractor.StreamingService;
+import org.schabi.newpipe.extractor.comments.CommentsExtractor;
+import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
+import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
+import org.schabi.newpipe.extractor.downloader.Downloader;
+import org.schabi.newpipe.extractor.downloader.Response;
+import org.schabi.newpipe.extractor.exceptions.ExtractionException;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
+import org.schabi.newpipe.extractor.services.rumble.extractor.RumbleCommentsInfoItemExtractor;
+import org.schabi.newpipe.extractor.services.rumble.RumbleParsingHelper;
+import org.schabi.newpipe.extractor.utils.JsonUtils;
+
+import javax.annotation.Nonnull;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static org.schabi.newpipe.extractor.NewPipe.getDownloader;
+
+/**
+ * Extracts the comments of a Rumble video from the {@code comment.list} service response.
+ *
+ * <p>The response is a JSON object: its {@code html} member is parsed into the comment tree and
+ * its {@code css_libs} member is scanned for the rules that map avatar CSS classes to image
+ * URLs. The raw response travels in the body of every {@link Page} created here, so follow-up
+ * and reply pages are built from that body instead of a new download.</p>
+ *
+ * <p>The url of such a page is a space separated list of 1-based child indices, one per
+ * nesting level, addressing the first comment of the page (e.g. {@code "16"} or
+ * {@code "3 1"}).</p>
+ */
+public class RumbleCommentsExtractor extends CommentsExtractor {
+    /** Upper bound for the number of comments collected into one page. */
+    private static final int MAX_COMMENTS_PER_PAGE = 15;
+
+    /** Prefix of the CSS class that ties a comment to its avatar rule. */
+    private static final String USER_IMAGE_CLASS_PREFIX = "user-image--img--id-";
+
+    /** One avatar rule of the style sheet: group 1 is the id suffix, group 2 the image URL. */
+    private static final Pattern USER_IMAGE_RULE = Pattern.compile(
+            "i\\.user-image--img--id-(\\w+)\\s*\\{\\s*background-image:\\s*url\\(([^)]+)\\)");
+
+    /** Avatar CSS class to image URL; {@code null} until a response has been loaded. */
+    private Map<String, String> imageMap;
+
+    /** Parsed comment tree; {@code null} while no comment list is available. */
+    private Document doc;
+
+    public RumbleCommentsExtractor(
+            final StreamingService service,
+            final ListLinkHandler uiHandler) {
+        super(service, uiHandler);
+    }
+
+    /**
+     * @return {@code true} while no comment tree is loaded, i.e. before the first page has
+     *         been requested or when the response did not contain a comment list
+     */
+    public boolean isCommentsDisabled() throws ExtractionException {
+        return doc == null;
+    }
+
+    /**
+     * Downloads the page at {@link #getUrl()}, obtains the video id from it through
+     * {@link RumbleParsingHelper#getEmbedVideoId}, requests the comment list of that id and
+     * returns its first page.
+     */
+    @Nonnull
+    @Override
+    public InfoItemsPage<CommentsInfoItem> getInitialPage()
+            throws IOException, ExtractionException {
+        final Downloader downloader = NewPipe.getDownloader();
+        final String id = RumbleParsingHelper.getEmbedVideoId(
+                downloader.get(getUrl()).responseBody());
+        final String url = "https://rumble.com/service.php?video=" + id + "&name=comment.list";
+        // Fixed charset: the bytes are decoded again with UTF-8 when the page is loaded
+        final byte[] responseBody = downloader.get(url).responseBody()
+                .getBytes(StandardCharsets.UTF_8);
+        return getPage(new Page("1", responseBody));
+    }
+
+    /**
+     * Collects up to {@link #MAX_COMMENTS_PER_PAGE} sibling comments, starting with the one
+     * addressed by the url of {@code page}.
+     *
+     * @param page page whose url is an index path and whose body is the raw comment list
+     * @return the collected comments plus, if a further sibling exists, the page starting there
+     */
+    @Override
+    public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
+            throws IOException, ExtractionException {
+        final byte[] responseBody = page.getBody();
+        loadFromResponseBody(responseBody);
+        if (isCommentsDisabled()) {
+            return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
+        }
+        final int[] ids = stringToIntArray(page.getUrl());
+        final int last = ids.length - 1;
+        final int first = ids[last];
+        final int probe = first + MAX_COMMENTS_PER_PAGE;
+        final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
+                getServiceId());
+        Element next = null;
+        // The index one past the page size is only probed: it tells whether a next page exists
+        for (int index = first; index <= probe; index++) {
+            ids[last] = index;
+            next = getComments(ids).first();
+            if (next == null || index == probe) {
+                break;
+            }
+            collector.commit(new RumbleCommentsInfoItemExtractor(this, ids, responseBody));
+        }
+        return new InfoItemsPage<>(collector, next != null
+                ? new Page(intArrayToString(ids), responseBody) : null);
+    }
+
+    /** Intentionally empty: the comment list is downloaded by {@link #getInitialPage()}. */
+    @Override
+    public void onFetchPage(@Nonnull final Downloader downloader)
+            throws IOException, ExtractionException {
+    }
+
+    /**
+     * Selects comment elements by index path.
+     *
+     * @param id one 1-based child index per nesting level; {@code 0} selects all comments of
+     *           that level instead of a single one
+     * @return the matching {@code li.comment-item} elements; empty when no tree is loaded
+     */
+    public Elements getComments(final int[] id) {
+        if (doc == null) {
+            // Empty rather than null, so that callers may chain first() or size() safely
+            return new Elements();
+        }
+        final StringBuilder selection = new StringBuilder();
+        for (int level = 1; level <= id.length; level++) {
+            if (level != 1) {
+                selection.append(" > div.comment-replies > ");
+            }
+            selection.append("ul.comments-").append(level).append(" > li.comment-item");
+            final int index = id[level - 1];
+            if (index != 0) {
+                selection.append(":nth-child(").append(index).append(")");
+            }
+        }
+        return doc.select(selection.toString());
+    }
+
+    /**
+     * Looks up the avatar URL for a comment.
+     *
+     * @param e element that contains the {@code i.user-image} avatar placeholder
+     * @return the URL registered for the placeholder's CSS class, or {@code null} if none
+     */
+    public String getImage(final Element e) {
+        final Element element = e.selectFirst("i.user-image");
+        if (element == null || imageMap == null) {
+            return null;
+        }
+        for (final String name : element.className().split(" ")) {
+            if (name.startsWith(USER_IMAGE_CLASS_PREFIX)) {
+                final String url = imageMap.get(name);
+                if (url != null) {
+                    return url;
+                }
+            }
+        }
+        return null;
+    }
+
+    /** Joins the indices with single spaces, so {@code 3} and {@code 1} become {@code "3 1"}. */
+    public static String intArrayToString(final int[] intArray) {
+        final StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < intArray.length; i++) {
+            if (i > 0) {
+                sb.append(' ');
+            }
+            sb.append(intArray[i]);
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Inverse of {@link #intArrayToString(int[])}.
+     *
+     * @throws ParsingException if {@code str} is not a space separated list of integers
+     */
+    private static int[] stringToIntArray(final String str) throws ParsingException {
+        final String[] parts = str.split(" ");
+        final int[] result = new int[parts.length];
+        try {
+            for (int i = 0; i < parts.length; i++) {
+                result[i] = Integer.parseInt(parts[i]);
+            }
+        } catch (final NumberFormatException e) {
+            throw new ParsingException("Invalid comment page id: " + str, e);
+        }
+        return result;
+    }
+
+    /** Rebuilds {@link #imageMap} from the avatar rules found in the given style sheet. */
+    private void initImageMap(final String css) {
+        final Matcher matcher = USER_IMAGE_RULE.matcher(css);
+        imageMap = new HashMap<>();
+        while (matcher.find()) {
+            imageMap.put(USER_IMAGE_CLASS_PREFIX + matcher.group(1), matcher.group(2));
+        }
+    }
+
+    /**
+     * Parses a comment list response into {@link #doc} and {@link #imageMap}.
+     *
+     * <p>A {@code null} body, or a response that lacks the {@code html} or the
+     * {@code css_libs} member, leaves the current state untouched. A response whose html has
+     * no {@code ul.comments-1} list resets {@link #doc} to {@code null}.</p>
+     *
+     * @throws ExtractionException if the body cannot be parsed as a JSON object
+     */
+    private void loadFromResponseBody(final byte[] responseBody) throws ExtractionException {
+        if (responseBody == null) {
+            return;
+        }
+        final JsonObject info;
+        try {
+            info = JsonParser.object().from(new String(responseBody, StandardCharsets.UTF_8));
+        } catch (final JsonParserException e) {
+            // Keep the parser error as cause instead of printing it
+            throw new ExtractionException("Could not read json from: " + getUrl(), e);
+        }
+        if (!info.has("html") || !info.has("css_libs")) {
+            return;
+        }
+        doc = Jsoup.parse(info.get("html").toString());
+        if (doc.selectFirst("ul.comments-1") == null) {
+            doc = null;
+            return;
+        }
+        // The comments-create item is no comment (presumably the input form), so drop it
+        doc.select("li.comment-item.comment-item.comments-create").remove();
+        initImageMap(info.get("css_libs").toString());
+    }
+}
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/rumble/extractors/RumbleCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/rumble/extractors/RumbleCommentsInfoItemExtractor.java
new file mode 100644
index 0000000000..a92a4bd20b
--- /dev/null
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/rumble/extractors/RumbleCommentsInfoItemExtractor.java
@@ -0,0 +1,205 @@
+// NOTE(review): this file is located in .../rumble/extractors/ while the package below is
+// ...rumble.extractor (singular). The name is kept because it is part of the public class
+// name; consider aligning directory and package.
+package org.schabi.newpipe.extractor.services.rumble.extractor;
+
+import org.jsoup.nodes.Element;
+
+import org.schabi.newpipe.extractor.Image;
+import org.schabi.newpipe.extractor.Page;
+import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.localization.DateWrapper;
+import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleCommentsExtractor;
+import org.schabi.newpipe.extractor.stream.Description;
+
+import java.time.ZonedDateTime;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeParseException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+import static org.schabi.newpipe.extractor.services.rumble.extractors.RumbleCommentsExtractor.intArrayToString;
+
+/**
+ * Reads the data of a single comment from the comment tree of a
+ * {@link RumbleCommentsExtractor}.
+ */
+public class RumbleCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
+    /** Format of the {@code title} attribute of the post time link. */
+    private static final DateTimeFormatter UPLOAD_DATE_FORMATTER =
+            DateTimeFormatter.ofPattern("EEEE, MMMM d, yyyy h:mm a x", Locale.ENGLISH);
+
+    private final RumbleCommentsExtractor extractor;
+    /** Index path of this comment as understood by the extractor's {@code getComments}. */
+    private final int[] id;
+    /** Raw comment list response; passed on as body of the replies page. */
+    private final byte[] responseBody;
+    private final Element element;
+
+    /**
+     * @param extractor    extractor holding the parsed comment tree
+     * @param id           index path of the comment; copied, so the caller may reuse the array
+     * @param responseBody raw comment list response
+     */
+    public RumbleCommentsInfoItemExtractor(final RumbleCommentsExtractor extractor,
+                                           final int[] id, final byte[] responseBody) {
+        this.extractor = extractor;
+        // The caller keeps advancing the same array while paging: store a snapshot
+        this.id = id.clone();
+        this.responseBody = responseBody;
+        this.element = extractor.getComments(this.id).first();
+    }
+
+    /**
+     * @throws ParsingException if the count element is missing or its text is not an integer
+     */
+    @Override
+    public int getLikeCount() throws ParsingException {
+        final String count = selectRequired("div.rumbles-vote span.rumbles-count").text();
+        try {
+            return Integer.parseInt(count);
+        } catch (final NumberFormatException e) {
+            throw new ParsingException("Could not parse like count: " + count, e);
+        }
+    }
+
+    /** @return the like count followed by its unit, separated by a space */
+    @Override
+    public String getTextualLikeCount() throws ParsingException {
+        return selectRequired("div.rumbles-vote span.rumbles-count").text() + " "
+                + selectRequired("div.rumbles-vote span.rumbles-unit").text();
+    }
+
+    @Override
+    public Description getCommentText() {
+        return new Description(element.selectFirst("p.comment-text").wholeText(),
+                Description.PLAIN_TEXT);
+    }
+
+    /** @return the {@code title} attribute of the post time link */
+    @Override
+    public String getTextualUploadDate() {
+        return element.selectFirst("a.comments-meta-post-time").attr("title");
+    }
+
+    /**
+     * @throws ParsingException if the textual date does not match the expected format
+     */
+    @Nullable
+    @Override
+    public DateWrapper getUploadDate() throws ParsingException {
+        final String textualDate = getTextualUploadDate();
+        try {
+            final var datetime = ZonedDateTime.parse(textualDate, UPLOAD_DATE_FORMATTER);
+            return new DateWrapper(datetime.toOffsetDateTime(), false);
+        } catch (final DateTimeParseException e) {
+            throw new ParsingException("Could not parse upload date: " + textualDate, e);
+        }
+    }
+
+    @Override
+    public String getCommentId() {
+        return element.attr("data-comment-id");
+    }
+
+    @Override
+    public String getUploaderUrl() {
+        return "https://rumble.com" + element.selectFirst("a.comments-meta-author").attr("href");
+    }
+
+    @Override
+    public String getUploaderName() {
+        return element.selectFirst("a.comments-meta-author").text();
+    }
+
+    /** @return the avatar as single image of unknown size, or an empty list if there is none */
+    @Nonnull
+    @Override
+    public List<Image> getUploaderAvatars() throws ParsingException {
+        final String image = extractor.getImage(element);
+        if (image == null) {
+            return List.of();
+        }
+        return List.of(new Image(image,
+                Image.HEIGHT_UNKNOWN, Image.WIDTH_UNKNOWN, Image.ResolutionLevel.UNKNOWN));
+    }
+
+    public boolean isPinned() throws ParsingException {
+        return element.selectFirst("> div.comments-meta > span.pinned-text") != null;
+    }
+
+    public boolean isUploaderVerified() throws ParsingException {
+        return element.selectFirst(
+                "> div.comments-meta > div.comments-meta-user-badges > img[alt='Verified']")
+                != null;
+    }
+
+    /** @return the index path of this comment extended by one level; the new index is 0 */
+    private int[] getReplyId() {
+        return Arrays.copyOf(id, id.length + 1);
+    }
+
+    /** @return the number of direct replies to this comment */
+    public int getReplyCount() throws ParsingException {
+        // Index 0 on the last level selects all replies instead of a single one
+        return extractor.getComments(getReplyId()).size();
+    }
+
+    /**
+     * @return a page addressing the first direct reply, or {@code null} if there are no replies
+     */
+    @Nullable
+    public Page getReplies() throws ParsingException {
+        final int[] replyId = getReplyId();
+        if (extractor.getComments(replyId).isEmpty()) {
+            return null;
+        }
+        replyId[id.length] = 1;
+        return new Page(intArrayToString(replyId), responseBody);
+    }
+
+    public boolean isChannelOwner() throws ParsingException {
+        return element.selectFirst("> div.comments-meta > a.comments-meta-author-video-owner")
+                != null;
+    }
+
+    @Override
+    public String getName() throws ParsingException {
+        return getUploaderName();
+    }
+
+    /** @return the url of the owning extractor, or {@code null} if it cannot be determined */
+    @Override
+    public String getUrl() {
+        try {
+            return extractor.getUrl();
+        } catch (final ParsingException ignored) {
+            // Deliberate best effort: this override declares no checked exception
+            return null;
+        }
+    }
+
+    @Nonnull
+    @Override
+    public List<Image> getThumbnails() throws ParsingException {
+        return getUploaderAvatars();
+    }
+
+    /**
+     * Selects the first descendant of the comment element that matches the query.
+     *
+     * @throws ParsingException if no element matches
+     */
+    private Element selectRequired(final String cssQuery) throws ParsingException {
+        final Element found = element.selectFirst(cssQuery);
+        if (found == null) {
+            throw new ParsingException("Could not find element: " + cssQuery);
+        }
+        return found;
+    }
+}
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/rumble/linkHandler/RumbleCommentsLinkHandlerFactory.java
b/extractor/src/main/java/org/schabi/newpipe/extractor/services/rumble/linkHandler/RumbleCommentsLinkHandlerFactory.java
new file mode 100644
index 0000000000..74ecc45410
--- /dev/null
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/rumble/linkHandler/RumbleCommentsLinkHandlerFactory.java
@@ -0,0 +1,57 @@
+package org.schabi.newpipe.extractor.services.rumble.linkHandler;
+
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
+import org.schabi.newpipe.extractor.search.filter.FilterItem;
+
+import java.util.List;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+/**
+ * Link handler factory for the comments of a Rumble stream. Ids and urls are taken from
+ * {@link RumbleStreamLinkHandlerFactory}, to which every lookup is delegated.
+ */
+public final class RumbleCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
+
+    private static final RumbleCommentsLinkHandlerFactory INSTANCE =
+            new RumbleCommentsLinkHandlerFactory();
+
+    private RumbleCommentsLinkHandlerFactory() {
+    }
+
+    public static RumbleCommentsLinkHandlerFactory getInstance() {
+        return INSTANCE;
+    }
+
+    /** Both filter lists are ignored: the url is built from the id only. */
+    @Override
+    public String getUrl(final String id,
+                         @Nonnull final List<FilterItem> contentFilter,
+                         @Nullable final List<FilterItem> sortFilter) throws ParsingException {
+        return getUrl(id);
+    }
+
+    @Override
+    public String getUrl(final String id) throws ParsingException {
+        return RumbleStreamLinkHandlerFactory.getInstance().getUrl(id);
+    }
+
+    @Override
+    public String getId(final String url) throws ParsingException {
+        // Delegation to avoid duplicate code, as we need the same id
+        return RumbleStreamLinkHandlerFactory.getInstance().getId(url);
+    }
+
+    /** Accepts exactly the urls for which {@link #getId(String)} does not throw. */
+    @Override
+    public boolean onAcceptUrl(final String url) {
+        try {
+            getId(url);
+            return true;
+        } catch (final ParsingException e) {
+            return false;
+        }
+    }
+}