Skip to content

Commit

Permalink
Add class for fetching rumble video comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Benau committed Sep 12, 2024
1 parent 9de2c95 commit 4794f57
Show file tree
Hide file tree
Showing 4 changed files with 391 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleChannelExtractor;
import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleChannelTabExtractor;
import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleCommentsExtractor;
import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleSearchExtractor;
import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleStreamExtractor;
import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleTrendingExtractor;
import org.schabi.newpipe.extractor.services.rumble.linkHandler.RumbleChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.rumble.linkHandler.RumbleChannelTabLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.rumble.linkHandler.RumbleCommentsLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.rumble.linkHandler.RumbleSearchQueryHandlerFactory;
import org.schabi.newpipe.extractor.services.rumble.linkHandler.RumbleStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.rumble.linkHandler.RumbleTrendingLinkHandlerFactory;
Expand All @@ -36,7 +38,8 @@ public class RumbleService extends StreamingService {

public RumbleService(final int id) {
super(id, "Rumble", asList(ServiceInfo.MediaCapability.VIDEO,
ServiceInfo.MediaCapability.AUDIO, ServiceInfo.MediaCapability.LIVE));
ServiceInfo.MediaCapability.AUDIO, ServiceInfo.MediaCapability.COMMENTS,
ServiceInfo.MediaCapability.LIVE));
}

@Override
Expand Down Expand Up @@ -138,12 +141,12 @@ public SubscriptionExtractor getSubscriptionExtractor() {

@Override
public ListLinkHandlerFactory getCommentsLHFactory() {
return null;
return RumbleCommentsLinkHandlerFactory.getInstance();
}

@Override
public CommentsExtractor getCommentsExtractor(final ListLinkHandler urlIdHandler)
throws ExtractionException {
return null;
return new RumbleCommentsExtractor(this, urlIdHandler);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
package org.schabi.newpipe.extractor.services.rumble.extractors;

import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;

import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.services.rumble.extractor.RumbleCommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.services.rumble.RumbleParsingHelper;
import org.schabi.newpipe.extractor.utils.JsonUtils;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.schabi.newpipe.extractor.NewPipe.getDownloader;

/**
 * Extracts the comments of a Rumble video from the {@code comment.list} service response.
 *
 * <p>The response is a JSON object whose {@code html} member holds the rendered comment tree
 * and whose {@code css_libs} member holds the style rules that map avatar CSS classes to image
 * URLs. A page is addressed by a space separated list of 1-based positions, one per nesting
 * level (for example {@code "3 1"} is the first reply of the third top level comment). The raw
 * response is carried in the page body, so {@link #getPage(Page)} never issues a request.</p>
 */
public class RumbleCommentsExtractor extends CommentsExtractor {
    /** Maximum number of comments committed to a single page. */
    private static final int MAX_COMMENTS_PER_PAGE = 15;

    /** Prefix of the CSS class that identifies a user's avatar rule. */
    private static final String AVATAR_CLASS_PREFIX = "user-image--img--id-";

    /** Matches one avatar rule: group 1 is the class suffix, group 2 the text inside url(). */
    private static final Pattern AVATAR_RULE_PATTERN = Pattern.compile("i\\.user-image--img--id-(\\w+)\\s*\\{\\s*background-image:\\s*url\\(([^)]+)\\)");

    /** Avatar CSS class name to the raw {@code url(...)} content; {@code null} until loaded. */
    private Map<String, String> imageMap;

    /** Parsed comment tree; {@code null} while nothing is loaded or comments are unavailable. */
    private Document doc;

    public RumbleCommentsExtractor(
            final StreamingService service,
            final ListLinkHandler uiHandler) {
        super(service, uiHandler);
    }

    /**
     * @return {@code true} when no comment tree is currently loaded, which is also the case
     *         before the first page has been requested
     */
    public boolean isCommentsDisabled() throws ExtractionException {
        return doc == null;
    }

    /**
     * Downloads the video page to resolve the embed id, then downloads the comment list and
     * returns its first page.
     */
    @Nonnull
    @Override
    public InfoItemsPage<CommentsInfoItem> getInitialPage()
            throws IOException, ExtractionException {
        final Downloader downloader = NewPipe.getDownloader();
        final String id = RumbleParsingHelper.getEmbedVideoId(
                downloader.get(getUrl()).responseBody());
        final String url = "https://rumble.com/service.php?video=" + id + "&name=comment.list";
        // Encode explicitly: the platform default charset would corrupt non-ASCII comments
        // on systems where it is not UTF-8.
        final byte[] responseBody = downloader.get(url).responseBody()
                .getBytes(java.nio.charset.StandardCharsets.UTF_8);
        return getPage(new Page("1", responseBody));
    }

    /**
     * Collects up to {@link #MAX_COMMENTS_PER_PAGE} sibling comments starting at the position
     * encoded in the page URL, reading them from the response stored in the page body.
     *
     * @param page page whose URL is the space separated position list and whose body is the
     *             raw service response
     * @return the collected comments and, when a further sibling exists, the page that starts
     *         at that sibling
     * @throws IllegalArgumentException if {@code page} is {@code null}
     * @throws ParsingException if the page URL is not a valid position list
     * @throws ExtractionException if the response body is not valid JSON
     */
    @Override
    public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
            throws IOException, ExtractionException {
        if (page == null) {
            throw new IllegalArgumentException("Page must not be null");
        }
        final byte[] responseBody = page.getBody();
        loadFromResponseBody(responseBody);
        if (isCommentsDisabled()) {
            return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
        }

        final int[] ids = parsePosition(page.getUrl());
        final int last = ids.length - 1;
        final int firstPosition = ids[last];
        final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
                getServiceId());

        // Probe one position past the page size: that extra element is not committed, it only
        // tells whether a following page exists.
        Element next = null;
        for (int offset = 0; offset <= MAX_COMMENTS_PER_PAGE; offset++) {
            ids[last] = firstPosition + offset;
            next = getComments(ids).first();
            if (next == null || offset == MAX_COMMENTS_PER_PAGE) {
                break;
            }
            // Hand over a copy: ids keeps being mutated by the following iterations.
            collector.commit(
                    new RumbleCommentsInfoItemExtractor(this, ids.clone(), responseBody));
        }

        final Page nextPage = next != null
                ? new Page(intArrayToString(ids), responseBody)
                : null;
        return new InfoItemsPage<>(collector, nextPage);
    }

    /** Nothing to prefetch: all requests are made by {@link #getInitialPage()}. */
    @Override
    public void onFetchPage(@Nonnull final Downloader downloader)
            throws IOException, ExtractionException {
    }

    /**
     * Selects comments by their position path.
     *
     * @param id one 1-based position per nesting level; a position of {@code 0} selects every
     *           comment of that level
     * @return the matching elements, empty when no comment tree is loaded
     */
    public Elements getComments(final int[] id) {
        if (doc == null) {
            // An empty result keeps callers that immediately call first()/size() from failing.
            return new Elements();
        }
        final StringBuilder selection = new StringBuilder();
        int level = 1;
        for (final int position : id) {
            if (level != 1) {
                selection.append(" > div.comment-replies > ");
            }
            selection.append("ul.comments-").append(level++).append(" > li.comment-item");
            if (position != 0) {
                selection.append(":nth-child(").append(position).append(")");
            }
        }
        return doc.select(selection.toString());
    }

    /**
     * Looks up the avatar of the user who wrote the given comment.
     *
     * @param e comment element
     * @return the text captured inside {@code url(...)} of the matching avatar rule, or
     *         {@code null} when the comment has no avatar element or no rule matches
     */
    public String getImage(final Element e) {
        final Element avatar = e.selectFirst("i.user-image");
        if (avatar == null || imageMap == null) {
            return null;
        }
        for (final String name : avatar.className().split(" ")) {
            if (name.startsWith(AVATAR_CLASS_PREFIX) && imageMap.containsKey(name)) {
                return imageMap.get(name);
            }
        }
        return null;
    }

    /**
     * Joins the values with single spaces, e.g. {@code {3, 1}} becomes {@code "3 1"}.
     */
    public static String intArrayToString(final int[] intArray) {
        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i < intArray.length; i++) {
            if (i > 0) {
                sb.append(" ");
            }
            sb.append(intArray[i]);
        }
        return sb.toString();
    }

    /**
     * Inverse of {@link #intArrayToString(int[])}.
     *
     * @throws NumberFormatException if a token is not an integer
     */
    private static int[] stringToIntArray(final String str) {
        final String[] tokens = str.split(" ");
        final int[] values = new int[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
            values[i] = Integer.parseInt(tokens[i]);
        }
        return values;
    }

    /** Parses a page URL into its position path, reporting bad input as a parsing error. */
    private static int[] parsePosition(final String pageUrl) throws ParsingException {
        if (pageUrl == null || pageUrl.isEmpty()) {
            throw new ParsingException("Comments page does not contain a position");
        }
        try {
            return stringToIntArray(pageUrl);
        } catch (final NumberFormatException e) {
            throw new ParsingException("Invalid comments page position: " + pageUrl, e);
        }
    }

    /** Rebuilds {@link #imageMap} from the avatar rules found in the given style sheet. */
    private void initImageMap(final String css) {
        final Map<String, String> images = new HashMap<>();
        final Matcher matcher = AVATAR_RULE_PATTERN.matcher(css);
        while (matcher.find()) {
            images.put(AVATAR_CLASS_PREFIX + matcher.group(1), matcher.group(2));
        }
        imageMap = images;
    }

    /**
     * Parses the service response and updates {@link #doc} and {@link #imageMap}.
     *
     * <p>A {@code null} body, or a response lacking {@code html} or {@code css_libs}, leaves
     * the current state untouched. A response without a top level comment list clears
     * {@link #doc}.</p>
     *
     * @throws ExtractionException if the body is not valid JSON
     */
    private void loadFromResponseBody(final byte[] responseBody) throws ExtractionException {
        if (responseBody == null) {
            return;
        }

        final JsonObject info;
        try {
            info = JsonParser.object().from(
                    new String(responseBody, java.nio.charset.StandardCharsets.UTF_8));
        } catch (final JsonParserException e) {
            // Keep the cause so the parser's position information is not lost.
            throw new ExtractionException("Could not read json from: " + getUrl(), e);
        }

        if (!info.has("html") || !info.has("css_libs")) {
            return;
        }

        final Object html = info.get("html");
        if (html == null) {
            // The member is present but JSON null: there is no comment tree to show.
            doc = null;
            return;
        }

        // Work on a local document so that doc never exposes a half prepared tree.
        final Document parsed = Jsoup.parse(html.toString());
        if (parsed.selectFirst("ul.comments-1") == null) {
            doc = null;
            return;
        }
        // Drop the "write a comment" entry; otherwise it would occupy a list position.
        parsed.select("li.comment-item.comment-item.comments-create").remove();
        doc = parsed;

        final Object css = info.get("css_libs");
        initImageMap(css == null ? "" : css.toString());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
package org.schabi.newpipe.extractor.services.rumble.extractor;

import org.jsoup.nodes.Element;

import org.schabi.newpipe.extractor.Image;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleCommentsExtractor;
import org.schabi.newpipe.extractor.stream.Description;

import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.Locale;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import static org.schabi.newpipe.extractor.services.rumble.extractors.RumbleCommentsExtractor.intArrayToString;

public class RumbleCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
private final RumbleCommentsExtractor extractor;
private final int[] id;
private byte[] responseBody;
private Element element;

public RumbleCommentsInfoItemExtractor(final RumbleCommentsExtractor extractor,
final int[] id, final byte[] responseBody) {
this.extractor = extractor;
this.id = id;
this.responseBody = responseBody;
this.element = extractor.getComments(id).first();
}

@Override
public int getLikeCount() throws ParsingException {
return Integer.parseInt(element.selectFirst("div.rumbles-vote span.rumbles-count").text());
}

@Override
public String getTextualLikeCount() throws ParsingException {
return element.selectFirst("div.rumbles-vote span.rumbles-count").text() + " " +
element.selectFirst("div.rumbles-vote span.rumbles-unit").text();
}

@Override
public Description getCommentText() {
return new Description(element.selectFirst("p.comment-text").wholeText(),
Description.PLAIN_TEXT);
}

@Override
public String getTextualUploadDate() {
return element.selectFirst("a.comments-meta-post-time").attr("title");
}

@Nullable
@Override
public DateWrapper getUploadDate() throws ParsingException {
final var formatter = DateTimeFormatter.ofPattern("EEEE, MMMM d, yyyy h:mm a x", Locale.ENGLISH);
final var datetime = ZonedDateTime.parse(getTextualUploadDate(), formatter);
return new DateWrapper(datetime.toOffsetDateTime(), false);
}

@Override
public String getCommentId() {
return element.attr("data-comment-id");
}

@Override
public String getUploaderUrl() {
return "https://rumble.com" + element.selectFirst("a.comments-meta-author").attr("href");
}

@Override
public String getUploaderName() {
return element.selectFirst("a.comments-meta-author").text();
}

@Nonnull
@Override
public List<Image> getUploaderAvatars() throws ParsingException {
String image = extractor.getImage(element);
if (image == null) {
return List.of();
}
return List.of(new Image(image,
Image.HEIGHT_UNKNOWN, Image.WIDTH_UNKNOWN, Image.ResolutionLevel.UNKNOWN));
}

public boolean isPinned() throws ParsingException {
return element.selectFirst("> div.comments-meta > span.pinned-text") != null;
}

public boolean isUploaderVerified() throws ParsingException {
return element.selectFirst("> div.comments-meta > div.comments-meta-user-badges > img[alt='Verified']") != null;
}

private int[] getReplyId() {
int[] replyId = new int[id.length + 1];
System.arraycopy(id, 0, replyId, 0, id.length);
return replyId;
}

public int getReplyCount() throws ParsingException {
int[] replyId = getReplyId();
replyId[id.length] = 0;
return extractor.getComments(replyId).size();
}

@Nullable
public Page getReplies() throws ParsingException {
int[] replyId = getReplyId();
replyId[id.length] = 0;
if (extractor.getComments(replyId).size() == 0) {
return null;
}
replyId[id.length] = 1;
return new Page(intArrayToString(replyId), responseBody);
}

public boolean isChannelOwner() throws ParsingException {
return element.selectFirst("> div.comments-meta > a.comments-meta-author-video-owner") != null;
}

@Override
public String getName() throws ParsingException {
return getUploaderName();
}

@Override
public String getUrl() {
try {
return extractor.getUrl();
}
catch (ParsingException e) {
return null;
}
}

@Nonnull
@Override
public List<Image> getThumbnails() throws ParsingException {
return getUploaderAvatars();
}

}
Loading

0 comments on commit 4794f57

Please sign in to comment.