forked from TeamNewPipe/NewPipeExtractor
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add class for fetching rumble video comments
- Loading branch information
Showing
4 changed files
with
391 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
185 changes: 185 additions & 0 deletions
185
...java/org/schabi/newpipe/extractor/services/rumble/extractors/RumbleCommentsExtractor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
package org.schabi.newpipe.extractor.services.rumble.extractors; | ||
|
||
import com.grack.nanojson.JsonObject; | ||
import com.grack.nanojson.JsonParser; | ||
import com.grack.nanojson.JsonParserException; | ||
|
||
import org.jsoup.Jsoup; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.nodes.Element; | ||
import org.jsoup.nodes.TextNode; | ||
import org.jsoup.select.Elements; | ||
|
||
import org.schabi.newpipe.extractor.NewPipe; | ||
import org.schabi.newpipe.extractor.Page; | ||
import org.schabi.newpipe.extractor.StreamingService; | ||
import org.schabi.newpipe.extractor.comments.CommentsExtractor; | ||
import org.schabi.newpipe.extractor.comments.CommentsInfoItem; | ||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector; | ||
import org.schabi.newpipe.extractor.downloader.Downloader; | ||
import org.schabi.newpipe.extractor.downloader.Response; | ||
import org.schabi.newpipe.extractor.exceptions.ExtractionException; | ||
import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; | ||
import org.schabi.newpipe.extractor.services.rumble.extractor.RumbleCommentsInfoItemExtractor; | ||
import org.schabi.newpipe.extractor.services.rumble.RumbleParsingHelper; | ||
import org.schabi.newpipe.extractor.utils.JsonUtils; | ||
|
||
import javax.annotation.Nonnull; | ||
import java.io.IOException; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
import static org.schabi.newpipe.extractor.NewPipe.getDownloader; | ||
|
||
public class RumbleCommentsExtractor extends CommentsExtractor { | ||
private final int maxCommentsPerPage = 15; | ||
|
||
private Map<String, String> imageMap; | ||
|
||
private Document doc; | ||
|
||
public RumbleCommentsExtractor( | ||
final StreamingService service, | ||
final ListLinkHandler uiHandler) { | ||
super(service, uiHandler); | ||
} | ||
|
||
public boolean isCommentsDisabled() throws ExtractionException { | ||
return doc == null; | ||
} | ||
|
||
@Nonnull | ||
@Override | ||
public InfoItemsPage<CommentsInfoItem> getInitialPage() | ||
throws IOException, ExtractionException { | ||
Downloader downloader = NewPipe.getDownloader(); | ||
String id = RumbleParsingHelper.getEmbedVideoId(downloader.get(getUrl()).responseBody()); | ||
String url = "https://rumble.com/service.php?video=" + id + "&name=comment.list"; | ||
byte[] responseBody = downloader.get(url).responseBody().getBytes(); | ||
return getPage(new Page("1", responseBody)); | ||
} | ||
|
||
@Override | ||
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) | ||
throws IOException, ExtractionException { | ||
byte[] responseBody = page.getBody(); | ||
loadFromResponseBody(responseBody); | ||
if (isCommentsDisabled()) { | ||
return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList()); | ||
} | ||
int[] ids = stringToIntArray(page.getUrl()); | ||
int startIndex = ids[ids.length - 1] - 1; | ||
int count = startIndex + maxCommentsPerPage + 1; | ||
Element next = null; | ||
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector( | ||
getServiceId()); | ||
for (; startIndex < count; startIndex++) { | ||
ids[ids.length - 1] = startIndex + 1; | ||
next = getComments(ids).first(); | ||
if (next == null || startIndex == count - 1) { | ||
break; | ||
} | ||
collector.commit(new RumbleCommentsInfoItemExtractor(this, ids, responseBody)); | ||
} | ||
return new InfoItemsPage<>(collector, next != null ? | ||
new Page(intArrayToString(ids), responseBody) : null); | ||
} | ||
|
||
@Override | ||
public void onFetchPage(@Nonnull final Downloader downloader) | ||
throws IOException, ExtractionException { | ||
} | ||
|
||
public Elements getComments(int[] id) { | ||
if (doc == null) { | ||
return null; | ||
} | ||
int level = 1; | ||
StringBuilder selection = new StringBuilder(); | ||
for (int i : id) { | ||
if (level != 1) { | ||
selection.append(" > div.comment-replies > "); | ||
} | ||
selection.append("ul.comments-").append(level++).append(" > li.comment-item"); | ||
if (i != 0) { | ||
selection.append(":nth-child(").append(i).append(")"); | ||
} | ||
} | ||
return doc.select(selection.toString()); | ||
} | ||
|
||
public String getImage(Element e) { | ||
Element element = e.selectFirst("i.user-image"); | ||
if (element == null || imageMap == null) { | ||
return null; | ||
} | ||
String attr = element.className(); | ||
String[] classes = attr.split(" "); | ||
for (String name : classes) { | ||
if (name.startsWith("user-image--img--id-") && | ||
imageMap.containsKey(name)) { | ||
return imageMap.get(name); | ||
} | ||
} | ||
return null; | ||
} | ||
|
||
public static String intArrayToString(int[] intArray) { | ||
StringBuilder sb = new StringBuilder(); | ||
for (int i = 0; i < intArray.length; i++) { | ||
sb.append(intArray[i]); | ||
if (i < intArray.length - 1) { | ||
sb.append(" "); | ||
} | ||
} | ||
return sb.toString(); | ||
} | ||
|
||
private static int[] stringToIntArray(String str) { | ||
String[] stringArray = str.split(" "); | ||
int[] intArray = new int[stringArray.length]; | ||
for (int i = 0; i < stringArray.length; i++) { | ||
intArray[i] = Integer.parseInt(stringArray[i]); | ||
} | ||
return intArray; | ||
} | ||
|
||
private void initImageMap(String css) { | ||
Pattern pattern = Pattern.compile("i\\.user-image--img--id-(\\w+)\\s*\\{\\s*background-image:\\s*url\\(([^)]+)\\)"); | ||
Matcher matcher = pattern.matcher(css); | ||
imageMap = new HashMap<>(); | ||
while (matcher.find()) { | ||
String key = "user-image--img--id-" + matcher.group(1); | ||
String value = matcher.group(2); | ||
imageMap.put(key, value); | ||
} | ||
} | ||
|
||
private void loadFromResponseBody(byte[] responseBody) throws ExtractionException { | ||
try { | ||
if (responseBody == null) { | ||
return; | ||
} | ||
JsonObject info = JsonParser.object().from(new String(responseBody)); | ||
if (info.has("html") && info.has("css_libs")) { | ||
doc = Jsoup.parse(info.get("html").toString()); | ||
if (doc.selectFirst("ul.comments-1") == null) { | ||
doc = null; | ||
return; | ||
} | ||
Elements createComment = doc.select("li.comment-item.comment-item.comments-create"); | ||
if (createComment != null) { | ||
createComment.remove(); | ||
} | ||
initImageMap(info.get("css_libs").toString()); | ||
} | ||
} catch (final JsonParserException e) { | ||
e.printStackTrace(); | ||
throw new ExtractionException("Could not read json from: " + getUrl()); | ||
} | ||
} | ||
} |
149 changes: 149 additions & 0 deletions
149
.../schabi/newpipe/extractor/services/rumble/extractors/RumbleCommentsInfoItemExtractor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
package org.schabi.newpipe.extractor.services.rumble.extractor; | ||
|
||
import org.jsoup.nodes.Element; | ||
|
||
import org.schabi.newpipe.extractor.Image; | ||
import org.schabi.newpipe.extractor.Page; | ||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; | ||
import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||
import org.schabi.newpipe.extractor.localization.DateWrapper; | ||
import org.schabi.newpipe.extractor.services.rumble.extractors.RumbleCommentsExtractor; | ||
import org.schabi.newpipe.extractor.stream.Description; | ||
|
||
import java.time.ZonedDateTime; | ||
import java.time.format.DateTimeFormatter; | ||
import java.util.List; | ||
import java.util.Locale; | ||
|
||
import javax.annotation.Nonnull; | ||
import javax.annotation.Nullable; | ||
|
||
import static org.schabi.newpipe.extractor.services.rumble.extractors.RumbleCommentsExtractor.intArrayToString; | ||
|
||
public class RumbleCommentsInfoItemExtractor implements CommentsInfoItemExtractor { | ||
private final RumbleCommentsExtractor extractor; | ||
private final int[] id; | ||
private byte[] responseBody; | ||
private Element element; | ||
|
||
public RumbleCommentsInfoItemExtractor(final RumbleCommentsExtractor extractor, | ||
final int[] id, final byte[] responseBody) { | ||
this.extractor = extractor; | ||
this.id = id; | ||
this.responseBody = responseBody; | ||
this.element = extractor.getComments(id).first(); | ||
} | ||
|
||
@Override | ||
public int getLikeCount() throws ParsingException { | ||
return Integer.parseInt(element.selectFirst("div.rumbles-vote span.rumbles-count").text()); | ||
} | ||
|
||
@Override | ||
public String getTextualLikeCount() throws ParsingException { | ||
return element.selectFirst("div.rumbles-vote span.rumbles-count").text() + " " + | ||
element.selectFirst("div.rumbles-vote span.rumbles-unit").text(); | ||
} | ||
|
||
@Override | ||
public Description getCommentText() { | ||
return new Description(element.selectFirst("p.comment-text").wholeText(), | ||
Description.PLAIN_TEXT); | ||
} | ||
|
||
@Override | ||
public String getTextualUploadDate() { | ||
return element.selectFirst("a.comments-meta-post-time").attr("title"); | ||
} | ||
|
||
@Nullable | ||
@Override | ||
public DateWrapper getUploadDate() throws ParsingException { | ||
final var formatter = DateTimeFormatter.ofPattern("EEEE, MMMM d, yyyy h:mm a x", Locale.ENGLISH); | ||
final var datetime = ZonedDateTime.parse(getTextualUploadDate(), formatter); | ||
return new DateWrapper(datetime.toOffsetDateTime(), false); | ||
} | ||
|
||
@Override | ||
public String getCommentId() { | ||
return element.attr("data-comment-id"); | ||
} | ||
|
||
@Override | ||
public String getUploaderUrl() { | ||
return "https://rumble.com" + element.selectFirst("a.comments-meta-author").attr("href"); | ||
} | ||
|
||
@Override | ||
public String getUploaderName() { | ||
return element.selectFirst("a.comments-meta-author").text(); | ||
} | ||
|
||
@Nonnull | ||
@Override | ||
public List<Image> getUploaderAvatars() throws ParsingException { | ||
String image = extractor.getImage(element); | ||
if (image == null) { | ||
return List.of(); | ||
} | ||
return List.of(new Image(image, | ||
Image.HEIGHT_UNKNOWN, Image.WIDTH_UNKNOWN, Image.ResolutionLevel.UNKNOWN)); | ||
} | ||
|
||
public boolean isPinned() throws ParsingException { | ||
return element.selectFirst("> div.comments-meta > span.pinned-text") != null; | ||
} | ||
|
||
public boolean isUploaderVerified() throws ParsingException { | ||
return element.selectFirst("> div.comments-meta > div.comments-meta-user-badges > img[alt='Verified']") != null; | ||
} | ||
|
||
private int[] getReplyId() { | ||
int[] replyId = new int[id.length + 1]; | ||
System.arraycopy(id, 0, replyId, 0, id.length); | ||
return replyId; | ||
} | ||
|
||
public int getReplyCount() throws ParsingException { | ||
int[] replyId = getReplyId(); | ||
replyId[id.length] = 0; | ||
return extractor.getComments(replyId).size(); | ||
} | ||
|
||
@Nullable | ||
public Page getReplies() throws ParsingException { | ||
int[] replyId = getReplyId(); | ||
replyId[id.length] = 0; | ||
if (extractor.getComments(replyId).size() == 0) { | ||
return null; | ||
} | ||
replyId[id.length] = 1; | ||
return new Page(intArrayToString(replyId), responseBody); | ||
} | ||
|
||
public boolean isChannelOwner() throws ParsingException { | ||
return element.selectFirst("> div.comments-meta > a.comments-meta-author-video-owner") != null; | ||
} | ||
|
||
@Override | ||
public String getName() throws ParsingException { | ||
return getUploaderName(); | ||
} | ||
|
||
@Override | ||
public String getUrl() { | ||
try { | ||
return extractor.getUrl(); | ||
} | ||
catch (ParsingException e) { | ||
return null; | ||
} | ||
} | ||
|
||
@Nonnull | ||
@Override | ||
public List<Image> getThumbnails() throws ParsingException { | ||
return getUploaderAvatars(); | ||
} | ||
|
||
} |
Oops, something went wrong.