Skip to content

Commit

Permalink
Fix cookie caching, add fetchHomeTimeline
Browse files Browse the repository at this point in the history
  • Loading branch information
lalalune committed Sep 6, 2024
1 parent e0e0f7c commit 9860a47
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 7 deletions.
1 change: 0 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
TWITTER_USERNAME=myaccount
TWITTER_PASSWORD=MyPassword!!!
TWITTER_EMAIL=[email protected]
TWITTER_COOKIES= # Check the README for how to set this-- important if you don't want your account to get flagged
PROXY_URL= # HTTP(s) proxy for requests (optional)
1 change: 0 additions & 1 deletion .github/workflows/node.js.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ jobs:
# TWITTER_USERNAME: ${{ secrets.TWITTER_USERNAME }}
# TWITTER_PASSWORD: ${{ secrets.TWITTER_PASSWORD }}
# TWITTER_EMAIL: ${{ secrets.TWITTER_EMAIL }}
# TWITTER_COOKIES: ${{ secrets.TWITTER_COOKIES }}
# run: npm run test
- name: Build
run: npm run build
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,6 @@ dist
docs/

# Yarn lockfile
yarn.lock
yarn.lock

cookies.json
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ Configure environment variables for authentication.
TWITTER_USERNAME= # Account username
TWITTER_PASSWORD= # Account password
TWITTER_EMAIL= # Account email
TWITTER_COOKIES= # JSON-serialized array of cookies of an authenticated session
PROXY_URL= # HTTP(s) proxy for requests (necessary for browsers)
```

Expand Down
14 changes: 14 additions & 0 deletions src/scraper.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
import { Scraper } from './scraper';
import { getScraper } from './test-utils';

// Integration test: obtains a scraper through getScraper() and requests the
// home timeline. The 30 s timeout passed to test() leaves room for the
// authentication step and the timeline request.
test('scraper can fetch home timeline', async () => {
  const scraper = await getScraper();

  const count = 20;
  // Start with no previously seen tweets.
  const seenTweetIds: string[] = [];

  const homeTimeline = await scraper.fetchHomeTimeline(count, seenTweetIds);

  // Assert the shape explicitly instead of dumping the whole payload to the
  // test output with console.log.
  expect(Array.isArray(homeTimeline)).toBe(true);
  expect(homeTimeline.length).toBeGreaterThan(0);
  // Each returned item is expected to carry a rest_id field.
  expect(homeTimeline[0]?.rest_id).toBeDefined();
}, 30000);

test('scraper uses response transform when provided', async () => {
const scraper = new Scraper({
Expand Down
14 changes: 14 additions & 0 deletions src/scraper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import {
createCreateTweetRequest,
} from './tweets';
import { parseTimelineTweetsV2, TimelineV2 } from './timeline-v2';
import { fetchHomeTimeline, HomeTimelineResponse } from './timeline-home';

const twUrl = 'https://twitter.com';
const UserTweetsUrl =
Expand Down Expand Up @@ -255,6 +256,19 @@ export class Scraper {
return fetchProfileFollowers(userId, maxProfiles, this.auth, cursor);
}

/**
 * Requests the home timeline using this scraper's current auth.
 *
 * Delegates to `fetchHomeTimeline` from `./timeline-home`, forwarding
 * `this.auth` as the authentication context.
 *
 * @param count Number of timeline entries to request.
 * @param seenTweetIds IDs of tweets that have already been seen.
 * @returns A promise resolving to the array produced by the
 * `./timeline-home` helper.
 */
public async fetchHomeTimeline(
  count: number,
  seenTweetIds: string[],
): Promise<any[]> {
  const timelineTweets = await fetchHomeTimeline(
    count,
    seenTweetIds,
    this.auth,
  );
  return timelineTweets;
}

async getUserTweets(
userId: string,
maxTweets = 200,
Expand Down
37 changes: 34 additions & 3 deletions src/test-utils.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { HttpsProxyAgent } from 'https-proxy-agent';
import { Scraper } from './scraper';
import fs from 'fs';
import { CookieJar } from 'tough-cookie';

export interface ScraperTestOptions {
/**
Expand All @@ -17,11 +19,40 @@ export async function getScraper(
const password = process.env['TWITTER_PASSWORD'];
const email = process.env['TWITTER_EMAIL'];
const twoFactorSecret = process.env['TWITTER_2FA_SECRET'];
const cookies = process.env['TWITTER_COOKIES'];

let cookiesArray: any = null;

// try to read cookies by reading cookies.json with fs and parsing
// check if cookies.json exists
if (!fs.existsSync('./cookies.json')) {
console.error(
'cookies.json not found, using password auth - this is NOT recommended!',
);
} else {
try {
const cookiesText = fs.readFileSync('./cookies.json', 'utf8');
cookiesArray = JSON.parse(cookiesText);
} catch (e) {
console.error('Error parsing cookies.json', e);
}
}

const cookieStrings = cookiesArray?.map(
(cookie: any) =>
`${cookie.key}=${cookie.value}; Domain=${cookie.domain}; Path=${
cookie.path
}; ${cookie.secure ? 'Secure' : ''}; ${
cookie.httpOnly ? 'HttpOnly' : ''
}; SameSite=${cookie.sameSite || 'Lax'}`,
);

const proxyUrl = process.env['PROXY_URL'];
let agent: any;

if (options.authMethod === 'cookies' && !cookies) {
if (
options.authMethod === 'cookies' &&
(!cookieStrings || cookieStrings.length === 0)
) {
console.warn(
'TWITTER_COOKIES variable is not defined, reverting to password auth (not recommended)',
);
Expand Down Expand Up @@ -54,7 +85,7 @@ export async function getScraper(
if (options.authMethod === 'password') {
await scraper.login(username!, password!, email, twoFactorSecret);
} else if (options.authMethod === 'cookies') {
await scraper.setCookies(JSON.parse(cookies!));
await scraper.setCookies(cookieStrings);
}

return scraper;
Expand Down
93 changes: 93 additions & 0 deletions src/timeline-home.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import { requestApi } from './api';
import { TwitterAuth } from './auth';
import { ApiError } from './errors';
import { TimelineInstruction } from './timeline-v2';

/**
 * Shape of the JSON body returned by the `HomeTimeline` GraphQL request,
 * restricted to the fields that `fetchHomeTimeline` reads.
 */
export interface HomeTimelineResponse {
  // Optional: the whole payload may be absent from the response body.
  data?: {
    home: {
      home_timeline_urt: {
        // Timeline instructions; `fetchHomeTimeline` only consumes the
        // entries of those whose type is 'TimelineAddEntries'.
        instructions: TimelineInstruction[];
      };
    };
  };
}

/**
 * Fetches the home timeline through the `HomeTimeline` GraphQL endpoint and
 * returns the raw tweet result objects found in it.
 *
 * Only entries belonging to `TimelineAddEntries` instructions are inspected.
 * Entries that carry no `content.itemContent.tweet_results.result` are
 * skipped.
 *
 * @param count Sent as the `count` request variable.
 * @param seenTweetIds Sent as the `seenTweetIds` request variable.
 * @param auth Authentication context handed to `requestApi`.
 * @returns The raw tweet result objects, or an empty array when the response
 * contains no timeline instructions.
 * @throws Rethrows `res.err` when the request is not successful. If the error
 * is an `ApiError`, its `data` is logged first.
 */
export async function fetchHomeTimeline(
  count: number,
  seenTweetIds: string[],
  auth: TwitterAuth,
): Promise<any[]> {
  const variables = {
    count,
    includePromotedContent: true,
    latestControlAvailable: true,
    requestContext: 'launch',
    withCommunity: true,
    seenTweetIds,
  };

  // Feature flags sent alongside the query variables.
  const features = {
    rweb_tipjar_consumption_enabled: true,
    responsive_web_graphql_exclude_directive_enabled: true,
    verified_phone_label_enabled: false,
    creator_subscriptions_tweet_preview_api_enabled: true,
    responsive_web_graphql_timeline_navigation_enabled: true,
    responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
    communities_web_enable_tweet_community_results_fetch: true,
    c9s_tweet_anatomy_moderator_badge_enabled: true,
    articles_preview_enabled: true,
    responsive_web_edit_tweet_api_enabled: true,
    graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
    view_counts_everywhere_api_enabled: true,
    longform_notetweets_consumption_enabled: true,
    responsive_web_twitter_article_tweet_consumption_enabled: true,
    tweet_awards_web_tipping_enabled: false,
    creator_subscriptions_quote_tweet_preview_enabled: false,
    freedom_of_speech_not_reach_fetch_enabled: true,
    standardized_nudges_misinfo: true,
    tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled:
      true,
    rweb_video_timestamps_enabled: true,
    longform_notetweets_rich_text_read_enabled: true,
    longform_notetweets_inline_media_enabled: true,
    responsive_web_enhance_cards_enabled: false,
  };

  const res = await requestApi<HomeTimelineResponse>(
    `https://x.com/i/api/graphql/HJFjzBgCs16TqxewQOeLNg/HomeTimeline?variables=${encodeURIComponent(
      JSON.stringify(variables),
    )}&features=${encodeURIComponent(JSON.stringify(features))}`,
    auth,
    'GET',
  );

  if (!res.success) {
    if (res.err instanceof ApiError) {
      console.error('Error details:', res.err.data);
    }
    throw res.err;
  }

  // Every level is optional-chained: a body that has `data` but lacks `home`
  // (or `home_timeline_urt`) must yield an empty result, not a TypeError.
  const instructions =
    res.value?.data?.home?.home_timeline_urt?.instructions;

  if (!instructions) {
    return [];
  }

  const entries: any[] = [];

  for (const instruction of instructions) {
    if (instruction.type !== 'TimelineAddEntries') {
      continue;
    }
    for (const entry of instruction.entries ?? []) {
      entries.push(entry);
    }
  }

  // Pull the tweet result out of each entry's itemContent. Entries without
  // `content` or without a tweet result are dropped; `!= null` removes both
  // `undefined` and `null` results.
  const tweets = entries
    .map((entry) => entry?.content?.itemContent?.tweet_results?.result)
    .filter((tweet) => tweet != null);

  return tweets;
}

0 comments on commit 9860a47

Please sign in to comment.