diff --git a/.env.example b/.env.example
index 3f7037b..38d6249 100644
--- a/.env.example
+++ b/.env.example
@@ -1,5 +1,4 @@
 TWITTER_USERNAME=myaccount
 TWITTER_PASSWORD=MyPassword!!!
 TWITTER_EMAIL=myemail@gmail.com
-TWITTER_COOKIES= # Check the README for how to set this-- important if you don't want your account to get flagged
 PROXY_URL= # HTTP(s) proxy for requests (optional)
\ No newline at end of file
diff --git a/.github/workflows/node.js.yml b/.github/workflows/node.js.yml
index 463f056..b5a921a 100644
--- a/.github/workflows/node.js.yml
+++ b/.github/workflows/node.js.yml
@@ -23,7 +23,6 @@ jobs:
       #     TWITTER_USERNAME: ${{ secrets.TWITTER_USERNAME }}
      #     TWITTER_PASSWORD: ${{ secrets.TWITTER_PASSWORD }}
      #     TWITTER_EMAIL: ${{ secrets.TWITTER_EMAIL }}
-      #     TWITTER_COOKIES: ${{ secrets.TWITTER_COOKIES }}
      #   run: npm run test
      - name: Build
        run: npm run build
diff --git a/.gitignore b/.gitignore
index 6f44753..ebae517 100644
--- a/.gitignore
+++ b/.gitignore
@@ -116,4 +116,6 @@ dist
 docs/
 
 # Yarn lockfile
-yarn.lock
\ No newline at end of file
+yarn.lock
+
+cookies.json
\ No newline at end of file
diff --git a/README.md b/README.md
index 76381dc..94dc3e5 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,6 @@ Configure environment variables for authentication.
 TWITTER_USERNAME= # Account username
 TWITTER_PASSWORD= # Account password
 TWITTER_EMAIL= # Account email
-TWITTER_COOKIES= # JSON-serialized array of cookies of an authenticated session
 PROXY_URL= # HTTP(s) proxy for requests (necessary for browsers)
 ```
 
diff --git a/src/scraper.test.ts b/src/scraper.test.ts
index d37da7b..a7f1bc2 100644
--- a/src/scraper.test.ts
+++ b/src/scraper.test.ts
@@ -1,4 +1,18 @@
 import { Scraper } from './scraper';
+import { getScraper } from './test-utils';
+
+test('scraper can fetch home timeline', async () => {
+  const scraper = await getScraper();
+
+  const count = 20;
+  const seenTweetIds: string[] = [];
+
+  const homeTimeline = await scraper.fetchHomeTimeline(count, seenTweetIds);
+  console.log(homeTimeline);
+  expect(homeTimeline).toBeDefined();
+  expect(homeTimeline?.length).toBeGreaterThan(0);
+  expect(homeTimeline[0]?.rest_id).toBeDefined();
+}, 30000);
 
 test('scraper uses response transform when provided', async () => {
   const scraper = new Scraper({
diff --git a/src/scraper.ts b/src/scraper.ts
index 93fa868..c26f091 100644
--- a/src/scraper.ts
+++ b/src/scraper.ts
@@ -44,6 +44,7 @@ import {
   createCreateTweetRequest,
 } from './tweets';
 import { parseTimelineTweetsV2, TimelineV2 } from './timeline-v2';
+import { fetchHomeTimeline, HomeTimelineResponse } from './timeline-home';
 
 const twUrl = 'https://twitter.com';
 const UserTweetsUrl =
@@ -255,6 +256,19 @@ export class Scraper {
     return fetchProfileFollowers(userId, maxProfiles, this.auth, cursor);
   }
 
+  /**
+   * Fetches the home timeline for the current user.
+   * @param count The number of tweets to fetch.
+   * @param seenTweetIds An array of tweet IDs that have already been seen.
+   * @returns A promise that resolves to the home timeline response.
+   */
+  public async fetchHomeTimeline(
+    count: number,
+    seenTweetIds: string[],
+  ): Promise<any[]> {
+    return await fetchHomeTimeline(count, seenTweetIds, this.auth);
+  }
+
   async getUserTweets(
     userId: string,
     maxTweets = 200,
diff --git a/src/test-utils.ts b/src/test-utils.ts
index 497d352..a2322a6 100644
--- a/src/test-utils.ts
+++ b/src/test-utils.ts
@@ -1,5 +1,7 @@
 import { HttpsProxyAgent } from 'https-proxy-agent';
 import { Scraper } from './scraper';
+import fs from 'fs';
+import { CookieJar } from 'tough-cookie';
 
 export interface ScraperTestOptions {
   /**
@@ -17,11 +19,40 @@ export async function getScraper(
   const password = process.env['TWITTER_PASSWORD'];
   const email = process.env['TWITTER_EMAIL'];
   const twoFactorSecret = process.env['TWITTER_2FA_SECRET'];
-  const cookies = process.env['TWITTER_COOKIES'];
+
+  let cookiesArray: any = null;
+
+  // try to read cookies by reading cookies.json with fs and parsing
+  // check if cookies.json exists
+  if (!fs.existsSync('./cookies.json')) {
+    console.error(
+      'cookies.json not found, using password auth - this is NOT recommended!',
+    );
+  } else {
+    try {
+      const cookiesText = fs.readFileSync('./cookies.json', 'utf8');
+      cookiesArray = JSON.parse(cookiesText);
+    } catch (e) {
+      console.error('Error parsing cookies.json', e);
+    }
+  }
+
+  const cookieStrings = cookiesArray?.map(
+    (cookie: any) =>
+      `${cookie.key}=${cookie.value}; Domain=${cookie.domain}; Path=${
+        cookie.path
+      }; ${cookie.secure ? 'Secure' : ''}; ${
+        cookie.httpOnly ? 'HttpOnly' : ''
+      }; SameSite=${cookie.sameSite || 'Lax'}`,
+  );
+
   const proxyUrl = process.env['PROXY_URL'];
   let agent: any;
 
-  if (options.authMethod === 'cookies' && !cookies) {
+  if (
+    options.authMethod === 'cookies' &&
+    (!cookieStrings || cookieStrings.length === 0)
+  ) {
     console.warn(
       'TWITTER_COOKIES variable is not defined, reverting to password auth (not recommended)',
     );
@@ -54,7 +85,7 @@ export async function getScraper(
   if (options.authMethod === 'password') {
     await scraper.login(username!, password!, email, twoFactorSecret);
   } else if (options.authMethod === 'cookies') {
-    await scraper.setCookies(JSON.parse(cookies!));
+    await scraper.setCookies(cookieStrings);
   }
 
   return scraper;
diff --git a/src/timeline-home.ts b/src/timeline-home.ts
new file mode 100644
index 0000000..3011372
--- /dev/null
+++ b/src/timeline-home.ts
@@ -0,0 +1,93 @@
+import { requestApi } from './api';
+import { TwitterAuth } from './auth';
+import { ApiError } from './errors';
+import { TimelineInstruction } from './timeline-v2';
+
+export interface HomeTimelineResponse {
+  data?: {
+    home: {
+      home_timeline_urt: {
+        instructions: TimelineInstruction[];
+      };
+    };
+  };
+}
+
+export async function fetchHomeTimeline(
+  count: number,
+  seenTweetIds: string[],
+  auth: TwitterAuth,
+): Promise<any[]> {
+  const variables = {
+    count,
+    includePromotedContent: true,
+    latestControlAvailable: true,
+    requestContext: 'launch',
+    withCommunity: true,
+    seenTweetIds,
+  };
+
+  const features = {
+    rweb_tipjar_consumption_enabled: true,
+    responsive_web_graphql_exclude_directive_enabled: true,
+    verified_phone_label_enabled: false,
+    creator_subscriptions_tweet_preview_api_enabled: true,
+    responsive_web_graphql_timeline_navigation_enabled: true,
+    responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
+    communities_web_enable_tweet_community_results_fetch: true,
+    c9s_tweet_anatomy_moderator_badge_enabled: true,
+    articles_preview_enabled: true,
+    responsive_web_edit_tweet_api_enabled: true,
+    graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
+    view_counts_everywhere_api_enabled: true,
+    longform_notetweets_consumption_enabled: true,
+    responsive_web_twitter_article_tweet_consumption_enabled: true,
+    tweet_awards_web_tipping_enabled: false,
+    creator_subscriptions_quote_tweet_preview_enabled: false,
+    freedom_of_speech_not_reach_fetch_enabled: true,
+    standardized_nudges_misinfo: true,
+    tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled:
+      true,
+    rweb_video_timestamps_enabled: true,
+    longform_notetweets_rich_text_read_enabled: true,
+    longform_notetweets_inline_media_enabled: true,
+    responsive_web_enhance_cards_enabled: false,
+  };
+
+  const res = await requestApi<HomeTimelineResponse>(
+    `https://x.com/i/api/graphql/HJFjzBgCs16TqxewQOeLNg/HomeTimeline?variables=${encodeURIComponent(
+      JSON.stringify(variables),
+    )}&features=${encodeURIComponent(JSON.stringify(features))}`,
+    auth,
+    'GET',
+  );
+
+  if (!res.success) {
+    if (res.err instanceof ApiError) {
+      console.error('Error details:', res.err.data);
+    }
+    throw res.err;
+  }
+
+  const home = res.value?.data?.home.home_timeline_urt?.instructions;
+
+  if (!home) {
+    return [];
+  }
+
+  const entries: any[] = [];
+
+  for (const instruction of home) {
+    if (instruction.type === 'TimelineAddEntries') {
+      for (const entry of instruction.entries ?? []) {
+        entries.push(entry);
+      }
+    }
+  }
+  // get the itemContent from each entry
+  const tweets = entries
+    .map((entry) => entry.content.itemContent?.tweet_results?.result)
+    .filter((tweet) => tweet !== undefined);
+
+  return tweets;
+}
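
Usage note (not part of the diff): the new test in src/scraper.test.ts exercises the API end to end. Outside the Jest harness, a minimal sketch looks roughly like the following; it assumes a cookies.json exported from an authenticated session in the shape test-utils.ts expects (objects with key, value, domain, path, ...), and the 20 / [] arguments are illustrative.

// Sketch only: reuses the cookie handling added to test-utils.ts and the
// new Scraper.fetchHomeTimeline(count, seenTweetIds) method from this diff.
import fs from 'fs';
import { Scraper } from './scraper';

async function main() {
  const scraper = new Scraper();

  // Restore a previously captured session instead of password login.
  const cookiesArray = JSON.parse(fs.readFileSync('./cookies.json', 'utf8'));
  await scraper.setCookies(
    cookiesArray.map(
      (cookie: any) =>
        `${cookie.key}=${cookie.value}; Domain=${cookie.domain}; Path=${cookie.path}`,
    ),
  );

  // Fetch up to 20 home-timeline tweets; pass already-seen tweet IDs
  // as the second argument to avoid repeats.
  const tweets = await scraper.fetchHomeTimeline(20, []);
  console.log(tweets.map((tweet: any) => tweet?.rest_id));
}

main().catch(console.error);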