diff --git a/lib/config.ts b/lib/config.ts
index 9227906df8b1d9..87a526083c0f74 100644
--- a/lib/config.ts
+++ b/lib/config.ts
@@ -351,6 +351,9 @@ export type Config = {
         device_id?: string;
         refresh_token?: string;
     };
+    xiaohongshu: {
+        cookie?: string;
+    };
     ximalaya: {
         token?: string;
     };
@@ -772,6 +775,9 @@ const calculateValue = () => {
             device_id: envs.XIAOYUZHOU_ID,
             refresh_token: envs.XIAOYUZHOU_TOKEN,
         },
+        xiaohongshu: {
+            cookie: envs.XIAOHONGSHU_COOKIE,
+        },
         ximalaya: {
             token: envs.XIMALAYA_TOKEN,
         },
diff --git a/lib/routes/xiaohongshu/notes.ts b/lib/routes/xiaohongshu/notes.ts
index a4d85a5d563425..2f58aa75a5bb44 100644
--- a/lib/routes/xiaohongshu/notes.ts
+++ b/lib/routes/xiaohongshu/notes.ts
@@ -1,9 +1,12 @@
 import { Route } from '@/types';
 import cache from '@/utils/cache';
-import { getNotes, formatText, formatNote } from './util';
+import { config } from '@/config';
+import * as cheerio from 'cheerio';
+import got from '@/utils/got';
+import { formatNote, formatText, getNotes } from './util';
 
 export const route: Route = {
-    path: '/user/:user_id/notes/fulltext',
+    path: '/user/:user_id/notes/:fulltext',
     radar: [
         {
             source: ['xiaohongshu.com/user/profile/:user_id'],
@@ -15,11 +18,22 @@ export const route: Route = {
     handler,
     example: '/xiaohongshu/user/52d8c541b4c4d60e6c867480/notes/fulltext',
     features: {
+        requireConfig: [
+            {
+                name: 'XIAOHONGSHU_COOKIE',
+                optional: true,
+                description: '小红书 cookie 值,可在浏览器控制台通过`document.cookie`获取。',
+            },
+        ],
         antiCrawler: true,
         requirePuppeteer: true,
     },
     parameters: {
         user_id: 'user id, length 24 characters',
+        fulltext: {
+            description: '是否获取全文',
+            default: '',
+        },
     },
 };
 
@@ -27,13 +41,121 @@ async function handler(ctx) {
     const userId = ctx.req.param('user_id');
     const url = `https://www.xiaohongshu.com/user/profile/${userId}`;
 
-    const { user, notes } = await getNotes(url, cache);
-
-    return {
-        title: `${user.nickname} - 笔记 • 小红书 / RED`,
-        description: formatText(user.desc),
-        image: user.imageb || user.images,
-        link: url,
-        item: notes.map((item) => formatNote(url, item)),
-    };
+    // Full-text mode needs both a configured cookie and a non-empty
+    // `fulltext` path segment; otherwise use the summary scraper in ./util.
+    if (config.xiaohongshu.cookie && ctx.req.param('fulltext')) {
+        const user = await getUser(url, config.xiaohongshu.cookie);
+        const { basicInfo } = user.userPageData;
+        return {
+            title: `${basicInfo.nickname} - 笔记 • 小红书 / RED`,
+            description: basicInfo.desc,
+            image: basicInfo.imageb || basicInfo.images,
+            link: url,
+            item: await renderNotesFulltext(user.notes, url),
+        };
+    }
+
+    const { user, notes } = await getNotes(url, cache);
+    return {
+        title: `${user.nickname} - 笔记 • 小红书 / RED`,
+        description: formatText(user.desc),
+        image: user.imageb || user.images,
+        link: url,
+        item: notes.map((item) => formatNote(url, item)),
+    };
+}
+
+const INITIAL_STATE_PREFIX = 'window.__INITIAL_STATE__=';
+const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36';
+
+/**
+ * Fetch a page and decode the object assigned to `window.__INITIAL_STATE__`
+ * in one of its inline `<script>` elements.
+ *
+ * @param url - Page to request.
+ * @param cookie - Sent as the `Cookie` header; the header is left out when empty.
+ * @returns The parsed state object.
+ * @throws Error when no script starts with the state assignment.
+ */
+async function fetchInitialState(url: string, cookie?: string) {
+    const res = await got(url, {
+        headers: {
+            'User-Agent': USER_AGENT,
+            ...(cookie ? { Cookie: cookie } : {}),
+        },
+    });
+    const $ = cheerio.load(res.data);
+
+    const script = $('script')
+        .filter((_, element) => $(element).text().startsWith(INITIAL_STATE_PREFIX))
+        .text();
+    if (!script) {
+        throw new Error(`Unable to find ${INITIAL_STATE_PREFIX} in the response for ${url}`);
+    }
+
+    // The payload is a JavaScript expression, not strict JSON: `undefined`
+    // has to become `null` before JSON.parse will accept it.
+    return JSON.parse(script.slice(INITIAL_STATE_PREFIX.length).replaceAll('undefined', 'null'));
+}
+
+/**
+ * Load the `user` slice of a profile page's initial state.
+ *
+ * @param url - Profile page URL.
+ * @param cookie - Cookie header value sent with the request.
+ * @returns `state.user` as embedded in the page.
+ */
+async function getUser(url: string, cookie: string) {
+    const state = await fetchInitialState(url, cookie);
+    return state.user;
+}
+
+/**
+ * Build one feed item per note card, fetching each note page for its full text.
+ *
+ * @param notes - Array of note-card arrays (`user.notes` from the profile state).
+ * @param url - Profile page URL; each note link is `${url}/${noteId}`.
+ * @returns Feed items in the same order as the flattened cards.
+ */
+async function renderNotesFulltext(notes, url: string) {
+    const items = notes.flat().map(async ({ noteCard }) => {
+        const link = `${url}/${noteCard.noteId}`;
+        const { title, description, pubDate } = await getFullNote(link);
+        return {
+            title,
+            link,
+            description,
+            author: noteCard.user.nickName,
+            guid: noteCard.noteId,
+            pubDate,
+        };
+    });
+    return Promise.all(items);
+}
+
+/**
+ * Fetch one note page and turn it into feed-item fields; the result is cached
+ * under the note link.
+ *
+ * @param link - Note page URL.
+ * @returns Title, HTML description (images, title, body) and publish date.
+ */
+async function getFullNote(link: string): Promise<{ title: string; description: string; pubDate: Date }> {
+    const cached = (await cache.tryGet(link, async () => {
+        const state = await fetchInitialState(link, config.xiaohongshu.cookie);
+        const note = state.note.noteDetailMap[state.note.firstNoteId].note;
+        const images = note.imageList.map((image) => `<img src="${image.urlDefault}">`).join('');
+        const desc = note.desc
+            .replaceAll(/\[.*?\]/g, '') // strip `[...]` bracketed markers
+            .replaceAll(/#(.*?)#/g, '#$1') // `#tag#` becomes `#tag`
+            .replaceAll('\n', '<br>');
+        return {
+            title: note.title,
+            description: `${images}<br>${note.title}<br>${desc}`,
+            pubDate: note.time,
+        };
+    })) as { title: string; description: string; pubDate: number | string };
+    // NOTE(review): the cache presumably round-trips values through JSON, which
+    // would turn a stored Date into a string; build the Date after the lookup.
+    return { ...cached, pubDate: new Date(cached.pubDate) };
 }