Skip to content

Commit

Permalink
fix(route): segmentfault (#13580)
Browse files Browse the repository at this point in the history
  • Loading branch information
TonyRL authored Oct 20, 2023
1 parent 2d44202 commit 83c0f28
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 87 deletions.
29 changes: 6 additions & 23 deletions lib/v2/segmentfault/blogs.js
Original file line number Diff line number Diff line change
@@ -1,34 +1,17 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');

const host = 'https://segmentfault.com';
const { host, acw_sc__v2, parseList, parseItems } = require('./utils');

module.exports = async (ctx) => {
const tag = ctx.params.tag;
const apiURL = `https://segmentfault.com/gateway/tag/${tag}/articles?loadMoreType=pagination&initData=true&page=1&sort=newest&pageSize=30`;
const { tag } = ctx.params;
const apiURL = `${host}/gateway/tag/${tag}/articles?loadMoreType=pagination&initData=true&page=1&sort=newest&pageSize=30`;
const response = await got(apiURL);
const data = response.data.rows;

const list = data.map((item) => ({
title: item.title,
link: new URL(item.url, host).href,
author: item.user.name,
}));

const items = await Promise.all(
list.map((item) =>
ctx.cache.tryGet(item.link, async () => {
const response = await got(item.link);
const content = cheerio.load(response.data);
const list = parseList(data);

item.description = content('article').html();
item.pubDate = parseDate(content('time').attr('datetime'));
const acwScV2Cookie = await acw_sc__v2(list[0].link, ctx.cache.tryGet);

return item;
})
)
);
const items = await Promise.all(list.map((item) => parseItems(acwScV2Cookie, item, ctx.cache.tryGet)));

ctx.state.data = {
title: `segmentfault-Blogs-${tag}`,
Expand Down
48 changes: 18 additions & 30 deletions lib/v2/segmentfault/channel.js
Original file line number Diff line number Diff line change
@@ -1,45 +1,33 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');

const host = 'https://segmentfault.com';
const { host, acw_sc__v2, parseList, parseItems } = require('./utils');

module.exports = async (ctx) => {
const name = ctx.params.name;
const { name } = ctx.params;

const link = `${host}/channel/${name}`;
const response = await got(link);
const $ = cheerio.load(response.data);

const channel_name = $('#leftNav > a.active').text();
const { data: pageResponse } = await got(link);
const { data: apiResponse } = await got(`${host}/gateway/articles`, {
searchParams: {
query: 'channel',
slug: name,
offset: 0,
size: ctx.query.limit ? parseInt(ctx.query.limit, 10) : 20,
mode: 'scrollLoad',
},
});

const list = $('ul.bg-transparent.list-group.list-group-flush > li')
.slice(0, 10)
.map((_, item) => ({
link: new URL($(item).find('div.content > h3.h5 > a').attr('href'), host).href,
title: $(item).find('div.content > h3.h5 > a').text(),
author: $(item).find('span.name').text(),
}))
.get();
const $ = cheerio.load(pageResponse);
const channelName = $('#leftNav > a.active').text();

const items = await Promise.all(
list.map((item) =>
ctx.cache.tryGet(item.link, async () => {
const detailResponse = await got(item.link);
const content = cheerio.load(detailResponse.data);
const list = parseList(apiResponse.rows);

item.description = content('article')
.html()
.replace(/data-src="/g, `src="${host}`);
item.pubDate = parseDate(content('time').attr('datetime'));
const acwScV2Cookie = await acw_sc__v2(list[0].link, ctx.cache.tryGet);

return item;
})
)
);
const items = await Promise.all(list.map((item) => parseItems(acwScV2Cookie, item, ctx.cache.tryGet)));

ctx.state.data = {
title: `segmentfault - ${channel_name}`,
title: `segmentfault - ${channelName}`,
link,
item: items,
};
Expand Down
38 changes: 6 additions & 32 deletions lib/v2/segmentfault/user.js
Original file line number Diff line number Diff line change
@@ -1,45 +1,19 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');
const { acw_sc__v2 } = require('./utils');
const host = 'https://segmentfault.com';
const { host, acw_sc__v2, parseList, parseItems } = require('./utils');

module.exports = async (ctx) => {
const name = ctx.params.name;
const apiURL = `https://segmentfault.com/gateway/homepage/${name}/timeline?size=20&offset=`;
const { name } = ctx.params;
const apiURL = `${host}/gateway/homepage/${name}/timeline?size=20&offset=`;

const response = await got(apiURL);
const data = response.data.rows;

const author = data[0].user.name;
const list = data.map((item) => ({
title: item.title,
description: item.excerpt,
link: new URL(item.url, host).href,
author,
}));
const list = parseList(data);
const { author } = list[0];

const acwScV2Cookie = await acw_sc__v2(list[0].link, ctx.cache.tryGet);

const items = await Promise.all(
list.map((item) =>
ctx.cache.tryGet(item.link, async () => {
const response = await got(item.link, {
headers: {
cookie: `acw_sc__v2=${acwScV2Cookie};`,
},
});
const content = cheerio.load(response.data);

item.description = content('article')
.html()
.replace(/data-src="/g, `src="${host}`);
item.pubDate = parseDate(content('time').attr('datetime'));

return item;
})
)
);
const items = await Promise.all(list.map((item) => parseItems(acwScV2Cookie, item, ctx.cache.tryGet)));

ctx.state.data = {
title: `segmentfault - ${author}`,
Expand Down
32 changes: 32 additions & 0 deletions lib/v2/segmentfault/utils.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
const zlib = require('zlib');
const got = require('@/utils/got');
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');
const config = require('@/config').value;
const { getAcwScV2ByArg1 } = require('@/v2/5eplay/utils');

// Base URL of the SegmentFault site; relative article URLs are resolved against it.
const host = 'https://segmentfault.com';

const acw_sc__v2 = (link, tryGet) =>
tryGet(
'segmentfault:acw_sc__v2',
Expand All @@ -29,6 +33,34 @@ const acw_sc__v2 = (link, tryGet) =>
false
);

/**
 * Map raw article rows from a SegmentFault gateway response to feed items.
 *
 * @param {Array<object>} data - Rows, each read for `title`, `url`, `user.name` and `created`.
 * @returns {Array<{title: *, link: string, author: *, pubDate: *}>} One item per row;
 *     `link` is `url` resolved against `host`, and `pubDate` is `created` run through
 *     `parseDate` with the 'X' format token (presumably Unix seconds — confirm against parseDate).
 */
const parseList = (data) =>
    data.map(({ title, url, user, created }) => {
        // Row URLs may be site-relative, so resolve them to absolute links.
        const { href } = new URL(url, host);

        return {
            title,
            link: href,
            author: user.name,
            pubDate: parseDate(created, 'X'),
        };
    });

/**
 * Load an article page and fill in the item's `description` and `category`.
 *
 * The work is handed to `tryGet`, keyed by `item.link` (callers pass `ctx.cache.tryGet`,
 * so this is presumably skipped on a cache hit — confirm against the cache implementation).
 *
 * @param {string} cookie - Value sent as the `acw_sc__v2` cookie with the page request.
 * @param {object} item - Feed item with a `link`; it is mutated in place and returned.
 * @param {Function} tryGet - Called as `tryGet(key, asyncLoader)`.
 * @returns {*} Whatever `tryGet` returns for this item.
 */
const parseItems = (cookie, item, tryGet) =>
    tryGet(item.link, async () => {
        const headers = { cookie: `acw_sc__v2=${cookie};` };
        const { data: html } = await got(item.link, { headers });
        const $ = cheerio.load(html);

        // Full article body as HTML.
        item.description = $('article').html();
        // Text of every `.badge-tag` element on the page becomes a category.
        item.category = $('.badge-tag')
            .toArray()
            .map((tag) => $(tag).text());

        return item;
    });

// Shared constants and helpers for the segmentfault route handlers.
module.exports = {
host,
acw_sc__v2,
parseList,
parseItems,
};
4 changes: 2 additions & 2 deletions website/docs/routes/programming.md
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,7 @@ Subscribe to the updates (threads and submission) from a particular Hacker News

<Route author="p7e4" example="/sec-wiki/weekly" path="/sec-wiki/weekly" />

## segmentfault {#segmentfault}
## SegmentFault {#segmentfault}

### 频道 {#segmentfault-pin-dao}

Expand All @@ -838,7 +838,7 @@ Subscribe to the updates (threads and submission) from a particular Hacker News

### 博客 {#segmentfault-bo-ke}

<Route author="shiluanzzz" example="/segmentfault/blogs/go" path="/segmentfault/blogs/:tag" paramsDesc={['标签名称, 在 https://segmentfault.com/tags 中可以找到']} radar="1"/>
<Route author="shiluanzzz" example="/segmentfault/blogs/go" path="/segmentfault/blogs/:tag" paramsDesc={['标签名称,在 [标签](https://segmentfault.com/tags) 中可以找到']} radar="1"/>

## Smashing Magazine {#smashing-magazine}

Expand Down

0 comments on commit 83c0f28

Please sign in to comment.