Skip to content

Commit

Permalink
update entity replacement method to avoid errors with foreign characters
Browse files Browse the repository at this point in the history
  • Loading branch information
kbravh committed Aug 31, 2022
1 parent f9c2209 commit aec0004
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 58 deletions.
2 changes: 1 addition & 1 deletion manifest.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"id": "obsidian-tweet-to-markdown",
"name": "Tweet to Markdown",
"version": "2.10.2",
"version": "2.10.3",
"minAppVersion": "0.12.17",
"description": "Save tweets as Markdown files, along with their images, polls, etc.",
"author": "kbravh",
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "obsidian-tweet-to-markdown",
"version": "2.10.2",
"version": "2.10.3",
"description": "Save tweets as beautiful markdown files in Obsidian (https://obsidian.md)",
"main": "main.js",
"engines": {
Expand Down
122 changes: 67 additions & 55 deletions src/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,16 @@ import {
TAbstractFile,
} from 'obsidian'
import {createDownloadManager, DownloadManager} from './downloadManager'
import type {Media, Poll, Tweet, User} from './types/tweet'
import type {
Entities,
Media,
Mention,
Poll,
Tag,
Tweet,
TweetURL,
User,
} from './types/tweet'
import {decode} from 'html-entities'
import {moment} from 'obsidian'
import {TimestampFormat} from './types/plugin'
Expand Down Expand Up @@ -306,6 +315,62 @@ export const createMediaElements = (
.filter(medium => !!medium)
}

/**
 * Minimal shape shared by every entity that gets spliced into the tweet text:
 * the `start` and `end` offsets of the span to replace and the `replacement`
 * string to insert in its place. All other fields of the source entity types
 * are dropped from the type by the `Pick`.
 */
type GenericEntity = Pick<
Mention & Tag & TweetURL & {replacement: string},
'start' | 'end' | 'replacement'
>
/**
 * Replace any mentions, hashtags, cashtags and urls in a tweet's text with
 * Markdown links.
 *
 * Mentions, hashtags and cashtags are replaced by position, using the `start`
 * and `end` offsets on each entity. URLs are replaced by literal text match
 * (every occurrence of the short `url`), after de-duplicating them by
 * `expanded_url`.
 *
 * @param entities - entity lists for the tweet; a missing object or missing
 *   lists are treated as empty
 * @param text - the tweet text to transform
 * @returns the text with every entity rewritten as a Markdown link
 */
export const replaceEntities = (entities: Entities, text: string): string => {
  /**
   * Each entity comes with start and end indices. However, if we were to replace
   * them in the order they occur, the indices further down the line would be shifted
   * and inaccurate. So we sort them in reverse order and work up from the end of the tweet.
   */
  const allEntities: GenericEntity[] = [
    ...(entities?.mentions ?? []).map(mention => ({
      ...mention,
      replacement: `[@${mention.username}](https://twitter.com/${mention.username})`,
    })),
    ...(entities?.hashtags ?? []).map(hashtag => ({
      ...hashtag,
      replacement: `[#${hashtag.tag}](https://twitter.com/hashtag/${hashtag.tag})`,
    })),
    ...(entities?.cashtags ?? []).map(cashtag => ({
      ...cashtag,
      replacement: `[$${cashtag.tag}](https://twitter.com/search?q=%24${cashtag.tag})`,
    })),
    // Sort in reverse order
  ].sort((a, b) => b.start - a.start)

  // Keep only the first url entity for each expanded_url.
  const seenUrls = new Set<string>()
  const urls = (entities?.urls ?? []).filter(url => {
    if (seenUrls.has(url.expanded_url)) {
      return false
    }
    seenUrls.add(url.expanded_url)
    return true
  })

  /**
   * Entity offsets count Unicode code points (per Twitter's entities
   * documentation), whereas JS string indices count UTF-16 code units. Emoji
   * and other astral characters occupy two code units, so we index into an
   * array of code points instead of using `substring` on the raw string.
   */
  const chars = Array.from(text)
  let lowestReplacedStart = Infinity
  for (const entity of allEntities) {
    // Skip an entity that overlaps a span we already replaced (e.g. an exact
    // duplicate); splicing it would cut into the link inserted for that span.
    if (entity.end > lowestReplacedStart) {
      continue
    }
    // The replacement is stored as a single array element, which keeps every
    // lower index valid for the entities still to be processed.
    chars.splice(entity.start, entity.end - entity.start, entity.replacement)
    lowestReplacedStart = entity.start
  }

  let result = chars.join('')
  for (const url of urls) {
    // An empty needle would match between every character.
    if (!url.url) {
      continue
    }
    // split/join replaces every literal occurrence without treating the short
    // url as a regex pattern and without interpreting `$&`, `$$`, etc. inside
    // the replacement string.
    result = result
      .split(url.url)
      .join(`[${url.display_url}](${url.expanded_url})`)
  }

  return result
}

/**
* Creates the entire Markdown string of the provided tweet
*/
Expand Down Expand Up @@ -343,60 +408,7 @@ export const buildMarkdown = async (
* replace entities with markdown links
*/
if (tweet.data?.entities && plugin.settings.includeLinks) {
/**
* replace any mentions, hashtags, cashtags, urls with links
*/
/**
* replace any mentions, hashtags, cashtags, urls with links
*/
const mentions = [
...new Set(
(tweet.data.entities?.mentions ?? []).map(mention => mention.username)
),
]
const tags = [
...new Set(
(tweet.data.entities?.hashtags ?? []).map(hashtag => hashtag.tag)
),
]
const cashtags = [
...new Set(
(tweet.data.entities?.cashtags ?? []).map(cashtag => cashtag.tag)
),
]
const urlSet = new Set()
const urls = (tweet.data.entities?.urls ?? []).filter(url => {
if (urlSet.has(url.expanded_url)) {
return false
} else {
urlSet.add(url.expanded_url)
return true
}
})
mentions.forEach(username => {
text = text.replace(
new RegExp(`@${username}`, 'g'),
`[@${username}](https://twitter.com/${username})`
)
})
tags.forEach(tag => {
text = text.replace(
new RegExp(`#${tag}`, 'g'),
`[#${tag}](https://twitter.com/hashtag/${tag}) `
)
})
cashtags.forEach(tag => {
text = text.replace(
new RegExp(`$${tag}`, 'g'),
`[$${tag}](https://twitter.com/search?q=%24${tag})`
)
})
urls.forEach(url => {
text = text.replace(
new RegExp(url.url, 'g'),
`[${url.display_url}](${url.expanded_url})`
)
})
text = replaceEntities(tweet.data.entities, text)
}

const date = formatTimestamp(tweet.data.created_at, {
Expand Down
2 changes: 1 addition & 1 deletion versions.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"2.10.2": "0.12.17"
"2.10.3": "0.12.17"
}

0 comments on commit aec0004

Please sign in to comment.