Created
June 4, 2025 12:47
-
-
Save alanhamlett/a087352389d118f7c2a25157b9adbb7b to your computer and use it in GitHub Desktop.
wonderful.dev markdown parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import type { TimelineTemplate } from '@acme/db/schema'; | |
import { makeExternalUrl, truncate, validateTLD } from './helpers'; | |
// NOTE: every pattern below carries the global (`g`) flag, so `exec`/`test` on them are
// stateful through `lastIndex`. Prefer `String.prototype.match` / `matchAll` / `split`.

/** Matches `@name` mentions: `@`, one ASCII letter, two word characters, then one or more word characters or hyphens. */
export const MENTION_REGEX = /@[a-zA-Z][\w\d][\w\d][\w\d-]+/g;
/** Matches `owner/repo` style references that begin with an ASCII letter on a word boundary. */
const REPO_REGEX = /\b[a-zA-Z][\w.-]*\/[\w.-]+/g;
/** Matches `owner/repo#123` style issue references that begin with an ASCII letter on a word boundary. */
const REPO_ISSUE_REGEX = /\b[a-zA-Z]+[\w.-]*\/[\w.-]+#\d+/g;
/** Matches `#tag` hashtags: `#`, one word character, then any run of word characters, `+` or `-`. */
export const HASHTAG_REGEX = /#[\w][\w+-]*/g;
/**
 * Matches bare URLs (scheme optional) at the start of the input or after a whitespace character.
 * When the URL follows whitespace, that whitespace character is part of the match.
 */
const URL_REGEX = /(?:^|\s)(https?:\/\/)?[a-zA-Z][-a-zA-Z0-9@:%._+~#=]{0,253}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_+.~#()?&//=]*)/g;
/** Matches the `](url)` tail of a markdown link; capture group 1 is the URL. */
const URL_REGEX_IN_MARKDOWN = /\]\(((https?:\/\/)?[a-zA-Z][-a-zA-Z0-9@:%._+~#=]{0,253}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_+.~#()?&//=]*))\)/g;
/**
 * Collects every `@mention` found in a post.
 *
 * @param content Raw post text, or `null`.
 * @returns The mentioned names with the leading `@` removed, in order of appearance
 * (duplicates are kept). Empty when `content` is `null` or holds no mention.
 */
export const getUsernamesFromPostContent = (content: string | null): string[] => {
  const handles = content?.match(MENTION_REGEX) ?? [];
  return handles.map((handle) => handle.slice(1));
};
/**
 * Collects `owner/repo` references from a post.
 *
 * A reference only counts when it sits at the very start of the content or directly
 * after a whitespace character.
 *
 * The position of each match is taken from the regex match itself. Re-locating the
 * matched text with `indexOf` could land on an earlier, non-matching occurrence of the
 * same characters (e.g. `"1a/b a/b"`) and wrongly discard the valid reference.
 *
 * @param content Raw post text, or `null`.
 * @returns Accepted references in order of appearance (duplicates are kept).
 */
export const getReposFromPostContent = (content: string | null): string[] => {
  if (!content) {
    return [];
  }
  const filtered: string[] = [];
  // matchAll works on a private copy of the regex, so the shared `lastIndex` is untouched.
  for (const match of content.matchAll(REPO_REGEX)) {
    const start = match.index ?? 0;
    if (start === 0 || /\s/.test(content.charAt(start - 1))) {
      filtered.push(match[0]);
    }
  }
  return filtered;
};
/**
 * Collects `owner/repo#123` issue references from a post.
 *
 * A reference only counts when it sits at the very start of the content or directly
 * after a whitespace character.
 *
 * The position of each match is taken from the regex match itself. Re-locating the
 * matched text with `indexOf` could land on an earlier, non-matching occurrence of the
 * same characters and wrongly discard the valid reference.
 *
 * @param content Raw post text, or `null`.
 * @returns Accepted references in order of appearance (duplicates are kept).
 */
export const getRepoIssuesFromPostContent = (content: string | null): string[] => {
  if (!content) {
    return [];
  }
  const filtered: string[] = [];
  // matchAll works on a private copy of the regex, so the shared `lastIndex` is untouched.
  for (const match of content.matchAll(REPO_ISSUE_REGEX)) {
    const start = match.index ?? 0;
    if (start === 0 || /\s/.test(content.charAt(start - 1))) {
      filtered.push(match[0]);
    }
  }
  return filtered;
};
/**
 * Returns the first link found in a post, looking at bare URLs first and markdown
 * link targets second (the order produced by `getLinksFromPostContentIncludingMarkdown`).
 *
 * @param content Raw post text, or `null`.
 * @returns The first link, or `undefined` when the post has none.
 */
export const getFirstLinkFromPostContent = (content: string | null) => {
  const found = getLinksFromPostContentIncludingMarkdown(content);
  return found.length > 0 ? found[0] : undefined;
};
/**
 * Finds bare URLs in a post.
 *
 * Every `URL_REGEX` match is passed through `validateTLD`; falsy results are discarded.
 *
 * @param content Raw post text, or `null`.
 * @returns The values returned by `validateTLD` for the accepted candidates, in order.
 */
export const getLinksFromPostContent = (content: string | null) => {
  const candidates = content?.match(URL_REGEX);
  if (!candidates) {
    return [];
  }
  const accepted = candidates.map((candidate) => validateTLD(candidate)).filter(Boolean);
  return accepted as string[];
};
/**
 * Finds every link in a post: bare URLs first, then the targets of markdown links
 * (`[text](url)`), with duplicates removed (first occurrence wins).
 *
 * Markdown targets are read from capture group 1 of `URL_REGEX_IN_MARKDOWN` via
 * `matchAll`. Calling `exec` on that global regex once per matched string carried
 * `lastIndex` over from the previous string, so every second markdown link failed
 * to match and was silently dropped.
 *
 * @param content Raw post text, or `null`.
 * @returns De-duplicated links in discovery order.
 */
export const getLinksFromPostContentIncludingMarkdown = (content: string | null) => {
  const links = getLinksFromPostContent(content);
  const linksInMarkdown: string[] = [];
  if (content) {
    for (const match of content.matchAll(URL_REGEX_IN_MARKDOWN)) {
      const validated = validateTLD(match[1]);
      if (validated) {
        linksInMarkdown.push(validated as string);
      }
    }
  }
  // A Set keeps insertion order, so the first occurrence of each link is retained.
  return Array.from(new Set([...links, ...linksInMarkdown]));
};
/**
 * Cuts a template list down to roughly `maxChars` characters of text.
 *
 * Each newline is weighted as 60 extra characters when counting. Once one template has
 * been shortened, every following template is dropped. Templates are modified in place
 * (`text` / `children` are overwritten when shortened).
 *
 * NOTE(review): nested `children` arrays are truncated against the full `maxChars` and
 * their length is not added to the running count — confirm this is intended.
 *
 * @param templates Templates to truncate.
 * @param maxChars Character budget; defaults to 600.
 * @returns Whether anything was shortened, plus the templates that were kept.
 * @throws Error when a template has a type this function does not handle.
 */
export const truncatePostTemplates = (
  templates: TimelineTemplate[],
  maxChars = 600,
): { isTruncated: boolean; templates: TimelineTemplate[] } => {
  const kept: TimelineTemplate[] = [];
  let used = 0;
  let truncatedAny = false;

  // Charges `raw` against the budget. Returns the shortened text when the budget is
  // exceeded and `truncate` actually cut something; otherwise returns undefined.
  const fit = (raw: string): string | undefined => {
    const usedBefore = used;
    const lineBreaks = Array.from(raw.matchAll(/\n/g)).length;
    used += raw.length + lineBreaks * 60;
    if (used > maxChars) {
      // Inflate newlines so `truncate` sees them at their weighted size, then collapse again.
      const padded = raw.replaceAll('\n', '\n'.repeat(60));
      const clipped = truncate(padded, maxChars - usedBefore);
      if (clipped !== padded) {
        truncatedAny = true;
        return clipped.replaceAll(/\n+/g, '\n');
      }
    }
    return undefined;
  };

  for (const template of templates) {
    switch (template.type) {
      case 'avatar':
      case 'icon_svg':
        if (truncatedAny) {
          break;
        }
        kept.push(template);
        break;
      case 'code':
      case 'code_multi':
      case 'mention':
      case 'mention_company':
      case 'h1':
      case 'h2':
      case 'h3':
      case 'text':
      case 'quote': {
        if (truncatedAny) {
          break;
        }
        const shortened = fit(template.text);
        if (shortened !== undefined) {
          template.text = shortened;
        }
        kept.push(template);
        break;
      }
      case 'bold':
      case 'italic':
      case 'boldanditalic':
      case 'strikethrough':
      case 'link': {
        if (truncatedAny) {
          break;
        }
        const inner = template.children;
        if (typeof inner === 'string') {
          const shortened = fit(inner);
          if (shortened !== undefined) {
            template.children = shortened;
          }
        } else {
          const nested = truncatePostTemplates(inner, maxChars);
          truncatedAny = nested.isTruncated;
          template.children = nested.templates;
        }
        kept.push(template);
        break;
      }
      default:
        throw new Error(`Missing implementation for template type: ${(template as TimelineTemplate).type}`);
    }
  }
  return { isTruncated: truncatedAny, templates: kept };
};
/**
 * Signature shared by every stage of the post parser. Each stage receives the text
 * fragment to parse plus the same lookup lists, and returns the templates for it.
 */
type PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
) => TimelineTemplate[];

/**
 * Entry point of the post parser: turns raw post text into timeline templates by
 * handing it to the code stage, which in turn delegates to the later stages.
 *
 * @param content Raw post text; `null` or empty yields an empty list.
 * @param mentions User names compared against lower-cased `@name` matches.
 * @param companyMentions Company slugs compared against lower-cased `@name` matches.
 * @param languages Hashtags that are turned into language links.
 * @param repos `owner/repo` references that are turned into links.
 * @param issues `owner/repo#123` references that are turned into links.
 * @returns The parsed templates.
 */
export const parsePostContent: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] =>
  content ? parsePostContentForCode(content, mentions, companyMentions, languages, repos, issues) : [];
/**
 * Parser stage for code: extracts inline code (single backticks, no line break inside)
 * and fenced blocks (triple backticks). Everything between code spans is handed to
 * `parsePostContentForQuote`.
 *
 * For a fenced block, the text between the opening fence and the first line break is
 * treated as a language line and skipped. A fence with no line break inside it
 * (including when the content has no line break at all) is emitted whole. The
 * no-line-break case used to fall into the language-line branch with `linebreak === -1`,
 * which sliced from the start of the content and leaked the opening fence into the code.
 *
 * @returns `code` / `code_multi` templates interleaved with the templates of the
 * surrounding text.
 */
const parsePostContentForCode: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const templates: TimelineTemplate[] = [];
  const len = content.length;
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForQuote(fragment, mentions, companyMentions, languages, repos, issues);
  // `index` is where the next search starts; `last` is the end of what has been emitted.
  let index = 0;
  let last = 0;
  while (index < len) {
    const inline = content.indexOf('`', index);
    const multi = content.indexOf('```', index);
    if (multi < 0 && inline < 0) {
      break;
    }
    if (inline >= 0 && (inline < multi || multi < 0)) {
      const end = content.indexOf('`', inline + 1);
      if (end > inline + 1) {
        const linebreak = content.indexOf('\n', inline + 1);
        // Inline code must not span a line break.
        if (linebreak < 0 || linebreak > end) {
          if (inline > last) {
            templates.push(...descend(content.substring(last, inline)));
          }
          templates.push({ text: content.substring(inline + 1, end), type: 'code' });
          index = end + 1;
          last = end + 1;
          continue;
        }
      }
    }
    if (multi >= 0) {
      const end = content.indexOf('```', multi + 3);
      if (end > multi + 3) {
        if (multi > last) {
          templates.push(...descend(content.substring(last, multi)));
        }
        const linebreak = content.indexOf('\n', multi + 3);
        // TODO: highlight code according to language
        // (the language is the text between the opening fence and `linebreak`).
        const codeStart = linebreak < 0 || linebreak > end ? multi + 3 : linebreak + 1;
        templates.push({ text: content.substring(codeStart, end), type: 'code_multi' });
        // Swallow a single line break directly after the closing fence.
        const step = content.charAt(end + 3) === '\n' ? 4 : 3;
        index = end + step;
        last = end + step;
        continue;
      }
    }
    // Nothing usable at this marker: skip past it and keep searching.
    index = inline > multi ? inline + 1 : multi + 3;
  }
  if (last < len) {
    templates.push(...descend(content.substring(last)));
  }
  return templates;
};
/**
 * Parser stage for block quotes. A quote starts with `> ` at the beginning of a line
 * (not immediately followed by a line break) and runs until the next blank line or the
 * end of the content. Inner `\n> ` line prefixes are reduced to plain line breaks.
 * Text outside quotes goes to `parsePostContentForBoldAndItalic`.
 */
const parsePostContentForQuote: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForBoldAndItalic(fragment, mentions, companyMentions, languages, repos, issues);
  const templates: TimelineTemplate[] = [];
  const total = content.length;
  let cursor = 0; // where the next search starts
  let emitted = 0; // end of the text already turned into templates
  while (cursor < total) {
    const marker = content.indexOf('> ', cursor);
    if (marker < 0) {
      break;
    }
    const startsLine = marker === 0 || content.charAt(marker - 1) === '\n';
    if (startsLine && content.charAt(marker + 2) !== '\n') {
      const blankLine = content.indexOf('\n\n', marker);
      const stop = blankLine < 0 ? total : blankLine;
      if (stop > marker + 2) {
        if (marker > emitted) {
          templates.push(...descend(content.substring(emitted, marker)));
        }
        templates.push({ text: content.substring(marker + 2, stop).replaceAll('\n> ', '\n'), type: 'quote' });
        // Skip the blank line that ended the quote.
        cursor = stop + 2;
        emitted = stop + 2;
        continue;
      }
    }
    cursor = marker + 2;
  }
  if (emitted < total) {
    templates.push(...descend(content.substring(emitted)));
  }
  return templates;
};
/**
 * Parser stage for bold-and-italic text: a non-empty span between two `***` markers
 * with no blank line in between. Everything else, and the inside of each span, goes to
 * `parsePostContentForBold`.
 *
 * When an opening marker cannot be paired, the remainder of the content is parsed as a
 * single fragment and the stage stops.
 *
 * The cursor now advances to just past the closing marker. It previously advanced by
 * `index + c.length + end + c.length`, which skipped text that followed the span.
 */
const parsePostContentForBoldAndItalic: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const c = '***';
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForBold(fragment, mentions, companyMentions, languages, repos, issues);
  const templates: TimelineTemplate[] = [];
  let index = 0;
  while (index < content.length) {
    const next = content.indexOf(c, index);
    if (next < 0) {
      templates.push(...descend(content.substring(index)));
      return templates;
    }
    const end = content.indexOf(c, next + c.length);
    const doubleNewline = content.indexOf('\n\n', next + c.length);
    const unclosed = end < 0 || (doubleNewline > -1 && doubleNewline < end);
    // `end === next + c.length` means the span would be empty.
    if (unclosed || end === next + c.length) {
      templates.push(...descend(content.substring(index)));
      return templates;
    }
    if (index < next) {
      templates.push(...descend(content.substring(index, next)));
    }
    templates.push({
      children: descend(content.substring(next + c.length, end)),
      type: 'boldanditalic',
    });
    index = end + c.length;
  }
  return templates;
};
/**
 * Parser stage for bold text: a non-empty span between two `**` or two `__` markers
 * with no blank line in between. Everything else, and the inside of each span, goes to
 * `parsePostContentForItalic`.
 *
 * Whichever marker appears first is used. Previously `__` was only considered when a
 * `**` also appeared later in the content, so `__bold__` on its own was never recognised.
 *
 * When an opening marker cannot be paired, the remainder of the content is parsed as a
 * single fragment and the stage stops.
 */
const parsePostContentForBold: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForItalic(fragment, mentions, companyMentions, languages, repos, issues);
  const templates: TimelineTemplate[] = [];
  let index = 0;
  while (index < content.length) {
    const next1 = content.indexOf('**', index);
    const next2 = content.indexOf('__', index);
    // Pick the marker that occurs first; a missing marker (-1) never wins.
    const c = next2 >= 0 && (next1 < 0 || next2 < next1) ? '__' : '**';
    const next = c === '__' ? next2 : next1;
    if (next < 0) {
      templates.push(...descend(content.substring(index)));
      return templates;
    }
    const end = content.indexOf(c, next + c.length);
    const doubleNewline = content.indexOf('\n\n', next + c.length);
    const unclosed = end < 0 || (doubleNewline > -1 && doubleNewline < end);
    // `end === next + c.length` means the span would be empty.
    if (unclosed || end === next + c.length) {
      templates.push(...descend(content.substring(index)));
      return templates;
    }
    if (index < next) {
      templates.push(...descend(content.substring(index, next)));
    }
    templates.push({
      children: descend(content.substring(next + c.length, end)),
      type: 'bold',
    });
    index = end + c.length;
  }
  return templates;
};
/**
 * Parser stage for italic text: a non-empty span between two `*` or two `_` markers
 * with no blank line in between and no space right after the opening marker.
 * Everything else, and the inside of each span, goes to `parsePostContentForStrikethrough`.
 *
 * Whichever marker appears first is used. Previously `_` was only considered when a `*`
 * also appeared later in the content, so `_italic_` on its own was never recognised.
 * NOTE(review): underscores inside words (e.g. snake_case) are treated as markers too,
 * as they already were whenever a `*` followed — confirm whether that is wanted.
 *
 * When an opening marker cannot be paired, the remainder of the content is parsed as a
 * single fragment and the stage stops.
 */
const parsePostContentForItalic: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForStrikethrough(fragment, mentions, companyMentions, languages, repos, issues);
  const templates: TimelineTemplate[] = [];
  let index = 0;
  while (index < content.length) {
    const next1 = content.indexOf('*', index);
    const next2 = content.indexOf('_', index);
    // Pick the marker that occurs first; a missing marker (-1) never wins.
    const c = next2 >= 0 && (next1 < 0 || next2 < next1) ? '_' : '*';
    const next = c === '_' ? next2 : next1;
    if (next < 0) {
      templates.push(...descend(content.substring(index)));
      return templates;
    }
    const end = content.indexOf(c, next + c.length);
    const doubleNewline = content.indexOf('\n\n', next + c.length);
    const unclosed = end < 0 || (doubleNewline !== -1 && doubleNewline < end);
    // A marker followed by a space is not an opening marker.
    const opensWithSpace = content.charAt(next + c.length) === ' ';
    // `end === next + c.length` means the span would be empty.
    if (unclosed || opensWithSpace || end === next + c.length) {
      templates.push(...descend(content.substring(index)));
      return templates;
    }
    if (index < next) {
      templates.push(...descend(content.substring(index, next)));
    }
    templates.push({
      children: descend(content.substring(next + c.length, end)),
      type: 'italic',
    });
    index = end + c.length;
  }
  return templates;
};
/**
 * Parser stage for struck-through text: a non-empty span between two `~~` markers with
 * no blank line in between. Everything else, and the inside of each span, goes to
 * `parsePostContentForThirdLevelHeadings`.
 *
 * When no usable pair is found from the current position, the remainder of the content
 * is parsed as a single fragment and the stage stops.
 */
const parsePostContentForStrikethrough: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const marker = '~~';
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForThirdLevelHeadings(fragment, mentions, companyMentions, languages, repos, issues);
  const out: TimelineTemplate[] = [];
  let cursor = 0;
  while (cursor < content.length) {
    const open = content.indexOf(marker, cursor);
    const bodyStart = open + marker.length;
    const close = open < 0 ? -1 : content.indexOf(marker, bodyStart);
    const blankLine = open < 0 ? -1 : content.indexOf('\n\n', bodyStart);
    const paired = open >= 0 && close >= 0 && !(blankLine > -1 && blankLine < close);
    // `close === bodyStart` would be an empty span, which is not treated as strikethrough.
    if (!paired || close === bodyStart) {
      out.push(...descend(content.substring(cursor)));
      return out;
    }
    if (cursor < open) {
      out.push(...descend(content.substring(cursor, open)));
    }
    out.push({
      children: descend(content.substring(bodyStart, close)),
      type: 'strikethrough',
    });
    cursor = close + marker.length;
  }
  return out;
};
/**
 * Parser stage for third-level headings: a line that starts with `### ` followed by at
 * least one character. The heading text runs to the end of that line. Text outside
 * headings goes to `parsePostContentForSecondLevelHeadings`.
 */
const parsePostContentForThirdLevelHeadings: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const prefix = '### ';
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForSecondLevelHeadings(fragment, mentions, companyMentions, languages, repos, issues);
  const templates: TimelineTemplate[] = [];
  const total = content.length;
  let cursor = 0; // where the next search starts
  let emitted = 0; // end of the text already turned into templates
  while (cursor < total) {
    const at = content.indexOf(prefix, cursor);
    if (at < 0) {
      break;
    }
    const bodyStart = at + prefix.length;
    const startsLine = at === 0 || content.charAt(at - 1) === '\n';
    if (startsLine && content.charAt(bodyStart) !== '\n') {
      const lineBreak = content.indexOf('\n', at);
      const lineEnd = lineBreak < 0 ? total : lineBreak;
      if (lineEnd > bodyStart) {
        if (at > emitted) {
          templates.push(...descend(content.substring(emitted, at)));
        }
        templates.push({ text: content.substring(bodyStart, lineEnd).replaceAll(`\n${prefix}`, '\n'), type: 'h3' });
        // Continue after the line break that ended the heading.
        cursor = lineEnd + 1;
        emitted = lineEnd + 1;
        continue;
      }
    }
    cursor = bodyStart;
  }
  if (emitted < total) {
    templates.push(...descend(content.substring(emitted)));
  }
  return templates;
};
/**
 * Parser stage for second-level headings: a line that starts with `## ` followed by at
 * least one character. The heading text runs to the end of that line. Text outside
 * headings goes to `parsePostContentForFirstLevelHeadings`.
 */
const parsePostContentForSecondLevelHeadings: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const prefix = '## ';
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForFirstLevelHeadings(fragment, mentions, companyMentions, languages, repos, issues);
  const templates: TimelineTemplate[] = [];
  const total = content.length;
  let cursor = 0; // where the next search starts
  let emitted = 0; // end of the text already turned into templates
  while (cursor < total) {
    const at = content.indexOf(prefix, cursor);
    if (at < 0) {
      break;
    }
    const bodyStart = at + prefix.length;
    const startsLine = at === 0 || content.charAt(at - 1) === '\n';
    if (startsLine && content.charAt(bodyStart) !== '\n') {
      const lineBreak = content.indexOf('\n', at);
      const lineEnd = lineBreak < 0 ? total : lineBreak;
      if (lineEnd > bodyStart) {
        if (at > emitted) {
          templates.push(...descend(content.substring(emitted, at)));
        }
        templates.push({ text: content.substring(bodyStart, lineEnd).replaceAll(`\n${prefix}`, '\n'), type: 'h2' });
        // Continue after the line break that ended the heading.
        cursor = lineEnd + 1;
        emitted = lineEnd + 1;
        continue;
      }
    }
    cursor = bodyStart;
  }
  if (emitted < total) {
    templates.push(...descend(content.substring(emitted)));
  }
  return templates;
};
/**
 * Parser stage for first-level headings: a line that starts with `# ` followed by at
 * least one character. The heading text runs to the end of that line. Text outside
 * headings goes to `parsePostContentForLinks`.
 */
const parsePostContentForFirstLevelHeadings: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const prefix = '# ';
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForLinks(fragment, mentions, companyMentions, languages, repos, issues);
  const templates: TimelineTemplate[] = [];
  const total = content.length;
  let cursor = 0; // where the next search starts
  let emitted = 0; // end of the text already turned into templates
  while (cursor < total) {
    const at = content.indexOf(prefix, cursor);
    if (at < 0) {
      break;
    }
    const bodyStart = at + prefix.length;
    const startsLine = at === 0 || content.charAt(at - 1) === '\n';
    if (startsLine && content.charAt(bodyStart) !== '\n') {
      const lineBreak = content.indexOf('\n', at);
      const lineEnd = lineBreak < 0 ? total : lineBreak;
      if (lineEnd > bodyStart) {
        if (at > emitted) {
          templates.push(...descend(content.substring(emitted, at)));
        }
        templates.push({ text: content.substring(bodyStart, lineEnd).replaceAll(`\n${prefix}`, '\n'), type: 'h1' });
        // Continue after the line break that ended the heading.
        cursor = lineEnd + 1;
        emitted = lineEnd + 1;
        continue;
      }
    }
    cursor = bodyStart;
  }
  if (emitted < total) {
    templates.push(...descend(content.substring(emitted)));
  }
  return templates;
};
/**
 * Parser stage for markdown links of the form `[text](href)`. Text outside links goes
 * to `parsePostContentForUrls`.
 *
 * Scanning stops at the first `[` that cannot be completed into a link (no `](`, no
 * closing `)`, or a blank line inside the text or href); the remainder of the content
 * is then parsed as a single fragment.
 *
 * When the link text itself starts with `http:` or `https:`, the href is shown instead
 * of the text.
 *
 * NOTE(review): `href` is emitted exactly as written in the post, with no scheme check
 * at this stage — confirm the renderer rejects schemes such as `javascript:`.
 */
const parsePostContentForLinks: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const opener = '[';
  const divider = '](';
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForUrls(fragment, mentions, companyMentions, languages, repos, issues);
  const templates: TimelineTemplate[] = [];
  const total = content.length;
  // Everything before `cursor` has already been turned into templates.
  let cursor = 0;
  while (cursor < total) {
    const textStart = content.indexOf(opener, cursor);
    if (textStart === -1) {
      break;
    }
    // The link text must be at least one character long.
    const textEnd = content.indexOf(divider, textStart + opener.length + 1);
    if (textEnd === -1) {
      break;
    }
    // not valid link when two newlines found
    const blankInText = content.indexOf('\n\n', textStart);
    if (blankInText !== -1 && blankInText < textEnd) {
      break;
    }
    // The href must be at least one character long.
    const hrefEnd = content.indexOf(')', textEnd + divider.length + 1);
    if (hrefEnd === -1) {
      break;
    }
    // not valid link when two newlines found
    const blankInHref = content.indexOf('\n\n', textEnd);
    if (blankInHref !== -1 && blankInHref < hrefEnd) {
      break;
    }
    // add any text before the link
    if (textStart > cursor) {
      templates.push(...descend(content.substring(cursor, textStart)));
    }
    const text = content.substring(textStart + opener.length, textEnd);
    const href = content.substring(textEnd + divider.length, hrefEnd);
    const textLooksLikeUrl = text.startsWith('http:') || text.startsWith('https:');
    templates.push({ children: textLooksLikeUrl ? href : text, href, type: 'link' });
    cursor = hrefEnd + 1;
  }
  if (cursor < total) {
    templates.push(...descend(content.substring(cursor)));
  }
  return templates;
};
/**
 * Parser stage for bare URLs. Each URL reported by `getLinksFromPostContent` is located
 * in the content, in order, with `_indexOfStartWithWhitespace`. A URL at the very start
 * of the content or directly after whitespace becomes a link template; otherwise its
 * text is parsed like ordinary text. Text between URLs goes to `parsePostContentForUsernames`.
 */
export const parsePostContentForUrls: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForUsernames(fragment, mentions, companyMentions, languages, repos, issues);
  const urls = getLinksFromPostContent(content);
  if (urls.length === 0) {
    return descend(content);
  }
  const templates: TimelineTemplate[] = [];
  let offset = 0;
  for (const url of urls) {
    const i = _indexOfStartWithWhitespace(content, url, offset);
    if (i < 0) {
      continue;
    }
    const at = offset + i;
    if (at === 0) {
      templates.push(_getLinkTemplateFromUrl(url));
    } else {
      if (i > 0) {
        templates.push(...descend(content.substring(offset, at)));
      }
      if (/\s/.test(content.charAt(at - 1))) {
        templates.push(_getLinkTemplateFromUrl(url));
      } else {
        // Not preceded by whitespace: keep the characters as ordinary text.
        templates.push(...descend(content.substring(at, at + url.length)));
      }
    }
    offset = at + url.length;
  }
  if (offset < content.length) {
    templates.push(...descend(content.substring(offset)));
  }
  return templates;
};
/**
 * Parser stage for `@mentions`. A match whose lower-cased name is listed in `mentions`
 * becomes a `mention` template; otherwise one listed in `companyMentions` becomes a
 * `mention_company` template. Unlisted matches stay part of the surrounding text, which
 * goes to `parsePostContentForRepoIssues`.
 */
const parsePostContentForUsernames: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForRepoIssues(fragment, mentions, companyMentions, languages, repos, issues);
  const nothingToLink = !mentions.length && !companyMentions.length;
  const usernames = nothingToLink ? null : content.match(MENTION_REGEX);
  if (!usernames) {
    return descend(content);
  }
  // split() yields the text pieces around the matches; piece N is followed by match N.
  return content.split(MENTION_REGEX).flatMap((before, position) => {
    const raw = usernames.at(position) ?? '';
    const username = raw.substring(1);
    const lowered = username.toLowerCase();
    if (username && mentions.includes(lowered)) {
      const mention: TimelineTemplate = { text: `@${username}`, type: 'mention', username: username };
      return [...descend(before), mention];
    }
    if (username && companyMentions.includes(lowered)) {
      const mention: TimelineTemplate = { text: `@${username}`, type: 'mention_company', slug: username };
      return [...descend(before), mention];
    }
    return descend(`${before}${raw}`);
  });
};
/**
 * Parser stage for `owner/repo#123` references. Each entry of `issues` is located in the
 * content, in order, with `_indexOfStartWithWhitespace`. A reference at the very start
 * of the content or directly after whitespace becomes a link to
 * `https://github.com/<owner/repo>/issues/<number>`; otherwise its text is parsed like
 * ordinary text. Text between references goes to `parsePostContentForRepos`.
 */
const parsePostContentForRepoIssues: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForRepos(fragment, mentions, companyMentions, languages, repos, issues);
  if (issues.length === 0) {
    return descend(content);
  }
  const toLink = (issue: string): TimelineTemplate => {
    const [repo, number] = issue.split('#');
    return { children: issue, href: `https://github.com/${repo}/issues/${number}`, type: 'link' };
  };
  const templates: TimelineTemplate[] = [];
  let offset = 0;
  for (const issue of issues) {
    const i = _indexOfStartWithWhitespace(content, issue, offset);
    if (i < 0) {
      continue;
    }
    const at = offset + i;
    if (at === 0) {
      templates.push(toLink(issue));
    } else {
      if (i > 0) {
        templates.push(...descend(content.substring(offset, at)));
      }
      if (/\s/.test(content.charAt(at - 1))) {
        templates.push(toLink(issue));
      } else {
        // Not preceded by whitespace: keep the characters as ordinary text.
        templates.push(...descend(content.substring(at, at + issue.length)));
      }
    }
    offset = at + issue.length;
  }
  if (offset < content.length) {
    templates.push(...descend(content.substring(offset)));
  }
  return templates;
};
/**
 * Parser stage for `owner/repo` references. Each entry of `repos` is located in the
 * content, in order, with `_indexOfStartWithWhitespace`. A reference at the very start
 * of the content or directly after whitespace becomes a link to
 * `https://github.com/<owner/repo>`; otherwise its text is parsed like ordinary text.
 * Text between references goes to `parsePostContentForHashtags`.
 */
const parsePostContentForRepos: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const descend = (fragment: string): TimelineTemplate[] =>
    parsePostContentForHashtags(fragment, mentions, companyMentions, languages, repos, issues);
  if (repos.length === 0) {
    return descend(content);
  }
  const toLink = (repo: string): TimelineTemplate => ({
    children: repo,
    href: `https://github.com/${repo}`,
    type: 'link',
  });
  const templates: TimelineTemplate[] = [];
  let offset = 0;
  for (const repo of repos) {
    const i = _indexOfStartWithWhitespace(content, repo, offset);
    if (i < 0) {
      continue;
    }
    const at = offset + i;
    if (at === 0) {
      templates.push(toLink(repo));
    } else {
      if (i > 0) {
        templates.push(...descend(content.substring(offset, at)));
      }
      if (/\s/.test(content.charAt(at - 1))) {
        templates.push(toLink(repo));
      } else {
        // Not preceded by whitespace: keep the characters as ordinary text.
        templates.push(...descend(content.substring(at, at + repo.length)));
      }
    }
    offset = at + repo.length;
  }
  if (offset < content.length) {
    templates.push(...descend(content.substring(offset)));
  }
  return templates;
};
/**
 * Parser stage for hashtags. A `#tag` whose name (compared exactly, without the `#`) is
 * listed in `languages` becomes a link to `/language/<tag>`. Unlisted hashtags stay part
 * of the surrounding text, which goes to `parsePostContentForText`.
 */
const parsePostContentForHashtags: PostParserFunction = (
  content: string | null,
  mentions: string[],
  companyMentions: string[],
  languages: string[],
  repos: string[],
  issues: string[],
): TimelineTemplate[] => {
  if (!content) {
    return [];
  }
  const asText = (fragment: string): TimelineTemplate[] =>
    parsePostContentForText(fragment, mentions, companyMentions, languages, repos, issues);
  const hashtags = languages.length ? content.match(HASHTAG_REGEX) : null;
  if (!hashtags) {
    return asText(content);
  }
  // split() yields the text pieces around the matches; piece N is followed by match N.
  return content.split(HASHTAG_REGEX).flatMap((before, position) => {
    const raw = hashtags.at(position) ?? '';
    const hashtag = raw.substring(1);
    if (hashtag && languages.includes(hashtag)) {
      const link: TimelineTemplate = { children: `#${hashtag}`, href: `/language/${hashtag}`, type: 'link' };
      return [...asText(before), link];
    }
    return asText(`${before}${raw}`);
  });
};
/**
 * Final parser stage: wraps whatever text is left in a single `text` template.
 * The lookup lists are accepted only to satisfy the shared stage signature.
 *
 * @returns One `text` template, or an empty list for `null` / empty content.
 */
const parsePostContentForText: PostParserFunction = (
  content: string | null,
  _mentions: string[],
  _companyMentions: string[],
  _languages: string[],
  _repos: string[],
  _issues: string[],
): TimelineTemplate[] => (content ? [{ text: content, type: 'text' }] : []);
/**
 * Builds the link template for a bare URL. The href is the result of `makeExternalUrl`;
 * the visible label is the host plus the path passed through `truncate(pathname, 16)`
 * (omitted when the path is just `/`), with a leading `www.` removed.
 */
const _getLinkTemplateFromUrl = (url: string): TimelineTemplate => {
  const href = makeExternalUrl(url);
  const parsed = new URL(href);
  const label = parsed.pathname === '/' ? parsed.host : parsed.host + truncate(parsed.pathname, 16);
  const children = label.startsWith('www.') ? label.substring('www.'.length) : label;
  return { children, href, type: 'link' };
};
/**
 * Finds the first occurrence of `searchStr` at or after `offset` that sits either at the
 * very start of `content` or directly after a whitespace character.
 *
 * Fixes over the previous version:
 * - the result is always relative to the `offset` that was passed in (it used to be
 *   relative to an internally advanced offset once an occurrence had been skipped, so
 *   callers computing `offset + result` sliced at the wrong position);
 * - a missing `searchStr` returns -1 immediately instead of feeding -1 into the offset
 *   arithmetic (which never terminated for a one-character `searchStr`).
 *
 * @param content Text to search.
 * @param searchStr Text to look for.
 * @param offset Position in `content` at which to start searching.
 * @returns The position of the match relative to `offset`, or -1 when there is none.
 */
const _indexOfStartWithWhitespace = (content: string, searchStr: string, offset: number) => {
  let from = offset;
  while (from < content.length) {
    const found = content.indexOf(searchStr, from);
    if (found < 0) {
      return -1;
    }
    if (found === 0 || /\s/.test(content.charAt(found - 1))) {
      return found - offset;
    }
    // This occurrence is glued to the preceding text; look for a later one.
    from = found + 1;
  }
  return -1;
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment