Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions packages/api/src/rich-text/detection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,19 @@ export function detectFacets(text: UnicodeString): Facet[] | undefined {
// links
const re = URL_REGEX
while ((match = re.exec(text.utf16))) {
let uri = match[2]
if (!uri.startsWith('http')) {
let uri = match.groups?.uri
const protocol = match.groups?.protocol
const tld = match.groups?.tld
if (protocol === undefined) {
const domain = match.groups?.domain
if (!domain || !isValidDomain(domain)) {
if (!domain || (tld !== undefined && !isValidDomain(domain))) {
continue
}
uri = `https://${uri}`
}
const start = text.utf16.indexOf(match[2], match.index)
const index = { start, end: start + match[2].length }
// strip ending puncuation
const start = text.utf16.indexOf(match.groups?.uri, match.index)
const index = { start, end: start + match.groups?.uri.length }
// strip ending punctuation
if (/[.,;:!?]$/.test(uri)) {
uri = uri.slice(0, -1)
index.end--
Expand Down
4 changes: 3 additions & 1 deletion packages/api/src/rich-text/util.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
export const MENTION_REGEX = /(^|\s|\()(@)([a-zA-Z0-9.-]+)(\b)/g
// inspired by https://gist.github.com/dperini/729294 (2018/09/12 version)
// gist credit: Diego Perini
Copy link
Contributor

@matthieusieben matthieusieben Aug 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that simply referencing this does not satisfy the license terms.

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Omg, you are right. I will include this text this afternoon.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also added a comment the same length as the regex, to make its individual parts easier to see:

export const URL_REGEX =
  /(?:^|\s|\()(?<uri>(?<protocol>https?:\/\/)?(?<domain>(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}\.(?:1\d\d|2[0-4]\d|25[0-4]|[1-9]\d?)|(?:(?:[a-z0-9\u00a1-\uffff][a-z0-9\u00a1-\uffff_-]*)?[a-z0-9\u00a1-\uffff]\.)+(?<tld>[a-z\u00a1-\uffff]{2,}))(?::\d{2,5})?(?:[/?#]\S*)?)/gim
//-(-prefix--)(uri---(-------protocol-------)-(domain---(not-private-and-loopback-ips)(---not-system-and-class-c-private-ips--)(----------not-class-b-private-ips-----------)(----------ip-1st-oct------------)(----------ip-2nd-and-3rd-oct---------)--(-----------ip-4th-oct------------)-(--------------------------------dns-domain---------------------------------)-(-------------tld------------))(---port---)-(---path---)-)

export const URL_REGEX =
/(^|\s|\()((https?:\/\/[\S]+)|((?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*))/gim
/(?:^|\s|\()(?<uri>(?<protocol>https?:\/\/)?(?<domain>(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}\.(?:1\d\d|2[0-4]\d|25[0-4]|[1-9]\d?)|(?:(?:[a-z0-9\u00a1-\uffff][a-z0-9\u00a1-\uffff_-]*)?[a-z0-9\u00a1-\uffff]\.)+(?<tld>[a-z\u00a1-\uffff]{2,})\.?)(?::\d{2,5})?(?:[/?#]\S*)?)/gim
export const TRAILING_PUNCTUATION_REGEX = /\p{P}+$/gu

/**
Expand Down
22 changes: 22 additions & 0 deletions packages/api/tests/rich-text-detection.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,15 @@ describe('detectFacets', () => {
'punctuation https://foo.com, https://bar.com/whatever; https://baz.com.',
'parenthentical (https://foo.com)',
'except for https://foo.com/thing_(cool)',
'HTTPS://google.com',
'https://google.COM',
'ko-fi.com',
'日本語.jp',
'GOOGLE.com',
'https://34.64.0.52',
'198.185.159.145',
'invalid IPs: http://127.0.0.1 https://255.255.255.255 https://0.0.0.0 https://169.254.1.1 https://1.1.1.011',
'invalid URIs: https://google.a https://localhost',
]
const outputs: string[][][] = [
[['no mention']],
Expand Down Expand Up @@ -212,6 +221,19 @@ describe('detectFacets', () => {
['except for '],
['https://foo.com/thing_(cool)', 'https://foo.com/thing_(cool)'],
],
[['HTTPS://google.com', 'HTTPS://google.com']],
[['https://google.COM', 'https://google.COM']],
[['ko-fi.com', 'https://ko-fi.com']],
[['日本語.jp', 'https://日本語.jp']],
[['GOOGLE.com', 'https://GOOGLE.com']],
[['https://34.64.0.52', 'https://34.64.0.52']],
[['198.185.159.145', 'https://198.185.159.145']],
[
[
'invalid IPs: http://127.0.0.1 https://255.255.255.255 https://0.0.0.0 https://169.254.1.1 https://1.1.1.011',
],
],
[['invalid URIs: https://google.a https://localhost']],
]
it('correctly handles a set of text inputs', async () => {
for (let i = 0; i < inputs.length; i++) {
Expand Down