From dafe564e5c9291565e487da65a5110e7830b8bb3 Mon Sep 17 00:00:00 2001
From: Arturo Fonseca <arturofonsecas32@gmail.com>
Date: Sat, 30 Aug 2025 00:31:44 -0300
Subject: [PATCH 1/2] Fix URL regex in api/rich-text

---
 packages/api/src/rich-text/detection.ts       | 14 +++++++-----
 packages/api/src/rich-text/util.ts            |  4 +++-
 .../api/tests/rich-text-detection.test.ts     | 22 +++++++++++++++++++
 3 files changed, 33 insertions(+), 7 deletions(-)
diff --git a/packages/api/src/rich-text/detection.ts b/packages/api/src/rich-text/detection.ts
index f4b190e10fc..611979a3e19 100644
--- a/packages/api/src/rich-text/detection.ts
+++ b/packages/api/src/rich-text/detection.ts
@@ -41,17 +41,19 @@ export function detectFacets(text: UnicodeString): Facet[] | undefined {
     // links
     const re = URL_REGEX
     while ((match = re.exec(text.utf16))) {
-      let uri = match[2]
-      if (!uri.startsWith('http')) {
+      let uri = match.groups?.uri
+      const protocol = match.groups?.protocol
+      const tld = match.groups?.tld
+      if (protocol === undefined) {
         const domain = match.groups?.domain
-        if (!domain || !isValidDomain(domain)) {
+        if (!domain || (tld !== undefined && !isValidDomain(domain))) {
           continue
         }
         uri = `https://${uri}`
       }
-      const start = text.utf16.indexOf(match[2], match.index)
-      const index = { start, end: start + match[2].length }
-      // strip ending puncuation
+      const start = text.utf16.indexOf(match.groups?.uri, match.index)
+      const index = { start, end: start + match.groups?.uri.length }
+      // strip ending punctuation
       if (/[.,;:!?]$/.test(uri)) {
         uri = uri.slice(0, -1)
         index.end--
diff --git a/packages/api/src/rich-text/util.ts b/packages/api/src/rich-text/util.ts
index cafd93d84e0..185d3c45298 100644
--- a/packages/api/src/rich-text/util.ts
+++ b/packages/api/src/rich-text/util.ts
@@ -1,6 +1,8 @@
 export const MENTION_REGEX = /(^|\s|\()(@)([a-zA-Z0-9.-]+)(\b)/g
+// inspired by https://gist.github.com/dperini/729294 (2018/09/12 version)
+// gist credit: Diego Perini
 export const URL_REGEX =
-  /(^|\s|\()((https?:\/\/[\S]+)|((?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*))/gim
+  /(?:^|\s|\()(?<uri>(?<protocol>https?:\/\/)?(?<domain>(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}\.(?:1\d\d|2[0-4]\d|25[0-4]|[1-9]\d?)|(?:(?:[a-z0-9\u00a1-\uffff][a-z0-9\u00a1-\uffff_-]*)?[a-z0-9\u00a1-\uffff]\.)+(?<tld>[a-z\u00a1-\uffff]{2,})\.?)(?::\d{2,5})?(?:[/?#]\S*)?)/gim
 export const TRAILING_PUNCTUATION_REGEX = /\p{P}+$/gu
 
 /**
diff --git a/packages/api/tests/rich-text-detection.test.ts b/packages/api/tests/rich-text-detection.test.ts
index bb55f8222b1..92b2054fda2 100644
--- a/packages/api/tests/rich-text-detection.test.ts
+++ b/packages/api/tests/rich-text-detection.test.ts
@@ -60,6 +60,15 @@ describe('detectFacets', () => {
     'punctuation https://foo.com, https://bar.com/whatever; https://baz.com.',
     'parenthentical (https://foo.com)',
     'except for https://foo.com/thing_(cool)',
+    'HTTPS://google.com',
+    'https://google.COM',
+    'ko-fi.com',
+    '日本語.jp',
+    'GOOGLE.com',
+    'https://34.64.0.52',
+    '198.185.159.145',
+    'invalid IPs: http://127.0.0.1 https://255.255.255.255 https://0.0.0.0 https://169.254.1.1 https://1.1.1.011',
+    'invalid URIs: https://google.a https://localhost',
   ]
   const outputs: string[][][] = [
     [['no mention']],
@@ -212,6 +221,19 @@ describe('detectFacets', () => {
       ['except for '],
       ['https://foo.com/thing_(cool)', 'https://foo.com/thing_(cool)'],
     ],
+    [['HTTPS://google.com', 'HTTPS://google.com']],
+    [['https://google.COM', 'https://google.COM']],
+    [['ko-fi.com', 'https://ko-fi.com']],
+    [['日本語.jp', 'https://日本語.jp']],
+    [['GOOGLE.com', 'https://GOOGLE.com']],
+    [['https://34.64.0.52', 'https://34.64.0.52']],
+    [['198.185.159.145', 'https://198.185.159.145']],
+    [
+      [
+        'invalid IPs: http://127.0.0.1 https://255.255.255.255 https://0.0.0.0 https://169.254.1.1 https://1.1.1.011',
+      ],
+    ],
+    [['invalid URIs: https://google.a https://localhost']],
   ]
   it('correctly handles a set of text inputs', async () => {
     for (let i = 0; i < inputs.length; i++) {

From 23009d2ebd92b1e545c1524b41bec9dadb2d00b1 Mon Sep 17 00:00:00 2001
From: Arturo Fonseca <arturofonsecas32@gmail.com>
Date: Sat, 30 Aug 2025 22:10:19 -0300
Subject: [PATCH 2/2] add license text to url regex in api/rich-text

---
 packages/api/src/rich-text/util.ts            | 40 +++++++++++++++++--
 .../api/tests/rich-text-detection.test.ts     |  6 +++
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/packages/api/src/rich-text/util.ts b/packages/api/src/rich-text/util.ts
index 185d3c45298..9ce24dd55c1 100644
--- a/packages/api/src/rich-text/util.ts
+++ b/packages/api/src/rich-text/util.ts
@@ -1,8 +1,5 @@
 export const MENTION_REGEX = /(^|\s|\()(@)([a-zA-Z0-9.-]+)(\b)/g
-// inspired by https://gist.github.com/dperini/729294 (2018/09/12 version)
-// gist credit: Diego Perini
-export const URL_REGEX =
-  /(?:^|\s|\()(?<uri>(?<protocol>https?:\/\/)?(?<domain>(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}\.(?:1\d\d|2[0-4]\d|25[0-4]|[1-9]\d?)|(?:(?:[a-z0-9\u00a1-\uffff][a-z0-9\u00a1-\uffff_-]*)?[a-z0-9\u00a1-\uffff]\.)+(?<tld>[a-z\u00a1-\uffff]{2,})\.?)(?::\d{2,5})?(?:[/?#]\S*)?)/gim
+
 export const TRAILING_PUNCTUATION_REGEX = /\p{P}+$/gu
 
 /**
@@ -12,3 +9,38 @@ export const TRAILING_PUNCTUATION_REGEX = /\p{P}+$/gu
 export const TAG_REGEX =
   // eslint-disable-next-line no-misleading-character-class
   /(^|\s)[#＃]((?!\ufe0f)[^\s\u00AD\u2060\u200A\u200B\u200C\u200D\u20e2]*[^\d\s\p{P}\u00AD\u2060\u200A\u200B\u200C\u200D\u20e2]+[^\s\u00AD\u2060\u200A\u200B\u200C\u200D\u20e2]*)?/gu
+
+// The RegEx below is inspired by https://gist.github.com/dperini/729294 (accessed on 2025/08/30)
+// Regular Expression for URL validation
+//
+// Author: Diego Perini
+// Created: 2010/12/05
+// Updated: 2018/09/12
+// License: MIT
+//
+// Copyright (c) 2010-2018 Diego Perini (http://www.iport.it)
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+export const URL_REGEX =
+  /(?:^|\s|\()(?<uri>(?<protocol>https?:\/\/)?(?<domain>(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}\.(?:1\d\d|2[0-4]\d|25[0-4]|[1-9]\d?)|(?:(?:[a-z0-9\u00a1-\uffff][a-z0-9\u00a1-\uffff_-]*)?[a-z0-9\u00a1-\uffff]\.)+(?<tld>[a-z\u00a1-\uffff]{2,}))(?::\d{2,5})?(?:[/?#]\S*)?)/gim
+//-(-prefix--)(uri---(-------protocol-------)-(domain---(not-private-and-loopback-ips)(---not-system-and-class-c-private-ips--)(----------not-class-b-private-ips-----------)(----------ip-1st-oct------------)(----------ip-2nd-and-3rd-oct---------)--(-----------ip-4th-oct------------)-(--------------------------------dns-domain---------------------------------)-(-------------tld------------))(---port---)-(---path---)-)
diff --git a/packages/api/tests/rich-text-detection.test.ts b/packages/api/tests/rich-text-detection.test.ts
index 92b2054fda2..3c5a04aa06d 100644
--- a/packages/api/tests/rich-text-detection.test.ts
+++ b/packages/api/tests/rich-text-detection.test.ts
@@ -69,6 +69,7 @@ describe('detectFacets', () => {
     '198.185.159.145',
     'invalid IPs: http://127.0.0.1 https://255.255.255.255 https://0.0.0.0 https://169.254.1.1 https://1.1.1.011',
     'invalid URIs: https://google.a https://localhost',
+    'this is a website: google.com. The final dot it is not part of it',
   ]
   const outputs: string[][][] = [
     [['no mention']],
@@ -234,6 +235,11 @@ describe('detectFacets', () => {
       ],
     ],
     [['invalid URIs: https://google.a https://localhost']],
+    [
+      ['this is a website: '],
+      ['google.com', 'https://google.com'],
+      ['. The final dot it is not part of it'],
+    ],
   ]
   it('correctly handles a set of text inputs', async () => {
     for (let i = 0; i < inputs.length; i++) {