ether · JohnMcLear · May 16, 2026 · May 16, 2026
diff --git a/src/node/db/HistoricalAuthorDataCache.ts b/src/node/db/HistoricalAuthorDataCache.ts
@@ -0,0 +1,96 @@
+// Per-pad cache for the `{authorId -> {name, colorId}}` map used by
+// PadMessageHandler.handleClientReady to populate clientVars
+// (#7756 connect-handshake investigation).
+//
+// At 200+ authors a burst of 50 simultaneous CLIENT_READY handshakes
+// would otherwise each do Promise.all(authors.map(getAuthor)) =
+// 50 * 200 = 10 000 ueberdb cache lookups inside the join hot path,
+// competing for the event loop. This cache collapses that to one
+// computation shared across the simultaneous joins.
+//
+// Extracted into its own module (rather than nested inside Pad) so it can
+// be unit-tested without standing up the full pad / DB stack.
+
+export type AuthorRecord = {name: string; colorId: string};
+export type GetAuthorFn = (id: string) => Promise<AuthorRecord | null | undefined>;
+export type OnMissingAuthorFn = (id: string) => void;
+
+type CacheState = {
+  /** Resolved data. Empty `{}` until the first compute() resolves. */
+  data: {[id: string]: AuthorRecord};
+  /** Set iff a compute() is currently in flight. New callers await this same
+   *  promise rather than starting a duplicate compute. Cleared on resolve. */
+  promise?: Promise<{[id: string]: AuthorRecord}>;
+  /** Wall-clock time the current data was committed. Used for TTL only. */
+  builtAt: number;
+};
+
+export class HistoricalAuthorDataCache {
+  private state: CacheState | null = null;
+
+  constructor(
+    private readonly listAuthorIds: () => string[],
+    private readonly getAuthor: GetAuthorFn,
+    private readonly ttlMs: number = 5_000,
+    private readonly now: () => number = Date.now,
+    /** Called once per author id that the fetcher returns falsy for.
+     *  Lets the consumer preserve the error log that lived in the
+     *  previous inline Promise.all loop. Optional. */
+    private readonly onMissingAuthor: OnMissingAuthorFn = () => {},
+  ) {}
+
+  async get(): Promise<{[id: string]: AuthorRecord}> {
+    const now = this.now();
+    const s = this.state;
+    // In-flight compute: piggyback on it regardless of TTL — never start a
+    // second compute on top of a running one. The previous version could
+    // race two computes if the first ran past ttlMs, and the older
+    // resolution would clobber the newer cached value.
+    if (s?.promise) return cloneData(await s.promise);
+    if (s && now - s.builtAt < this.ttlMs) return cloneData(s.data);
+    return cloneData(await this.refresh(now));
+  }
+
+  /** Force the next get() to refetch. PadMessageHandler can call this when
+   *  a new author commits, if we add hookable author-add events later. */
+  invalidate(): void { this.state = null; }
+
+  private refresh(now: number): Promise<{[id: string]: AuthorRecord}> {
+    const promise = this.compute();
+    this.state = {data: {}, promise, builtAt: now};
+    promise.then(
+      (data) => {
+        // Only commit if our promise is still the one the state references —
+        // covers the (unlikely) case where invalidate() ran during compute.
+        if (this.state?.promise === promise) {
+          this.state = {data, builtAt: this.now()};
+        }
+      },
+      () => { if (this.state?.promise === promise) this.state = null; },
+    );
+    return promise;
+  }
+
+  private async compute(): Promise<{[id: string]: AuthorRecord}> {
+    const ids = this.listAuthorIds();
+    const out: {[id: string]: AuthorRecord} = {};
+    await Promise.all(ids.map(async (id) => {
+      const a = await this.getAuthor(id);
+      if (a) out[id] = {name: a.name, colorId: a.colorId};
+      else this.onMissingAuthor(id);
+    }));
+    return out;
+  }
+}
+
+// Defensive shallow copy on every get(). Callers (notably handleClientReady,
+// which embeds the result in clientVars and exposes it to the clientVars
+// hook) historically received a fresh object per call; preserving that
+// here so a mutation by one join can't bleed into the next.
+const cloneData = (
+  src: {[id: string]: AuthorRecord},
+): {[id: string]: AuthorRecord} => {
+  const out: {[id: string]: AuthorRecord} = {};
+  for (const k in src) out[k] = {name: src[k]!.name, colorId: src[k]!.colorId};
+  return out;
+};
diff --git a/src/node/db/Pad.ts b/src/node/db/Pad.ts
@@ -22,6 +22,9 @@ const padMessageHandler = require('../handler/PadMessageHandler');
 const groupManager = require('./GroupManager');
 const CustomError = require('../utils/customError');
 import readOnlyManager from './ReadOnlyManager';
+import {HistoricalAuthorDataCache} from './HistoricalAuthorDataCache';
+import log4js from 'log4js';
+const padMessageLogger = log4js.getLogger('message');
 import randomString from '../utils/randomstring';
 const hooks = require('../../static/js/pluginfw/hooks');
 import pad_utils from "../../static/js/pad_utils";
@@ -102,6 +105,10 @@ class Pad {
     private id: string;
     private savedRevisions: any[];
     private padSettings: PadSettings;
+    // Per-pad cache for handleClientReady's historicalAuthorData map. Lazily
+    // initialised on first call so we don't touch authorManager during pad
+    // construction. See HistoricalAuthorDataCache for the rationale (#7756).
+    private historicalAuthorDataCache: HistoricalAuthorDataCache | null = null;
   /**
    * @param id
    * @param [database] - Database object to access this pad's records (and only this pad's records;
@@ -326,6 +333,34 @@ class Pad {
     return authorIds;
   }
 
+  /**
+   * Returns the `{authorId -> {name, colorId}}` map used by handleClientReady
+   * to populate clientVars.collab_client_vars.historicalAuthorData. Cached
+   * per pad with a short TTL so a burst of simultaneous joins share one
+   * computation. Writes from `authorManager.setAuthorName` /
+   * `setAuthorColorId` become visible within at most the cache TTL (5s).
+   */
+  async getHistoricalAuthorData(): Promise<{[authorId: string]: {name: string; colorId: string}}> {
+    if (this.historicalAuthorDataCache == null) {
+      this.historicalAuthorDataCache = new HistoricalAuthorDataCache(
+        () => this.getAllAuthors(),
+        (id: string) => authorManager.getAuthor(id),
+        5_000,
+        Date.now,
+        (id: string) => {
+          // Preserves the explicit error log emitted by the previous inline
+          // Promise.all loop in handleClientReady before this cache landed.
+          // Don't drop missing-author logs silently — they point at
+          // https://github.com/ether/etherpad-lite/issues/2802.
+          padMessageLogger.error(
+            `There is no author for authorId: ${id}. ` +
+            'This is possibly related to https://github.com/ether/etherpad-lite/issues/2802');
+        },
+      );
+    }
+    return this.historicalAuthorDataCache.get();
+  }
+
   async getInternalRevisionAText(targetRev: number) {
     const keyRev = this.getKeyRevisionNumber(targetRev);
     const headRev = this.getHeadRevisionNumber();

diff --git a/src/node/handler/PadMessageHandler.ts b/src/node/handler/PadMessageHandler.ts
@@ -1084,27 +1084,19 @@ const handleClientReady = async (socket:any, message: ClientReadyMessage) => {
     await pad.saveToDatabase();
   }
 
-  // these db requests all need the pad object (timestamp of latest revision, author data)
-  const authors = pad.getAllAuthors();
-
   // get timestamp of latest revision needed for timeslider
   const currentTime = await pad.getRevisionDate(pad.getHeadRevisionNumber());
 
-  // get all author data out of the database (in parallel)
-  const historicalAuthorData:MapArrayType<{
+  // Per-pad cached author lookup (#7756 connect-handshake cliff). At 200+
+  // authors a fresh burst of 50 simultaneous joiners would otherwise do
+  // 50 * 200 = 10000 ueberdb cache lookups inside the join hot path,
+  // competing for the same event loop as the existing authors' USER_CHANGES
+  // traffic. The Pad-level cache collapses that to a single computation
+  // shared across the simultaneous joins (5-second TTL).
+  const historicalAuthorData: MapArrayType<{
     name: string;
     colorId: string;
-  }> = {};
-  await Promise.all(authors.map(async (authorId: string) => {
-    const author = await authorManager.getAuthor(authorId);
-    if (!author) {
-      messageLogger.error(`There is no author for authorId: ${authorId}. ` +
-          'This is possibly related to https://github.com/ether/etherpad-lite/issues/2802');
-    } else {
-      // Filter author attribs (e.g. don't send author's pads to all clients)
-      historicalAuthorData[authorId] = {name: author.name, colorId: author.colorId};
-    }
-  }));
+  }> = await pad.getHistoricalAuthorData();
 
   // glue the clientVars together, send them and tell the other clients that a new one is there
 

diff --git a/src/tests/backend-new/specs/pad-historical-author-data.test.ts b/src/tests/backend-new/specs/pad-historical-author-data.test.ts
@@ -0,0 +1,135 @@
+// HistoricalAuthorDataCache pins the per-pad author-data cache used by
+// PadMessageHandler.handleClientReady. The cache exists to coalesce the
+// Promise.all(authors.map(getAuthor)) work across simultaneous CLIENT_READY
+// handshakes — see ether/etherpad#7756.
+//
+// The helper takes pure functions as input (no DB, no Pad), so this test
+// exercises the real production code path without standing up the full
+// pad / DB stack.
+
+import {describe, it, expect, vi, beforeEach} from 'vitest';
+import {HistoricalAuthorDataCache, type AuthorRecord} from '../../../node/db/HistoricalAuthorDataCache';
+
+type Fetcher = (id: string) => Promise<AuthorRecord | null | undefined>;
+type OnMissing = (id: string) => void;
+
+const makeCache = (
+  ids: string[],
+  fetcher: Fetcher,
+  ttlMs = 5_000,
+  now = () => Date.now(),
+  onMissing: OnMissing = () => {},
+) => new HistoricalAuthorDataCache(() => ids, fetcher, ttlMs, now, onMissing);
+
+describe('HistoricalAuthorDataCache', () => {
+  let getAuthorMock: ReturnType<typeof vi.fn<Fetcher>>;
+
+  beforeEach(() => {
+    getAuthorMock = vi.fn(async (id: string) => ({name: `n-${id}`, colorId: `c-${id}`}));
+  });
+
+  it('returns one entry per author with {name, colorId}', async () => {
+    const cache = makeCache(['a.1', 'a.2', 'a.3'], getAuthorMock);
+    const data = await cache.get();
+    expect(data).toEqual({
+      'a.1': {name: 'n-a.1', colorId: 'c-a.1'},
+      'a.2': {name: 'n-a.2', colorId: 'c-a.2'},
+      'a.3': {name: 'n-a.3', colorId: 'c-a.3'},
+    });
+  });
+
+  it('coalesces 50 simultaneous get() calls into 1 fetch per author', async () => {
+    const cache = makeCache(['a.1', 'a.2', 'a.3'], getAuthorMock);
+    const results = await Promise.all(Array.from({length: 50}, () => cache.get()));
+    expect(results).toHaveLength(50);
+    expect(getAuthorMock).toHaveBeenCalledTimes(3);
+    for (const r of results) {
+      expect(Object.keys(r).sort()).toEqual(['a.1', 'a.2', 'a.3']);
+    }
+  });
+
+  it('refetches once the TTL expires', async () => {
+    let clock = 0;
+    const cache = makeCache(['a.1'], getAuthorMock, 5_000, () => clock);
+    await cache.get();
+    expect(getAuthorMock).toHaveBeenCalledTimes(1);
+    clock = 4_000;
+    await cache.get();
+    expect(getAuthorMock).toHaveBeenCalledTimes(1);
+    clock = 6_000;
+    await cache.get();
+    expect(getAuthorMock).toHaveBeenCalledTimes(2);
+  });
+
+  it('returns a fresh object on every get() — callers may safely mutate without bleeding into other joiners', async () => {
+    const cache = makeCache(['a.1'], getAuthorMock);
+    const first = await cache.get();
+    const second = await cache.get();
+    // Two distinct top-level objects and per-author records.
+    expect(first).not.toBe(second);
+    expect(first['a.1']).not.toBe(second['a.1']);
+    expect(first).toEqual(second);
+    // Mutating the returned object must not affect the next caller.
+    first['a.1']!.name = 'mutated';
+    const third = await cache.get();
+    expect(third).toEqual({'a.1': {name: 'n-a.1', colorId: 'c-a.1'}});
+  });
+
+  it('a slow compute that runs past TTL still resolves callers; no duplicate fetch starts in flight', async () => {
+    // Compute that hangs on a gate; TTL is 10ms. Without the in-flight
+    // guard, the second get() after 10ms would start a duplicate compute,
+    // and the older resolution could clobber the newer cached value.
+    let release: () => void;
+    const gate = new Promise<void>((r) => { release = r; });
+    let calls = 0;
+    let clock = 0;
+    const fetcher = vi.fn(async (id: string) => {
+      calls++;
+      await gate;
+      return {name: `n-${id}`, colorId: `c-${id}`};
+    });
+    const cache = makeCache(['a.1'], fetcher, 10, () => clock);
+    const first = cache.get();
+    clock = 50; // well past ttlMs
+    const second = cache.get();
+    expect(calls).toBe(1);
+    release!();
+    const [a, b] = await Promise.all([first, second]);
+    expect(a).toEqual(b);
+    expect(calls).toBe(1);
+  });
+
+  it('calls onMissingAuthor exactly once per id the fetcher returns falsy for', async () => {
+    const fetcher = vi.fn(async (id: string) =>
+      id === 'a.gone' ? null : {name: `n-${id}`, colorId: 'c'});
+    const onMissing = vi.fn();
+    const cache = makeCache(['a.1', 'a.gone', 'a.2'], fetcher, 5_000, Date.now, onMissing);
+    const data = await cache.get();
+    expect(Object.keys(data).sort()).toEqual(['a.1', 'a.2']);
+    expect(onMissing).toHaveBeenCalledTimes(1);
+    expect(onMissing).toHaveBeenCalledWith('a.gone');
+  });
+
+  it('invalidate() forces the next call to refetch', async () => {
+    const cache = makeCache(['a.1'], getAuthorMock);
+    await cache.get();
+    await cache.get();
+    expect(getAuthorMock).toHaveBeenCalledTimes(1);
+    cache.invalidate();
+    await cache.get();
+    expect(getAuthorMock).toHaveBeenCalledTimes(2);
+  });
+
+  it('a failed fetch clears the cache so the next call retries', async () => {
+    let attempt = 0;
+    const flakyFetcher = vi.fn(async (id: string) => {
+      attempt++;
+      if (attempt === 1) throw new Error('first attempt fails');
+      return {name: `n-${id}`, colorId: 'c'};
+    });
+    const cache = makeCache(['a.1'], flakyFetcher);
+    await expect(cache.get()).rejects.toThrow('first attempt fails');
+    const data = await cache.get();
+    expect(data).toEqual({'a.1': {name: 'n-a.1', colorId: 'c'}});
+  });
+});