Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ RUN npm run build
# BACKEND BUILD STAGE
# ================================
FROM public.ecr.aws/docker/library/eclipse-temurin:25-jdk AS builder
# Build-time commit identifier; falls back to "unknown" when no --build-arg is given.
# NOTE(review): Docker treats an ARG as an implicit input of every later RUN in the
# same stage, so a changed SOURCE_COMMIT may invalidate the cached layers that follow.
# Consider declaring it immediately before the Gradle build step — confirm.
ARG SOURCE_COMMIT=unknown
WORKDIR /app

# 1. Gradle wrapper (rarely changes)
Expand All @@ -51,13 +52,14 @@ COPY --from=frontend-builder /app/src/main/resources/static ./src/main/resources

# 6. Build application with cache mount
RUN --mount=type=cache,target=/root/.gradle \
./gradlew clean build -x test --no-daemon && \
SOURCE_COMMIT="${SOURCE_COMMIT}" ./gradlew clean build -x test --no-daemon && \
cp $(ls build/libs/*.jar | grep -v '\-plain\.jar' | head -n 1) build/app.jar

# ================================
# RUNTIME STAGE
# ================================
FROM public.ecr.aws/docker/library/eclipse-temurin:25-jre AS runtime
# Build-time commit identifier; falls back to "unknown" when no --build-arg is given.
# NOTE(review): this ARG precedes the system-package RUN layer, and Docker treats an
# ARG as an implicit input of later RUN steps, so a new commit value may defeat that
# layer's cache. Consider declaring it right before `ENV SOURCE_COMMIT` — confirm.
ARG SOURCE_COMMIT=unknown

# 1. System packages (never changes) - FIRST for maximum cache reuse
RUN apt-get update && apt-get install -y --no-install-recommends curl \
Expand All @@ -79,6 +81,7 @@ ENV APP_KILL_ON_CONFLICT=false
ENV DOCS_SNAPSHOT_DIR=/app/data/snapshots
ENV DOCS_PARSED_DIR=/app/data/parsed
ENV DOCS_INDEX_DIR=/app/data/index
ENV SOURCE_COMMIT=${SOURCE_COMMIT}

# 5. Application JAR (changes every build) - LAST for optimal caching
COPY --from=builder /app/build/app.jar app.jar
Expand Down
8 changes: 8 additions & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,17 @@ plugins {
val spotbugsToolVersion = "4.9.8"
val pmdToolVersion = "7.20.0"
val palantirVersion = "2.85.0"
// Name of the environment variable that carries the commit being built.
val sourceCommitEnvironmentVariable = "SOURCE_COMMIT"
// Fallback recorded when the environment variable is not set.
val missingSourceCommit = "unknown"
// Provider for the commit value: the environment variable if present, otherwise the fallback.
val sourceCommit = providers.environmentVariable(sourceCommitEnvironmentVariable).orElse(missingSourceCommit)

// Spring Boot plugin configuration: application entry point plus generated build info.
springBoot {
mainClass.set("com.williamcallahan.javachat.JavaChatApplication")
buildInfo {
properties {
// Adds a "commit" entry to the generated build info.
// NOTE(review): presumably surfaced by the actuator info endpoint as build.commit — confirm.
additional.put("commit", sourceCommit)
}
}
}

group = "com.williamcallahan"
Expand Down
15 changes: 14 additions & 1 deletion frontend/src/lib/components/CitationPanel.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@
let { citations, visible = true, panelId }: Props = $props()

let isExpanded = $state(false)
let citationListElement = $state<HTMLUListElement | null>(null)

// Citation toggles typically sit at the bottom edge of a scrollable region
// (chat messages, lesson panel, mobile drawer). Because the list opens downward,
// it would otherwise land below the fold, so bring it into view when expanded.
$effect(() => {
  if (!isExpanded) return
  citationListElement?.scrollIntoView({ block: 'nearest' })
})

/**
* Simple hash for deterministic ID generation from citation content.
Expand Down Expand Up @@ -75,7 +85,7 @@
</button>

{#if isExpanded}
<ul id={citationListId} class="citation-list" role="list">
<ul id={citationListId} class="citation-list" role="list" bind:this={citationListElement}>
{#each uniqueCitations as citation (citation.url)}
{@const citationType = getCitationType(citation.url)}
{@const displaySource = getDisplaySource(citation.url)}
Expand Down Expand Up @@ -201,6 +211,9 @@
flex-direction: column;
gap: var(--citation-list-gap);
animation: slide-down var(--citation-transition-normal) ease-out;
/* Clearance for scrollIntoView: must exceed the slide-down translateY(-4px)
start offset, since the scroll happens while the entry animation runs. */
scroll-margin-bottom: var(--space-3, 12px);
}

@keyframes slide-down {
Expand Down
65 changes: 65 additions & 0 deletions frontend/src/lib/components/CitationPanel.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { describe, it, expect, vi, beforeEach } from "vitest";
import { render, fireEvent } from "@testing-library/svelte";
import { tick } from "svelte";
import CitationPanel from "./CitationPanel.svelte";
import type { Citation } from "../services/chat";

const SINGLE_CITATION: Citation[] = [
{
url: "https://example.com/pdfs/think-java.pdf",
title: "Think Java: How to Think Like a Computer Scientist",
snippet: "Think Java 2nd Edition Book",
},
];

describe("CitationPanel", () => {
  // src/test/setup.ts installs the scrollIntoView polyfill on the prototype; spying on
  // that prototype method records every scroll the panel requests for its list.
  const scrollIntoViewSpy = vi.spyOn(HTMLElement.prototype, "scrollIntoView");

  /** Mounts the panel with the single-citation fixture and locates its toggle button. */
  const mountPanel = () => {
    const view = render(CitationPanel, { props: { citations: SINGLE_CITATION } });
    const sourcesToggle = view.getByRole("button", { name: /1 source/i });
    return { view, sourcesToggle };
  };

  /** Clicks the target, then waits for Svelte to flush the resulting update. */
  const clickAndFlush = async (target: HTMLElement): Promise<void> => {
    await fireEvent.click(target);
    await tick();
  };

  /** Looks up the rendered citation list, or null while the panel is collapsed. */
  const findCitationList = (root: HTMLElement) => root.querySelector(".citation-list");

  beforeEach(() => {
    // Each test starts with an empty call log.
    scrollIntoViewSpy.mockClear();
  });

  it("expands the citation list when the trigger is clicked", async () => {
    const { view, sourcesToggle } = mountPanel();
    expect(findCitationList(view.container)).toBeNull();

    await clickAndFlush(sourcesToggle);

    expect(sourcesToggle).toHaveAttribute("aria-expanded", "true");
    expect(findCitationList(view.container)).not.toBeNull();
  });

  it("scrolls the expanded list into view so it is never hidden below the fold", async () => {
    const { sourcesToggle } = mountPanel();

    await clickAndFlush(sourcesToggle);

    expect(scrollIntoViewSpy).toHaveBeenCalledWith(expect.objectContaining({ block: "nearest" }));
  });

  it("does not scroll when collapsing the list", async () => {
    const { sourcesToggle } = mountPanel();

    // Expand first, then discard the scroll recorded for the expansion.
    await clickAndFlush(sourcesToggle);
    scrollIntoViewSpy.mockClear();

    await clickAndFlush(sourcesToggle);

    expect(scrollIntoViewSpy).not.toHaveBeenCalled();
  });
});
2 changes: 2 additions & 0 deletions frontend/src/lib/components/Header.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
role="tab"
class="nav-tab"
class:active={currentView === 'chat'}
aria-label="Chat"
aria-selected={currentView === 'chat'}
onclick={() => currentView = 'chat'}
>
Expand All @@ -38,6 +39,7 @@
role="tab"
class="nav-tab"
class:active={currentView === 'learn'}
aria-label="Learn"
aria-selected={currentView === 'learn'}
onclick={() => currentView = 'learn'}
>
Expand Down
14 changes: 14 additions & 0 deletions frontend/src/lib/components/Header.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { describe, expect, it } from "vitest";
import { render } from "@testing-library/svelte";
import Header from "./Header.svelte";

describe("Header navigation accessibility", () => {
  it("names icon-only mobile navigation tabs", () => {
    const { getByRole } = render(Header, { props: { currentView: "chat" } });

    // Accessible tab name paired with the aria-selected value expected in the chat view.
    const expectedSelection = [
      ["Chat", "true"],
      ["Learn", "false"],
    ] as const;

    for (const [tabName, selected] of expectedSelection) {
      expect(getByRole("tab", { name: tabName })).toHaveAttribute("aria-selected", selected);
    }
  });
});
7 changes: 7 additions & 0 deletions frontend/src/test/setup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ Object.defineProperty(HTMLElement.prototype, "scrollTo", {
value: () => {},
});

// scrollIntoView is missing in jsdom, and CitationPanel calls it to reveal its expanded
// list, so install a writable no-op that tests can run against and spy on.
// oxlint-disable-next-line no-extend-native -- jsdom polyfill, not production code
Object.defineProperty(HTMLElement.prototype, "scrollIntoView", {
  value() {},
  writable: true,
});

// requestAnimationFrame is used for post-update DOM adjustments; provide a safe fallback.
if (typeof window.requestAnimationFrame !== "function") {
window.requestAnimationFrame = (callback: FrameRequestCallback) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
* Defines the application's embedding port independent from Spring AI abstractions.
*/
public interface EmbeddingClient {
/** Minimal provider input used by keep-alive probes. */
String EMBEDDING_WARM_UP_PROBE_TEXT = "embedding model warm-up probe";

/**
* Produces one dense embedding vector per input text, preserving input order.
Expand Down Expand Up @@ -37,4 +39,16 @@ default float[] embed(String text) {
* @return embedding vector dimensions
*/
int dimensions();

/**
* Issues a minimal embedding request so the provider keeps its model resident.
*
* <p>Implementations must call their provider-specific request path directly instead
* of delegating to {@link #embed(List)}. The RAG pipeline logging aspect advises
* public {@code embed} executions, so routing scheduled probes around that method
* keeps "STEP 1" pipeline logs scoped to real requests.</p>
*
* @throws EmbeddingServiceUnavailableException when the provider cannot serve the probe
*/
void warmUp();
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.williamcallahan.javachat.service;

import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Scheduled;
Expand Down Expand Up @@ -31,7 +30,7 @@ public class EmbeddingModelKeepAlive {
/** Probe latency above this means the model was cold and a reload just happened. */
private static final long COLD_MODEL_WARN_THRESHOLD_MILLIS = 5_000L;

private static final List<String> KEEP_ALIVE_PROBE_TEXTS = List.of("embedding keep-alive probe");
private static final long NANOS_PER_MILLISECOND = 1_000_000L;

private final EmbeddingClient embeddingClient;

Expand All @@ -49,17 +48,19 @@ public EmbeddingModelKeepAlive(EmbeddingClient embeddingClient) {
* and the next tick retries. Unexpected runtime failures propagate to the scheduler's
* error handler rather than being swallowed here.</p>
*/
@Scheduled(initialDelay = STARTUP_WARMUP_DELAY_MILLIS, fixedDelay = KEEP_ALIVE_INTERVAL_MILLIS)
// fixedRate keeps probe *starts* on the cadence: with fixedDelay a slow cold
// start would push the next probe past the provider's idle-unload TTL.
@Scheduled(initialDelay = STARTUP_WARMUP_DELAY_MILLIS, fixedRate = KEEP_ALIVE_INTERVAL_MILLIS)
public void keepEmbeddingModelWarm() {
long probeStartMillis = System.currentTimeMillis();
long probeStartNanos = System.nanoTime();
try {
embeddingClient.embed(KEEP_ALIVE_PROBE_TEXTS);
embeddingClient.warmUp();
} catch (EmbeddingServiceUnavailableException exception) {
log.warn(
"[EMBEDDING] Keep-alive probe failed; the next chat request may pay a model cold start", exception);
return;
}
long probeDurationMillis = System.currentTimeMillis() - probeStartMillis;
long probeDurationMillis = (System.nanoTime() - probeStartNanos) / NANOS_PER_MILLISECOND;
if (probeDurationMillis > COLD_MODEL_WARN_THRESHOLD_MILLIS) {
log.warn(
"[EMBEDDING] Keep-alive probe took {}ms — embedding model was cold and has been reloaded",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ public List<float[]> embed(List<String> texts) {
if (texts == null || texts.isEmpty()) {
return List.of();
}
return fetchValidatedEmbeddings(texts);
}

/**
 * Sends the shared warm-up probe text straight through the validated fetch path
 * rather than through {@code embed(List)}; the resulting vectors are discarded.
 */
@Override
public void warmUp() {
    List<String> warmUpProbeBatch = List.of(EMBEDDING_WARM_UP_PROBE_TEXT);
    fetchValidatedEmbeddings(warmUpProbeBatch);
}

private List<float[]> fetchValidatedEmbeddings(List<String> texts) {
try {
return callEmbeddingApi(texts);
} catch (org.springframework.web.client.RestClientResponseException apiException) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,15 @@ public List<float[]> embed(List<String> texts) {
if (texts == null || texts.isEmpty()) {
return List.of();
}
return createEmbeddings(texts);
}

/**
 * Issues a single-text embedding request with the shared warm-up probe text, calling
 * the request helper directly rather than {@code embed(List)}; the result is discarded.
 */
@Override
public void warmUp() {
    List<String> warmUpProbeBatch = List.of(EMBEDDING_WARM_UP_PROBE_TEXT);
    createEmbeddings(warmUpProbeBatch);
}

private List<float[]> createEmbeddings(List<String> texts) {
EmbeddingCreateParams.Builder embeddingRequestBuilder =
EmbeddingCreateParams.builder().model(modelName).inputOfArrayOfStrings(texts);
if (supportsDimensionOverride(modelName)) {
Expand Down
4 changes: 4 additions & 0 deletions src/main/resources/application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ app.cors.max-age-seconds=3600

# Actuator
# Only the health, info, and metrics endpoints are exposed over HTTP.
management.endpoints.web.exposure.include=health,info,metrics
# Enable the build and env info contributors so the info endpoint can report build
# metadata and the info.* properties declared below.
management.info.build.enabled=true
management.info.env.enabled=true
info.application.name=${spring.application.name}
# Taken from SOURCE_COMMIT when it is defined; otherwise reported as "unknown".
info.deployment.commit=${SOURCE_COMMIT:unknown}
# Reduce Qdrant client warning verbosity via logging
logging.level.io.qdrant=ERROR
# Suppress PDFBox font mapping warnings (these are harmless)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ void keepEmbeddingModelWarmProbesTheEmbeddingProvider() {

new EmbeddingModelKeepAlive(recordingEmbeddingClient).keepEmbeddingModelWarm();

assertEquals(1, recordingEmbeddingClient.embedInvocationCount);
assertEquals(1, recordingEmbeddingClient.warmUpInvocationCount);
}

@Test
Expand All @@ -29,12 +29,16 @@ void keepEmbeddingModelWarmDoesNotPropagateProviderUnavailability() {
}

private static final class RecordingEmbeddingClient implements EmbeddingClient {
private int embedInvocationCount;
private int warmUpInvocationCount;

@Override
public List<float[]> embed(List<String> texts) {
embedInvocationCount++;
return List.of(new float[] {0.0f});
throw new AssertionError("keep-alive probes must not call embed(List)");
}

@Override
public void warmUp() {
warmUpInvocationCount++;
}

@Override
Expand All @@ -49,6 +53,11 @@ public List<float[]> embed(List<String> texts) {
throw new EmbeddingServiceUnavailableException("provider offline for test");
}

@Override
public void warmUp() {
throw new EmbeddingServiceUnavailableException("provider offline for test");
}

@Override
public int dimensions() {
return 1;
Expand Down