diff --git a/nosql-vector-algorithms-typescript/README.md b/nosql-vector-algorithms-typescript/README.md new file mode 100644 index 0000000..8e45682 --- /dev/null +++ b/nosql-vector-algorithms-typescript/README.md @@ -0,0 +1,72 @@ +# Vector Algorithm Comparison — Azure Cosmos DB NoSQL + +Compares **QuantizedFlat** and **DiskANN** vector index algorithms across **cosine**, **dotproduct**, and **euclidean** distance functions using Azure Cosmos DB for NoSQL. + +Creates 6 containers (2 algorithms × 3 distance functions) and runs identical vector searches against each to compare results, RU cost, and latency. + +## Prerequisites + +- [Node.js LTS](https://nodejs.org/) (v20.6+, required for the `node --env-file` flag used by the npm scripts) +- [Azure CLI](https://learn.microsoft.com/cli/azure/install-azure-cli) +- Azure subscription with access to Azure Cosmos DB and Azure OpenAI + +## Quick Start + +### 1. Create Azure resources + +```bash +az login +bash scripts/create-resources.sh +``` + +### 2. Configure environment + +```bash +cp sample.env .env +# Edit .env with values from create-resources.sh output +``` + +### 3. 
Install dependencies and run + +```bash +npm install +npm start +``` + +## Available Scripts + +| Script | Description | +|--------|-------------| +| `npm start` | Compare both algorithms with cosine distance | +| `npm run start:quantizedflat` | Run QuantizedFlat with cosine | +| `npm run start:diskann` | Run DiskANN with cosine | +| `npm run start:dotproduct` | Compare both algorithms with dotproduct | +| `npm run start:euclidean` | Compare both algorithms with euclidean | +| `npm run metrics` | Multi-iteration benchmark across all containers | +| `npm run verify` | Validate container setup and vector configuration | +| `npm run delete-data` | Delete all documents from containers | +| `npm run build` | Compile TypeScript | + +## Container Matrix + +| Algorithm | Distance | Container Name | +|-----------|----------|----------------| +| QuantizedFlat | cosine | `hotels_quantizedflat_cosine` | +| QuantizedFlat | dotproduct | `hotels_quantizedflat_dotproduct` | +| QuantizedFlat | euclidean | `hotels_quantizedflat_euclidean` | +| DiskANN | cosine | `hotels_diskann_cosine` | +| DiskANN | dotproduct | `hotels_diskann_dotproduct` | +| DiskANN | euclidean | `hotels_diskann_euclidean` | + +## Environment Variables + +Set `VECTOR_ALGORITHM` and `VECTOR_DISTANCE_FUNCTION` to control which containers are queried (the `npm start` and `npm run start:*` scripts set both via `cross-env`, and those values take precedence over the ones in `.env`): + +- `VECTOR_ALGORITHM`: `all` | `quantizedflat` | `diskann` +- `VECTOR_DISTANCE_FUNCTION`: `all` | `cosine` | `dotproduct` | `euclidean` + +## Clean Up + +```bash +bash scripts/delete-resources.sh +``` diff --git a/nosql-vector-algorithms-typescript/package-lock.json b/nosql-vector-algorithms-typescript/package-lock.json new file mode 100644 index 0000000..a814f98 --- /dev/null +++ b/nosql-vector-algorithms-typescript/package-lock.json @@ -0,0 +1,843 @@ +{ + "name": "nosql-vector-algorithms-typescript", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "nosql-vector-algorithms-typescript", + "version": "1.0.0", + 
"license": "MIT", + "dependencies": { + "@azure/cosmos": "^4.5.1", + "@azure/identity": "^4.11.1", + "openai": "^5.20.1" + }, + "devDependencies": { + "@types/node": "^24.3.0", + "cross-env": "^10.1.0", + "typescript": "^5.9.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure-rest/core-client": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@azure-rest/core-client/-/core-client-2.5.1.tgz", + "integrity": "sha512-EHaOXW0RYDKS5CFffnixdyRPak5ytiCtU7uXDcP/uiY+A6jFRwNGzzJBiznkCzvi5EYpY+YWinieqHb0oY916A==", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.10.0", + "@azure/core-rest-pipeline": "^1.22.0", + "@azure/core-tracing": "^1.3.0", + "@typespec/ts-http-runtime": "^0.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/abort-controller": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-2.1.2.tgz", + "integrity": "sha512-nBrLsEWm4J2u5LpAPjxADTlq3trDgVZZXHNKabeXZtpq3d3AbN/KGO82R87rdDz5/lYB024rtEf10/q0urNgsA==", + "license": "MIT", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-auth": { + "version": "1.10.1", + "resolved": "https://registry.npmjs.org/@azure/core-auth/-/core-auth-1.10.1.tgz", + "integrity": "sha512-ykRMW8PjVAn+RS6ww5cmK9U2CyH9p4Q88YJwvUslfuMmN98w/2rdGRLPqJYObapBCdzBVeDgYWdJnFPFb7qzpg==", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-util": "^1.13.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-client": { + "version": "1.10.1", + "resolved": "https://registry.npmjs.org/@azure/core-client/-/core-client-1.10.1.tgz", + "integrity": "sha512-Nh5PhEOeY6PrnxNPsEHRr9eimxLwgLlpmguQaHKBinFYA/RU9+kOYVOQqOrTsCL+KSxrLLl1gD8Dk5BFW/7l/w==", + "license": "MIT", + "dependencies": { + 
"@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.10.0", + "@azure/core-rest-pipeline": "^1.22.0", + "@azure/core-tracing": "^1.3.0", + "@azure/core-util": "^1.13.0", + "@azure/logger": "^1.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-http-compat": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/@azure/core-http-compat/-/core-http-compat-2.3.2.tgz", + "integrity": "sha512-Tf6ltdKzOJEgxZeWLCjMxrxbodB/ZeCbzzA1A2qHbhzAjzjHoBVSUeSl/baT/oHAxhc4qdqVaDKnc2+iE932gw==", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "@azure/core-client": "^1.10.0", + "@azure/core-rest-pipeline": "^1.22.0" + } + }, + "node_modules/@azure/core-lro": { + "version": "2.7.2", + "resolved": "https://registry.npmjs.org/@azure/core-lro/-/core-lro-2.7.2.tgz", + "integrity": "sha512-0YIpccoX8m/k00O7mDDMdJpbr6mf1yWo2dfmxt5A8XVZVVMz2SSKaEbMCeJRvgQ0IaSlqhjT47p4hVIRRy90xw==", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-util": "^1.2.0", + "@azure/logger": "^1.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-paging": { + "version": "1.6.2", + "resolved": "https://registry.npmjs.org/@azure/core-paging/-/core-paging-1.6.2.tgz", + "integrity": "sha512-YKWi9YuCU04B55h25cnOYZHxXYtEvQEbKST5vqRga7hWY9ydd3FZHdeQF8pyh+acWZvppw13M/LMGx0LABUVMA==", + "license": "MIT", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-rest-pipeline": { + "version": "1.23.0", + "resolved": "https://registry.npmjs.org/@azure/core-rest-pipeline/-/core-rest-pipeline-1.23.0.tgz", + "integrity": "sha512-Evs1INHo+jUjwHi1T6SG6Ua/LHOQBCLuKEEE6efIpt4ZOoNonaT1kP32GoOcdNDbfqsD2445CPri3MubBy5DEQ==", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + 
"@azure/core-auth": "^1.10.0", + "@azure/core-tracing": "^1.3.0", + "@azure/core-util": "^1.13.0", + "@azure/logger": "^1.3.0", + "@typespec/ts-http-runtime": "^0.3.4", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-tracing": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/@azure/core-tracing/-/core-tracing-1.3.1.tgz", + "integrity": "sha512-9MWKevR7Hz8kNzzPLfX4EAtGM2b8mr50HPDBvio96bURP/9C+HjdH3sBlLSNNrvRAr5/k/svoH457gB5IKpmwQ==", + "license": "MIT", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-util": { + "version": "1.13.1", + "resolved": "https://registry.npmjs.org/@azure/core-util/-/core-util-1.13.1.tgz", + "integrity": "sha512-XPArKLzsvl0Hf0CaGyKHUyVgF7oDnhKoP85Xv6M4StF/1AhfORhZudHtOyf2s+FcbuQ9dPRAjB8J2KvRRMUK2A==", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@typespec/ts-http-runtime": "^0.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/cosmos": { + "version": "4.9.1", + "resolved": "https://registry.npmjs.org/@azure/cosmos/-/cosmos-4.9.1.tgz", + "integrity": "sha512-fPnfL4JsmJJ/jEYUhlznKfrEr2pMvJwBncGVcUC2Xi7Nlj0MrUMRE+UOrptl/lRV2W7l68Br+b9Ikzm0KiZZHg==", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.9.0", + "@azure/core-rest-pipeline": "^1.19.1", + "@azure/core-tracing": "^1.2.0", + "@azure/core-util": "^1.11.0", + "@azure/keyvault-keys": "^4.9.0", + "@azure/logger": "^1.1.4", + "fast-json-stable-stringify": "^2.1.0", + "priorityqueuejs": "^2.0.0", + "semaphore": "^1.1.0", + "tslib": "^2.8.1" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/identity": { + "version": "4.13.0", + "resolved": "https://registry.npmjs.org/@azure/identity/-/identity-4.13.0.tgz", + "integrity": 
"sha512-uWC0fssc+hs1TGGVkkghiaFkkS7NkTxfnCH+Hdg+yTehTpMcehpok4PgUKKdyCH+9ldu6FhiHRv84Ntqj1vVcw==", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-auth": "^1.9.0", + "@azure/core-client": "^1.9.2", + "@azure/core-rest-pipeline": "^1.17.0", + "@azure/core-tracing": "^1.0.0", + "@azure/core-util": "^1.11.0", + "@azure/logger": "^1.0.0", + "@azure/msal-browser": "^4.2.0", + "@azure/msal-node": "^3.5.0", + "open": "^10.1.0", + "tslib": "^2.2.0" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/keyvault-common": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/@azure/keyvault-common/-/keyvault-common-2.0.0.tgz", + "integrity": "sha512-wRLVaroQtOqfg60cxkzUkGKrKMsCP6uYXAOomOIysSMyt1/YM0eUn9LqieAWM8DLcU4+07Fio2YGpPeqUbpP9w==", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-auth": "^1.3.0", + "@azure/core-client": "^1.5.0", + "@azure/core-rest-pipeline": "^1.8.0", + "@azure/core-tracing": "^1.0.0", + "@azure/core-util": "^1.10.0", + "@azure/logger": "^1.1.4", + "tslib": "^2.2.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/keyvault-keys": { + "version": "4.10.0", + "resolved": "https://registry.npmjs.org/@azure/keyvault-keys/-/keyvault-keys-4.10.0.tgz", + "integrity": "sha512-eDT7iXoBTRZ2n3fLiftuGJFD+yjkiB1GNqzU2KbY1TLYeXeSPVTVgn2eJ5vmRTZ11978jy2Kg2wI7xa9Tyr8ag==", + "license": "MIT", + "dependencies": { + "@azure-rest/core-client": "^2.3.3", + "@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.9.0", + "@azure/core-http-compat": "^2.2.0", + "@azure/core-lro": "^2.7.2", + "@azure/core-paging": "^1.6.2", + "@azure/core-rest-pipeline": "^1.19.0", + "@azure/core-tracing": "^1.2.0", + "@azure/core-util": "^1.11.0", + "@azure/keyvault-common": "^2.0.0", + "@azure/logger": "^1.1.4", + "tslib": "^2.8.1" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/logger": { + "version": "1.3.0", + 
"resolved": "https://registry.npmjs.org/@azure/logger/-/logger-1.3.0.tgz", + "integrity": "sha512-fCqPIfOcLE+CGqGPd66c8bZpwAji98tZ4JI9i/mlTNTlsIWslCfpg48s/ypyLxZTump5sypjrKn2/kY7q8oAbA==", + "license": "MIT", + "dependencies": { + "@typespec/ts-http-runtime": "^0.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/msal-browser": { + "version": "4.30.0", + "resolved": "https://registry.npmjs.org/@azure/msal-browser/-/msal-browser-4.30.0.tgz", + "integrity": "sha512-HBBKfbZkMVzzF5bofvS1cXuNHFVc+gt4/HOnCmG/0hsHuZRJvJvDg/+7nTwIpoqvJc8BQp5o23rBUfisOLxR+w==", + "license": "MIT", + "dependencies": { + "@azure/msal-common": "15.17.0" + }, + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@azure/msal-common": { + "version": "15.17.0", + "resolved": "https://registry.npmjs.org/@azure/msal-common/-/msal-common-15.17.0.tgz", + "integrity": "sha512-VQ5/gTLFADkwue+FohVuCqlzFPUq4xSrX8jeZe+iwZuY6moliNC8xt86qPVNYdtbQfELDf2Nu6LI+demFPHGgw==", + "license": "MIT", + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@azure/msal-node": { + "version": "3.8.10", + "resolved": "https://registry.npmjs.org/@azure/msal-node/-/msal-node-3.8.10.tgz", + "integrity": "sha512-0Hz7Kx4hs70KZWep/Rd7aw/qOLUF92wUOhn7ZsOuB5xNR/06NL1E2RAI9+UKH1FtvN8nD6mFjH7UKSjv6vOWvQ==", + "license": "MIT", + "dependencies": { + "@azure/msal-common": "15.17.0", + "jsonwebtoken": "^9.0.0", + "uuid": "^8.3.0" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/@epic-web/invariant": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@epic-web/invariant/-/invariant-1.0.0.tgz", + "integrity": "sha512-lrTPqgvfFQtR/eY/qkIzp98OGdNJu0m5ji3q/nJI8v3SXkRKEnWiOxMmbvcSoAIzv/cGiuvRy57k4suKQSAdwA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "24.12.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.12.0.tgz", + "integrity": 
"sha512-GYDxsZi3ChgmckRT9HPU0WEhKLP08ev/Yfcq2AstjrDASOYCSXeyjDsHg4v5t4jOj7cyDX3vmprafKlWIG9MXQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~7.16.0" + } + }, + "node_modules/@typespec/ts-http-runtime": { + "version": "0.3.4", + "resolved": "https://registry.npmjs.org/@typespec/ts-http-runtime/-/ts-http-runtime-0.3.4.tgz", + "integrity": "sha512-CI0NhTrz4EBaa0U+HaaUZrJhPoso8sG7ZFya8uQoBA57fjzrjRSv87ekCjLZOFExN+gXE/z0xuN2QfH4H2HrLQ==", + "license": "MIT", + "dependencies": { + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", + "license": "BSD-3-Clause" + }, + "node_modules/bundle-name": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bundle-name/-/bundle-name-4.1.0.tgz", + "integrity": "sha512-tjwM5exMg6BGRI+kNmTntNsvdZS1X8BFYS6tnJ2hdH0kVxM6/eVZ2xy+FqStSWvYmtfFMDLIxurorHwDKfDz5Q==", + "license": "MIT", + "dependencies": { + "run-applescript": "^7.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/cross-env": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/cross-env/-/cross-env-10.1.0.tgz", + "integrity": "sha512-GsYosgnACZTADcmEyJctkJIoqAhHjttw7RsFrVoJNXbsWWqaq6Ym+7kZjq6mS45O0jij6vtiReppKQEtqWy6Dw==", + "dev": true, + "license": "MIT", + 
"dependencies": { + "@epic-web/invariant": "^1.0.0", + "cross-spawn": "^7.0.6" + }, + "bin": { + "cross-env": "dist/bin/cross-env.js", + "cross-env-shell": "dist/bin/cross-env-shell.js" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dev": true, + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/default-browser": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/default-browser/-/default-browser-5.5.0.tgz", + "integrity": "sha512-H9LMLr5zwIbSxrmvikGuI/5KGhZ8E2zH3stkMgM5LpOWDutGM2JZaj460Udnf1a+946zc7YBgrqEWwbk7zHvGw==", + "license": "MIT", + "dependencies": { + "bundle-name": "^4.1.0", + "default-browser-id": "^5.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/default-browser-id": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/default-browser-id/-/default-browser-id-5.0.1.tgz", + "integrity": "sha512-x1VCxdX4t+8wVfd1so/9w+vQ4vx7lKd2Qp5tDRutErwmR85OgmfX7RlLRMWafRMY7hbEiXIbudNrjOAPa/hL8Q==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/define-lazy-prop": { + "version": 
"3.0.0", + "resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-3.0.0.tgz", + "integrity": "sha512-N+MeXYoqr3pOgn8xfyRPREN7gHakLYjhsHhWGT3fWAiL4IkAt0iDw14QiiEm2bE30c5XX5q0FtAA3CK5f9/BUg==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "license": "MIT" + }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/is-docker": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-docker/-/is-docker-3.0.0.tgz", + "integrity": 
"sha512-eljcgEDlEns/7AXFosB5K/2nCM4P7FQPkGc/DWLy5rmFEWvZayGrik1d9/QIY5nJ4f9YsVvBkA6kJpHn9rISdQ==", + "license": "MIT", + "bin": { + "is-docker": "cli.js" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-inside-container": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-inside-container/-/is-inside-container-1.0.0.tgz", + "integrity": "sha512-KIYLCCJghfHZxqjYBE7rEy0OBuTd5xCHS7tHVgvCLkx7StIoaxwNW3hCALgEUjFfeRk+MG/Qxmp/vtETEF3tRA==", + "license": "MIT", + "dependencies": { + "is-docker": "^3.0.0" + }, + "bin": { + "is-inside-container": "cli.js" + }, + "engines": { + "node": ">=14.16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-wsl": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-3.1.1.tgz", + "integrity": "sha512-e6rvdUCiQCAuumZslxRJWR/Doq4VpPR82kqclvcS0efgt430SlGIk05vdCN58+VrzgtIcfNODjozVielycD4Sw==", + "license": "MIT", + "dependencies": { + "is-inside-container": "^1.0.0" + }, + "engines": { + "node": ">=16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true, + "license": "ISC" + }, + "node_modules/jsonwebtoken": { + "version": "9.0.3", + "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.3.tgz", + "integrity": "sha512-MT/xP0CrubFRNLNKvxJ2BYfy53Zkm++5bX9dtuPbqAeQpTVe0MQTFhao8+Cp//EmJp244xt6Drw/GVEGCUj40g==", + "license": "MIT", + "dependencies": { + "jws": "^4.0.1", + "lodash.includes": "^4.3.0", + "lodash.isboolean": "^3.0.3", + "lodash.isinteger": "^4.0.4", + "lodash.isnumber": "^3.0.3", + "lodash.isplainobject": "^4.0.6", + "lodash.isstring": 
"^4.0.1", + "lodash.once": "^4.0.0", + "ms": "^2.1.1", + "semver": "^7.5.4" + }, + "engines": { + "node": ">=12", + "npm": ">=6" + } + }, + "node_modules/jwa": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "license": "MIT", + "dependencies": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.1.tgz", + "integrity": "sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==", + "license": "MIT", + "dependencies": { + "jwa": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/lodash.includes": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/lodash.includes/-/lodash.includes-4.3.0.tgz", + "integrity": "sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==", + "license": "MIT" + }, + "node_modules/lodash.isboolean": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz", + "integrity": "sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==", + "license": "MIT" + }, + "node_modules/lodash.isinteger": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/lodash.isinteger/-/lodash.isinteger-4.0.4.tgz", + "integrity": "sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==", + "license": "MIT" + }, + "node_modules/lodash.isnumber": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/lodash.isnumber/-/lodash.isnumber-3.0.3.tgz", + "integrity": "sha512-QYqzpfwO3/CWf3XP+Z+tkQsfaLL/EnUlXWVkIk5FUPc4sBdTehEqZONuyRt2P67PXAk+NXmTBcc97zw9t1FQrw==", + "license": "MIT" + }, + 
"node_modules/lodash.isplainobject": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz", + "integrity": "sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==", + "license": "MIT" + }, + "node_modules/lodash.isstring": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz", + "integrity": "sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==", + "license": "MIT" + }, + "node_modules/lodash.once": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz", + "integrity": "sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==", + "license": "MIT" + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/open": { + "version": "10.2.0", + "resolved": "https://registry.npmjs.org/open/-/open-10.2.0.tgz", + "integrity": "sha512-YgBpdJHPyQ2UE5x+hlSXcnejzAvD0b22U2OuAP+8OnlJT+PjWPxtgmGqKKc+RgTM63U9gN0YzrYc71R2WT/hTA==", + "license": "MIT", + "dependencies": { + "default-browser": "^5.2.1", + "define-lazy-prop": "^3.0.0", + "is-inside-container": "^1.0.0", + "wsl-utils": "^0.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/openai": { + "version": "5.23.2", + "resolved": "https://registry.npmjs.org/openai/-/openai-5.23.2.tgz", + "integrity": "sha512-MQBzmTulj+MM5O8SKEk/gL8a7s5mktS9zUtAkU257WjvobGc9nKcBuVwjyEEcb9SI8a8Y2G/mzn3vm9n1Jlleg==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.23.8" + }, + "peerDependenciesMeta": 
{ + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/priorityqueuejs": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/priorityqueuejs/-/priorityqueuejs-2.0.0.tgz", + "integrity": "sha512-19BMarhgpq3x4ccvVi8k2QpJZcymo/iFUcrhPd4V96kYGovOdTsWwy7fxChYi4QY+m2EnGBWSX9Buakz+tWNQQ==", + "license": "MIT" + }, + "node_modules/run-applescript": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/run-applescript/-/run-applescript-7.1.0.tgz", + "integrity": "sha512-DPe5pVFaAsinSaV6QjQ6gdiedWDcRCbUuiQfQa2wmWV7+xC9bGulGI8+TdRmoFkAPaBXk8CrAbnlY2ISniJ47Q==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/semaphore": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/semaphore/-/semaphore-1.1.0.tgz", + "integrity": "sha512-O4OZEaNtkMd/K0i6js9SL+gqy0ZCBMgUvlSqHKi4IBdjhe7wB8pwztUk1BbZ1fmrvpwFrPbHzqd2w5pTcJH6LA==", + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": 
"sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/tslib": { + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "license": "0BSD" + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "7.16.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", + "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", + "dev": true, + "license": "MIT" + }, + "node_modules/uuid": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", + "integrity": 
"sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==", + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/wsl-utils": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/wsl-utils/-/wsl-utils-0.1.0.tgz", + "integrity": "sha512-h3Fbisa2nKGPxCpm89Hk33lBLsnaGBvctQopaBSOW/uIs6FTe1ATyAnKFJrzVs9vpGdsTe73WF3V4lIsk4Gacw==", + "license": "MIT", + "dependencies": { + "is-wsl": "^3.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + } + } +} diff --git a/nosql-vector-algorithms-typescript/package.json b/nosql-vector-algorithms-typescript/package.json new file mode 100644 index 0000000..b458bdc --- /dev/null +++ b/nosql-vector-algorithms-typescript/package.json @@ -0,0 +1,31 @@ +{ + "name": "nosql-vector-algorithms-typescript", + "version": "1.0.0", + "main": "dist/index.js", + "type": "module", + "license": "MIT", + "scripts": { + "build": "tsc", + "start": "tsc && cross-env VECTOR_ALGORITHM=all VECTOR_DISTANCE_FUNCTION=cosine node --env-file .env dist/vector-algorithms.js", + "start:quantizedflat": "tsc && cross-env VECTOR_ALGORITHM=quantizedflat VECTOR_DISTANCE_FUNCTION=cosine node --env-file .env dist/vector-algorithms.js", + "start:diskann": "tsc && cross-env VECTOR_ALGORITHM=diskann VECTOR_DISTANCE_FUNCTION=cosine node --env-file .env dist/vector-algorithms.js", + "start:dotproduct": "tsc && cross-env VECTOR_ALGORITHM=all VECTOR_DISTANCE_FUNCTION=dotproduct node --env-file .env dist/vector-algorithms.js", + 
"start:euclidean": "tsc && cross-env VECTOR_ALGORITHM=all VECTOR_DISTANCE_FUNCTION=euclidean node --env-file .env dist/vector-algorithms.js", + "metrics": "tsc && node --env-file .env dist/other/query-metrics.js", + "verify": "tsc && node --env-file .env dist/other/verify-vector-setup.js", + "delete-data": "tsc && node --env-file .env dist/other/delete-all-documents.js" + }, + "dependencies": { + "@azure/cosmos": "^4.5.1", + "@azure/identity": "^4.11.1", + "openai": "^5.20.1" + }, + "devDependencies": { + "@types/node": "^24.3.0", + "cross-env": "^10.1.0", + "typescript": "^5.9.2" + }, + "engines": { + "node": ">=20.0.0" + } +} diff --git a/nosql-vector-algorithms-typescript/sample.env b/nosql-vector-algorithms-typescript/sample.env new file mode 100644 index 0000000..25b6cb3 --- /dev/null +++ b/nosql-vector-algorithms-typescript/sample.env @@ -0,0 +1,23 @@ +# Identity for local developer authentication with Azure CLI +AZURE_TOKEN_CREDENTIALS=AzureCliCredential + +# Azure Cosmos DB +AZURE_COSMOSDB_ENDPOINT="YOUR_COSMOS_DB_ENDPOINT" +AZURE_COSMOSDB_DATABASENAME="Hotels" + +# Azure OpenAI Service +AZURE_OPENAI_EMBEDDING_ENDPOINT="YOUR_AZURE_OPENAI_ENDPOINT" +AZURE_OPENAI_EMBEDDING_MODEL="text-embedding-3-small" +AZURE_OPENAI_EMBEDDING_API_VERSION="2024-08-01-preview" + +# Vector configuration +EMBEDDED_FIELD="DescriptionVector" +VECTOR_ALGORITHM="all" +VECTOR_DISTANCE_FUNCTION="cosine" + +# Data file +DATA_FILE_WITH_VECTORS="../data/HotelsData_toCosmosDB_Vector.json" + +# Benchmark configuration (optional) +BENCHMARK_ITERATIONS="5" +BENCHMARK_TOP_K="5" diff --git a/nosql-vector-algorithms-typescript/scripts/create-resources.sh b/nosql-vector-algorithms-typescript/scripts/create-resources.sh new file mode 100644 index 0000000..f7b893d --- /dev/null +++ b/nosql-vector-algorithms-typescript/scripts/create-resources.sh @@ -0,0 +1,146 @@ +#!/bin/bash +# Create Azure resources for vector algorithm comparison sample +# Creates: Resource Group, Cosmos DB (serverless), 6 
containers (2 algorithms × 3 distance functions), Azure OpenAI +set -euo pipefail + +RESOURCE_GROUP=${RESOURCE_GROUP:-"rg-cosmos-vector-algorithms"} +LOCATION=${LOCATION:-"eastus2"} +COSMOSDB_ACCOUNT=${COSMOSDB_ACCOUNT:-"db-vector-$(openssl rand -hex 4)"} +DATABASE_NAME=${DATABASE_NAME:-"Hotels"} +OPENAI_ACCOUNT=${OPENAI_ACCOUNT:-"openai-vector-$(openssl rand -hex 4)"} +EMBEDDING_MODEL="text-embedding-3-small" +EMBEDDING_DIMENSIONS=1536 +PARTITION_KEY="/HotelId" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +echo "=== Vector Algorithm Comparison — Resource Setup ===" +echo "Resource Group: $RESOURCE_GROUP" +echo "Location: $LOCATION" +echo "Cosmos DB: $COSMOSDB_ACCOUNT" +echo "OpenAI: $OPENAI_ACCOUNT" +echo "" + +# 1. Resource Group +echo "Creating resource group..." +az group create --name "$RESOURCE_GROUP" --location "$LOCATION" --output none + +# 2. Azure Cosmos DB account (serverless) +echo "Creating Cosmos DB account (serverless)..." +az cosmosdb create \ + --name "$COSMOSDB_ACCOUNT" \ + --resource-group "$RESOURCE_GROUP" \ + --capabilities EnableServerless \ + --kind GlobalDocumentDB \ + --output none + +# 3. Database +echo "Creating database: $DATABASE_NAME..." +az cosmosdb sql database create \ + --account-name "$COSMOSDB_ACCOUNT" \ + --resource-group "$RESOURCE_GROUP" \ + --name "$DATABASE_NAME" \ + --output none + +# 4. Create 6 containers (2 algorithms × 3 distance functions) +ALGORITHMS=("quantizedflat" "diskann") +ALGORITHM_TYPES=("quantizedFlat" "diskANN") +DISTANCE_FUNCTIONS=("cosine" "dotproduct" "euclidean") + +for i in "${!ALGORITHMS[@]}"; do + alg="${ALGORITHMS[$i]}" + alg_type="${ALGORITHM_TYPES[$i]}" + + for dist in "${DISTANCE_FUNCTIONS[@]}"; do + container_name="hotels_${alg}_${dist}" + echo "Creating container: $container_name (algorithm: $alg_type, distance: $dist)..." 
+ + # Use the pre-defined index policy file + index_policy_file="${SCRIPT_DIR}/policies/${alg}-index-policy.json" + + # Generate vector embedding policy dynamically + vector_embedding_policy=$(cat </dev/null || echo " Cosmos DB role assignment already exists" + +# Cognitive Services OpenAI User +az role assignment create \ + --assignee "$CURRENT_USER_ID" \ + --role "Cognitive Services OpenAI User" \ + --scope "$( az cognitiveservices account show --name "$OPENAI_ACCOUNT" --resource-group "$RESOURCE_GROUP" --query id -o tsv)" \ + --output none 2>/dev/null || echo " OpenAI role assignment already exists" + +# Print connection info +COSMOS_ENDPOINT=$(az cosmosdb show --name "$COSMOSDB_ACCOUNT" --resource-group "$RESOURCE_GROUP" --query documentEndpoint -o tsv) +OPENAI_ENDPOINT=$(az cognitiveservices account show --name "$OPENAI_ACCOUNT" --resource-group "$RESOURCE_GROUP" --query "properties.endpoint" -o tsv) + +echo "" +echo "=== Setup Complete ===" +echo "" +echo "Add these to your .env file:" +echo "" +echo "AZURE_COSMOSDB_ENDPOINT=\"$COSMOS_ENDPOINT\"" +echo "AZURE_COSMOSDB_DATABASENAME=\"$DATABASE_NAME\"" +echo "AZURE_OPENAI_EMBEDDING_ENDPOINT=\"$OPENAI_ENDPOINT\"" +echo "AZURE_OPENAI_EMBEDDING_MODEL=\"$EMBEDDING_MODEL\"" +echo "AZURE_OPENAI_EMBEDDING_API_VERSION=\"2024-08-01-preview\"" diff --git a/nosql-vector-algorithms-typescript/scripts/delete-resources.sh b/nosql-vector-algorithms-typescript/scripts/delete-resources.sh new file mode 100644 index 0000000..ab8a69b --- /dev/null +++ b/nosql-vector-algorithms-typescript/scripts/delete-resources.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# Delete all Azure resources for vector algorithm comparison sample +set -euo pipefail + +RESOURCE_GROUP=${RESOURCE_GROUP:-"rg-cosmos-vector-algorithms"} + +echo "Deleting resource group: $RESOURCE_GROUP" +echo "This will delete ALL resources in the group." 
+echo "" + +az group delete --name "$RESOURCE_GROUP" --yes --no-wait + +echo "Resource group deletion initiated (running in background)." diff --git a/nosql-vector-algorithms-typescript/scripts/policies/diskann-index-policy.json b/nosql-vector-algorithms-typescript/scripts/policies/diskann-index-policy.json new file mode 100644 index 0000000..468de15 --- /dev/null +++ b/nosql-vector-algorithms-typescript/scripts/policies/diskann-index-policy.json @@ -0,0 +1,11 @@ +{ + "indexingMode": "consistent", + "includedPaths": [{"path": "/*"}], + "excludedPaths": [{"path": "/DescriptionVector/*"}], + "vectorIndexes": [ + { + "path": "/DescriptionVector", + "type": "diskANN" + } + ] +} diff --git a/nosql-vector-algorithms-typescript/scripts/policies/quantizedflat-index-policy.json b/nosql-vector-algorithms-typescript/scripts/policies/quantizedflat-index-policy.json new file mode 100644 index 0000000..90a0e0b --- /dev/null +++ b/nosql-vector-algorithms-typescript/scripts/policies/quantizedflat-index-policy.json @@ -0,0 +1,11 @@ +{ + "indexingMode": "consistent", + "includedPaths": [{"path": "/*"}], + "excludedPaths": [{"path": "/DescriptionVector/*"}], + "vectorIndexes": [ + { + "path": "/DescriptionVector", + "type": "quantizedFlat" + } + ] +} diff --git a/nosql-vector-algorithms-typescript/src/other/delete-all-documents.ts b/nosql-vector-algorithms-typescript/src/other/delete-all-documents.ts new file mode 100644 index 0000000..7ccb7fc --- /dev/null +++ b/nosql-vector-algorithms-typescript/src/other/delete-all-documents.ts @@ -0,0 +1,105 @@ +import { getClientsPasswordless, getBulkOperationRUs } from '../utils.js'; +import { BulkOperationType } from '@azure/cosmos'; + +type VectorAlgorithm = 'quantizedflat' | 'diskann'; +type DistanceFunction = 'cosine' | 'dotproduct' | 'euclidean'; + +const ALGORITHMS: VectorAlgorithm[] = ['quantizedflat', 'diskann']; +const DISTANCE_FUNCTIONS: DistanceFunction[] = ['cosine', 'dotproduct', 'euclidean']; + +function getTargetContainers( + 
algorithmEnv: string, + distanceEnv: string +): string[] { + const algorithms: VectorAlgorithm[] = + algorithmEnv === 'all' ? ALGORITHMS : [algorithmEnv as VectorAlgorithm]; + const distances: DistanceFunction[] = + distanceEnv === 'all' ? DISTANCE_FUNCTIONS : [distanceEnv as DistanceFunction]; + + const containers: string[] = []; + for (const alg of algorithms) { + for (const dist of distances) { + containers.push(`hotels_${alg}_${dist}`); + } + } + return containers; +} + +async function main() { + const { dbClient } = getClientsPasswordless(); + + if (!dbClient) { + throw new Error('Database client is not configured. Please check your environment variables.'); + } + + const dbName = process.env.AZURE_COSMOSDB_DATABASENAME || 'Hotels'; + const algorithmEnv = (process.env.VECTOR_ALGORITHM || 'all').trim().toLowerCase(); + const distanceEnv = (process.env.VECTOR_DISTANCE_FUNCTION || 'all').trim().toLowerCase(); + + const containers = getTargetContainers(algorithmEnv, distanceEnv); + + const database = dbClient.database(dbName); + console.log(`Connected to database: ${dbName}`); + console.log(`Containers to clean: ${containers.join(', ')}\n`); + + let totalDeleted = 0; + let totalFailed = 0; + + for (const containerName of containers) { + try { + const container = database.container(containerName); + console.log(`Deleting documents from container: ${containerName}`); + + const { resources } = await container.items + .query('SELECT c.id, c.HotelId FROM c') + .fetchAll(); + + if (resources.length === 0) { + console.log(` No documents found in ${containerName}`); + continue; + } + + console.log(` Found ${resources.length} documents to delete`); + + const operations = resources.map((doc: any) => ({ + operationType: BulkOperationType.Delete, + id: doc.id, + partitionKey: [doc.HotelId], + })); + + const response = await container.items.executeBulkOperations(operations); + + let deleted = 0; + let failed = 0; + if (response) { + response.forEach((result: any) => { + if 
(result.statusCode >= 200 && result.statusCode < 300) { + deleted++; + } else { + failed++; + } + }); + } + + const totalRequestCharge = getBulkOperationRUs(response); + + console.log(` ✓ Deleted: ${deleted}, Failed: ${failed}`); + console.log(` Delete Request Charge: ${totalRequestCharge.toFixed(2)} RUs`); + totalDeleted += deleted; + totalFailed += failed; + } catch (error) { + if ((error as any).code === 404) { + console.log(` Container '${containerName}' not found — skipping.`); + } else { + console.error(` Error deleting from ${containerName}:`, (error as Error).message); + } + } + } + + console.log(`\nSummary: Total Deleted: ${totalDeleted}, Total Failed: ${totalFailed}`); +} + +main().catch((error) => { + console.error('Unexpected error:', error); + process.exitCode = 1; +}); diff --git a/nosql-vector-algorithms-typescript/src/other/query-metrics.ts b/nosql-vector-algorithms-typescript/src/other/query-metrics.ts new file mode 100644 index 0000000..8093c18 --- /dev/null +++ b/nosql-vector-algorithms-typescript/src/other/query-metrics.ts @@ -0,0 +1,176 @@ +import { getClientsPasswordless, validateFieldName, getQueryActivityId } from '../utils.js'; + +type VectorAlgorithm = 'quantizedflat' | 'diskann'; +type DistanceFunction = 'cosine' | 'dotproduct' | 'euclidean'; + +const ALGORITHMS: VectorAlgorithm[] = ['quantizedflat', 'diskann']; +const DISTANCE_FUNCTIONS: DistanceFunction[] = ['cosine', 'dotproduct', 'euclidean']; + +const ALGORITHM_LABELS: Record = { + quantizedflat: 'QuantizedFlat', + diskann: 'DiskANN', +}; + +function getTargetContainers( + algorithmEnv: string, + distanceEnv: string +): Array<{ containerName: string; algorithm: VectorAlgorithm; distanceFunction: DistanceFunction }> { + const algorithms: VectorAlgorithm[] = + algorithmEnv === 'all' ? ALGORITHMS : [algorithmEnv as VectorAlgorithm]; + const distances: DistanceFunction[] = + distanceEnv === 'all' ? 
DISTANCE_FUNCTIONS : [distanceEnv as DistanceFunction]; + + const targets: Array<{ containerName: string; algorithm: VectorAlgorithm; distanceFunction: DistanceFunction }> = []; + for (const alg of algorithms) { + for (const dist of distances) { + targets.push({ + containerName: `hotels_${alg}_${dist}`, + algorithm: alg, + distanceFunction: dist, + }); + } + } + return targets; +} + +interface BenchmarkResult { + containerName: string; + algorithm: string; + distanceFunction: string; + avgLatencyMs: number; + avgRU: number; + resultCount: number; +} + +async function main() { + const { aiClient, dbClient } = getClientsPasswordless(); + + if (!aiClient) { + throw new Error('Azure OpenAI client is not configured. Please check your environment variables.'); + } + if (!dbClient) { + throw new Error('Cosmos DB client is not configured. Please check your environment variables.'); + } + + const dbName = process.env.AZURE_COSMOSDB_DATABASENAME || 'Hotels'; + const embeddedField = process.env.EMBEDDED_FIELD || 'DescriptionVector'; + const deployment = process.env.AZURE_OPENAI_EMBEDDING_MODEL!; + const algorithmEnv = (process.env.VECTOR_ALGORITHM || 'all').trim().toLowerCase(); + const distanceEnv = (process.env.VECTOR_DISTANCE_FUNCTION || 'all').trim().toLowerCase(); + const iterations = parseInt(process.env.BENCHMARK_ITERATIONS || '5', 10); + const topK = parseInt(process.env.BENCHMARK_TOP_K || '5', 10); + const searchQuery = 'quintessential lodging near running trails, eateries, retail'; + + const targets = getTargetContainers(algorithmEnv, distanceEnv); + + console.log(`\n📊 Vector Algorithm Benchmark`); + console.log(` Iterations: ${iterations} (first discarded as cold start)`); + console.log(` Top K: ${topK}`); + console.log(` Containers: ${targets.map(t => t.containerName).join(', ')}\n`); + + const database = dbClient.database(dbName); + + // Generate query embedding once + const embeddingResponse = await aiClient.embeddings.create({ + model: deployment, + input: 
[searchQuery], + }); + const queryEmbedding = embeddingResponse.data[0].embedding; + + const safeEmbeddedField = validateFieldName(embeddedField); + const queryText = `SELECT TOP ${topK} c.HotelName, c.Description, c.Rating, VectorDistance(c.${safeEmbeddedField}, @embedding) AS SimilarityScore FROM c ORDER BY VectorDistance(c.${safeEmbeddedField}, @embedding)`; + + const benchmarkResults: BenchmarkResult[] = []; + + for (const target of targets) { + console.log(`\n━━━ Benchmarking: ${ALGORITHM_LABELS[target.algorithm]} / ${target.distanceFunction} ━━━`); + + try { + const container = database.container(target.containerName); + await container.read(); + + const latencies: number[] = []; + const rus: number[] = []; + let resultCount = 0; + + for (let i = 0; i < iterations; i++) { + const startTime = Date.now(); + + const queryResponse = await container.items + .query({ + query: queryText, + parameters: [ + { name: '@embedding', value: queryEmbedding }, + ], + }) + .fetchAll(); + + const latencyMs = Date.now() - startTime; + const activityId = getQueryActivityId(queryResponse); + + console.log(` Iteration ${i + 1}: ${latencyMs}ms, ${queryResponse.requestCharge?.toFixed(2)} RUs${i === 0 ? ' (cold start — excluded)' : ''}${activityId ? `, activity: ${activityId}` : ''}`); + + // Discard first iteration (cold start) + if (i > 0) { + latencies.push(latencyMs); + rus.push(queryResponse.requestCharge ?? 0); + } + resultCount = queryResponse.resources?.length ?? 0; + } + + const avgLatency = latencies.length > 0 ? latencies.reduce((a, b) => a + b, 0) / latencies.length : 0; + const avgRU = rus.length > 0 ? 
rus.reduce((a, b) => a + b, 0) / rus.length : 0; + + benchmarkResults.push({ + containerName: target.containerName, + algorithm: ALGORITHM_LABELS[target.algorithm], + distanceFunction: target.distanceFunction, + avgLatencyMs: avgLatency, + avgRU: avgRU, + resultCount, + }); + } catch (error) { + if ((error as any).code === 404) { + console.error(` ✗ Container '${target.containerName}' not found.`); + } else { + console.error(` ✗ Error:`, (error as Error).message); + } + } + } + + // Print comparison table + if (benchmarkResults.length > 0) { + console.log('\n╔══════════════════════════════════════════════════════════════════════╗'); + console.log('║ Benchmark Comparison Results ║'); + console.log('╠══════════════════════════════════════════════════════════════════════╣'); + console.log( + '║ ' + + 'Algorithm'.padEnd(16) + + 'Distance'.padEnd(14) + + 'Avg Latency'.padEnd(14) + + 'Avg RU'.padEnd(12) + + 'Results'.padEnd(10) + + '║' + ); + console.log('╠══════════════════════════════════════════════════════════════════════╣'); + + for (const r of benchmarkResults) { + console.log( + '║ ' + + r.algorithm.padEnd(16) + + r.distanceFunction.padEnd(14) + + `${r.avgLatencyMs.toFixed(0)}ms`.padEnd(14) + + r.avgRU.toFixed(2).padEnd(12) + + String(r.resultCount).padEnd(10) + + '║' + ); + } + + console.log('╚══════════════════════════════════════════════════════════════════════╝'); + } +} + +main().catch((error) => { + console.error('Benchmark failed:', error); + process.exitCode = 1; +}); diff --git a/nosql-vector-algorithms-typescript/src/other/verify-vector-setup.ts b/nosql-vector-algorithms-typescript/src/other/verify-vector-setup.ts new file mode 100644 index 0000000..7ba843b --- /dev/null +++ b/nosql-vector-algorithms-typescript/src/other/verify-vector-setup.ts @@ -0,0 +1,382 @@ +/** + * Verify Vector Setup + * + * Validates that the Azure Cosmos DB vector algorithm comparison infrastructure + * is configured correctly by checking: + * + * 1. 
Environment variables — all required settings are present + * 2. Client initialization — Azure OpenAI and Cosmos DB clients connect + * 3. Embedding model — model responds and returns expected dimensions + * 4. Container access — each expected container exists and is accessible + * 5. Container policies — vectorEmbeddingPolicy and indexingPolicy are aligned + * 6. Stored vectors — documents contain vectors with correct dimensions + * 7. Vector search — VectorDistance returns non-null, ordered scores + */ +import { getClientsPasswordless, validateFieldName } from '../utils.js'; + +type VectorAlgorithm = 'quantizedflat' | 'diskann'; +type DistanceFunction = 'cosine' | 'dotproduct' | 'euclidean'; + +const ALGORITHMS: VectorAlgorithm[] = ['quantizedflat', 'diskann']; +const DISTANCE_FUNCTIONS: DistanceFunction[] = ['cosine', 'dotproduct', 'euclidean']; + +function getTargetContainers( + algorithmEnv: string, + distanceEnv: string +): Array<{ containerName: string; algorithm: VectorAlgorithm; distanceFunction: DistanceFunction }> { + const algorithms: VectorAlgorithm[] = + algorithmEnv === 'all' ? ALGORITHMS : [algorithmEnv as VectorAlgorithm]; + const distances: DistanceFunction[] = + distanceEnv === 'all' ? 
DISTANCE_FUNCTIONS : [distanceEnv as DistanceFunction]; + + const targets: Array<{ containerName: string; algorithm: VectorAlgorithm; distanceFunction: DistanceFunction }> = []; + for (const alg of algorithms) { + for (const dist of distances) { + targets.push({ + containerName: `hotels_${alg}_${dist}`, + algorithm: alg, + distanceFunction: dist, + }); + } + } + return targets; +} + +interface CheckResult { + name: string; + passed: boolean; + message: string; + details?: string; +} + +function checkEnvironmentVariables(): CheckResult { + const required: Record = { + AZURE_COSMOSDB_ENDPOINT: process.env.AZURE_COSMOSDB_ENDPOINT, + AZURE_OPENAI_EMBEDDING_ENDPOINT: process.env.AZURE_OPENAI_EMBEDDING_ENDPOINT, + AZURE_OPENAI_EMBEDDING_MODEL: process.env.AZURE_OPENAI_EMBEDDING_MODEL, + AZURE_OPENAI_EMBEDDING_API_VERSION: process.env.AZURE_OPENAI_EMBEDDING_API_VERSION, + EMBEDDED_FIELD: process.env.EMBEDDED_FIELD, + }; + + const missing = Object.entries(required) + .filter(([, value]) => !value) + .map(([key]) => key); + + if (missing.length > 0) { + return { + name: 'Environment variables', + passed: false, + message: `Missing required variables: ${missing.join(', ')}`, + details: 'Ensure your .env file is loaded and all required variables are set.', + }; + } + return { name: 'Environment variables', passed: true, message: 'All required variables present' }; +} + +function checkClients(aiClient: unknown, dbClient: unknown): CheckResult { + if (!aiClient) { + return { name: 'Client initialization', passed: false, message: 'Azure OpenAI client is not configured.' }; + } + if (!dbClient) { + return { name: 'Client initialization', passed: false, message: 'Cosmos DB client is not configured.' 
}; + } + return { name: 'Client initialization', passed: true, message: 'Both clients initialized' }; +} + +async function checkEmbeddingModel(aiClient: any, deployment: string, expectedDimensions: number): Promise<CheckResult> { + try { + const response = await aiClient.embeddings.create({ model: deployment, input: ['test'] }); + const actualDimensions = response.data[0].embedding.length; + + if (actualDimensions !== expectedDimensions) { + return { + name: 'Embedding model dimensions', + passed: false, + message: `Dimension mismatch: model returns ${actualDimensions} but expected ${expectedDimensions}`, + }; + } + return { + name: 'Embedding model dimensions', + passed: true, + message: `Model '${deployment}' returns ${actualDimensions} dimensions (matches config)`, + }; + } catch (error: any) { + return { name: 'Embedding model dimensions', passed: false, message: `Failed: ${error.message}` }; + } +} + +async function checkContainerAccess(dbClient: any, dbName: string, containerName: string): Promise<CheckResult> { + try { + const container = dbClient.database(dbName).container(containerName); + await container.read(); + return { name: `Container access (${containerName})`, passed: true, message: 'Accessible' }; + } catch (error: any) { + return { + name: `Container access (${containerName})`, + passed: false, + message: error.code === 404 + ? `Container '${containerName}' not found. Run scripts/create-resources.sh.` + : `Access failed: ${error.message}`, + }; + } +} + +async function checkContainerPolicies( + dbClient: any, + dbName: string, + containerName: string, + embeddedField: string, + expectedDimensions: number, + expectedDistanceFunction: string +): Promise<CheckResult> { + try { + const container = dbClient.database(dbName).container(containerName); + const { resource } = await container.read(); + + if (!resource) { + return { name: `Container policies (${containerName})`, passed: false, message: 'Could not read container definition.' 
}; + } + + const issues: string[] = []; + const vectorEmbeddings: any[] = resource.vectorEmbeddingPolicy?.vectorEmbeddings ?? []; + const vectorIndexes: any[] = resource.indexingPolicy?.vectorIndexes ?? []; + const expectedPath = `/${embeddedField}`; + + if (vectorEmbeddings.length === 0) issues.push('No vectorEmbeddingPolicy defined.'); + if (vectorIndexes.length === 0) issues.push('No vectorIndexes defined.'); + + const matchingEmbedding = vectorEmbeddings.find((v: any) => v.path === expectedPath); + if (matchingEmbedding) { + if (matchingEmbedding.dimensions !== expectedDimensions) { + issues.push(`Dimensions mismatch: policy=${matchingEmbedding.dimensions}, expected=${expectedDimensions}`); + } + if (matchingEmbedding.distanceFunction && matchingEmbedding.distanceFunction !== expectedDistanceFunction) { + issues.push(`Distance function mismatch: policy='${matchingEmbedding.distanceFunction}', expected='${expectedDistanceFunction}'`); + } + } + + const indexType = vectorIndexes.find((v: any) => v.path === expectedPath)?.type ?? 
'unknown'; + + if (issues.length > 0) { + return { name: `Container policies (${containerName})`, passed: false, message: issues.join(' | ') }; + } + return { + name: `Container policies (${containerName})`, + passed: true, + message: `Index type: ${indexType}, dims: ${matchingEmbedding?.dimensions}, dist: ${matchingEmbedding?.distanceFunction}`, + }; + } catch (error: any) { + return { name: `Container policies (${containerName})`, passed: false, message: `Policy check failed: ${error.message}` }; + } +} + +async function checkStoredVectors( + dbClient: any, + dbName: string, + containerName: string, + embeddedField: string, + expectedDimensions: number +): Promise { + try { + const safeField = validateFieldName(embeddedField); + const container = dbClient.database(dbName).container(containerName); + + const countResult = await container.items.query('SELECT VALUE COUNT(1) FROM c').fetchAll(); + const totalCount = countResult.resources[0] ?? 0; + + if (totalCount === 0) { + return { name: `Stored vectors (${containerName})`, passed: false, message: 'No documents found. Insert data first.' }; + } + + const vectorCountResult = await container.items + .query(`SELECT VALUE COUNT(1) FROM c WHERE IS_ARRAY(c.${safeField})`) + .fetchAll(); + const vectorCount = vectorCountResult.resources[0] ?? 
0; + + if (vectorCount === 0) { + return { + name: `Stored vectors (${containerName})`, + passed: false, + message: `None of ${totalCount} documents contain vector field '${embeddedField}'.`, + }; + } + + const sampleResult = await container.items + .query(`SELECT TOP 1 ARRAY_LENGTH(c.${safeField}) AS dims FROM c WHERE IS_ARRAY(c.${safeField})`) + .fetchAll(); + const storedDims = sampleResult.resources[0]?.dims; + + if (storedDims !== expectedDimensions) { + return { + name: `Stored vectors (${containerName})`, + passed: false, + message: `Stored dims (${storedDims}) != expected (${expectedDimensions}).`, + }; + } + + return { + name: `Stored vectors (${containerName})`, + passed: true, + message: `${vectorCount}/${totalCount} documents have ${storedDims}-dim vectors`, + }; + } catch (error: any) { + return { name: `Stored vectors (${containerName})`, passed: false, message: `Check failed: ${error.message}` }; + } +} + +/** + * Embeds `query` with the given deployment, runs a TOP 5 `VectorDistance` search against the container, + * and reports a failed check when no rows come back, any score is null, or the scores are not monotonic. + * Errors from the embedding call or the query are caught and returned as a failed check rather than thrown. + */ +async function checkVectorSearch( + dbClient: any, + aiClient: any, + dbName: string, + containerName: string, + embeddedField: string, + deployment: string, + query: string +): Promise<CheckResult> { + try { + const safeField = validateFieldName(embeddedField); + const embeddingResponse = await aiClient.embeddings.create({ model: deployment, input: [query] }); + const queryVector = embeddingResponse.data[0].embedding; + + const container = dbClient.database(dbName).container(containerName); + const queryText = `SELECT TOP 5 c.HotelName, VectorDistance(c.${safeField}, @embedding) AS SimilarityScore FROM c ORDER BY VectorDistance(c.${safeField}, @embedding)`; + + const response = await container.items + .query({ query: queryText, parameters: [{ name: '@embedding', value: queryVector }] }) + .fetchAll(); + + const results = response.resources; + if (!results || results.length === 0) { + return { name: `Vector search (${containerName})`, passed: false, message: 'No results returned.' 
}; + } + + const nullScores = results.filter((r: any) => r.SimilarityScore == null); + if (nullScores.length > 0) { + return { + name: `Vector search (${containerName})`, + passed: false, + message: `${nullScores.length}/${results.length} results have null scores. Dimension or path mismatch.`, + }; + } + + const scores = results.map((r: any) => r.SimilarityScore as number); + // ORDER BY VectorDistance yields most-similar first: scores descend for cosine/dotproduct but ascend for euclidean (smaller distance = closer). + // The distance function is not passed to this check, so accept either monotonic direction. + const isDescending = scores.every((s: number, i: number) => i === 0 || scores[i - 1] >= s); + const isAscending = scores.every((s: number, i: number) => i === 0 || scores[i - 1] <= s); + const scoreRange = `[${Math.min(...scores).toFixed(4)} – ${Math.max(...scores).toFixed(4)}]`; + + if (!isDescending && !isAscending) { + return { + name: `Vector search (${containerName})`, + passed: false, + message: `Results not ordered by similarity. Scores: ${scores.map((s: number) => s.toFixed(4)).join(', ')}`, + }; + } + + return { + name: `Vector search (${containerName})`, + passed: true, + message: `${results.length} results, score range: ${scoreRange}, RU: ${response.requestCharge?.toFixed(2) ?? 'n/a'}`, + }; + } catch (error: any) { + return { name: `Vector search (${containerName})`, passed: false, message: `Search failed: ${error.message}` }; + } +} + +// ─── Main ───────────────────────────────────────────────────────────────────── + +async function main() { + console.log('=== Azure Cosmos DB Vector Algorithm Setup Validation ===\n'); + + const embeddedField = process.env.EMBEDDED_FIELD || 'DescriptionVector'; + const dbName = process.env.AZURE_COSMOSDB_DATABASENAME || 'Hotels'; + const deployment = process.env.AZURE_OPENAI_EMBEDDING_MODEL!; + const expectedDimensions = parseInt(process.env.EMBEDDING_DIMENSIONS || '1536', 10); + const algorithmEnv = (process.env.VECTOR_ALGORITHM || 'all').trim().toLowerCase(); + const distanceEnv = (process.env.VECTOR_DISTANCE_FUNCTION || 'all').trim().toLowerCase(); + const searchQuery = 'quintessential lodging near running trails, eateries, retail'; + + const targets = getTargetContainers(algorithmEnv, distanceEnv); + const results: CheckResult[] = []; + + // 1. 
Environment variables + const envCheck = checkEnvironmentVariables(); + results.push(envCheck); + printCheck(envCheck); + if (!envCheck.passed) { printSummary(results); return; } + + // 2. Client initialization + const { aiClient, dbClient } = getClientsPasswordless(); + const clientCheck = checkClients(aiClient, dbClient); + results.push(clientCheck); + printCheck(clientCheck); + if (!clientCheck.passed) { printSummary(results); return; } + + // 3. Embedding model check + const embeddingCheck = await checkEmbeddingModel(aiClient!, deployment, expectedDimensions); + results.push(embeddingCheck); + printCheck(embeddingCheck); + + // 4-7. Per-container checks + for (const target of targets) { + console.log(`\n--- Container: ${target.containerName} (${target.algorithm}/${target.distanceFunction}) ---`); + + const accessCheck = await checkContainerAccess(dbClient!, dbName, target.containerName); + results.push(accessCheck); + printCheck(accessCheck); + if (!accessCheck.passed) continue; + + const policyCheck = await checkContainerPolicies( + dbClient!, dbName, target.containerName, embeddedField, expectedDimensions, target.distanceFunction + ); + results.push(policyCheck); + printCheck(policyCheck); + + const storedCheck = await checkStoredVectors(dbClient!, dbName, target.containerName, embeddedField, expectedDimensions); + results.push(storedCheck); + printCheck(storedCheck); + if (!storedCheck.passed) continue; + + const searchCheck = await checkVectorSearch( + dbClient!, aiClient!, dbName, target.containerName, embeddedField, deployment, searchQuery + ); + results.push(searchCheck); + printCheck(searchCheck); + } + + printSummary(results); +} + +function printCheck(check: CheckResult): void { + const icon = check.passed ? 
'PASS' : 'FAIL'; + console.log(` [${icon}] ${check.name}: ${check.message}`); + if (check.details) { + console.log(` ${check.details}`); + } +} + +function printSummary(results: CheckResult[]): void { + const passed = results.filter(r => r.passed).length; + const failed = results.filter(r => !r.passed).length; + + console.log('\n=== Summary ==='); + console.log(` Passed: ${passed} | Failed: ${failed} | Total: ${results.length}`); + + if (failed > 0) { + console.log('\nFailed checks:'); + results.filter(r => !r.passed).forEach(r => { + console.log(` - ${r.name}: ${r.message}`); + }); + console.log('\nCommon issues:'); + console.log(' 1. Missing containers: Run scripts/create-resources.sh to provision all 6 containers.'); + console.log(' 2. Dimension mismatch: VectorDistance returns null silently when dimensions differ.'); + console.log(' 3. Wrong vector path: Verify EMBEDDED_FIELD matches the container vectorEmbeddingPolicy path.'); + } else { + console.log('\nAll checks passed. Vector algorithm comparison infrastructure is correctly configured.'); + } + + process.exitCode = failed > 0 ? 
1 : 0; +} + +main().catch(error => { + console.error('Validation failed with unhandled error:', error); + process.exitCode = 1; +}); diff --git a/nosql-vector-algorithms-typescript/src/utils.ts b/nosql-vector-algorithms-typescript/src/utils.ts new file mode 100644 index 0000000..6d0052f --- /dev/null +++ b/nosql-vector-algorithms-typescript/src/utils.ts @@ -0,0 +1,377 @@ +import { CosmosClient, BulkOperationType } from '@azure/cosmos'; +import { AzureOpenAI } from "openai"; +import { promises as fs } from "fs"; +import { DefaultAzureCredential, getBearerTokenProvider } from "@azure/identity"; + +// Define a type for JSON data +export type JsonData = Record; + +export function getClients(): { aiClient: AzureOpenAI | null; dbClient: CosmosClient | null } { + + let aiClient: AzureOpenAI | null = null; + let dbClient: CosmosClient | null = null; + + const apiKey = process.env.AZURE_OPENAI_EMBEDDING_KEY!; + const apiVersion = process.env.AZURE_OPENAI_EMBEDDING_API_VERSION!; + const endpoint = process.env.AZURE_OPENAI_EMBEDDING_ENDPOINT!; + const deployment = process.env.AZURE_OPENAI_EMBEDDING_MODEL!; + + if (apiKey && apiVersion && endpoint && deployment) { + + aiClient = new AzureOpenAI({ + apiKey, + apiVersion, + endpoint, + deployment + }); + } + + const cosmosEndpoint = process.env.AZURE_COSMOSDB_ENDPOINT!; + const cosmosKey = process.env.AZURE_COSMOSDB_KEY!; + + if (cosmosEndpoint && cosmosKey) { + dbClient = new CosmosClient({ endpoint: cosmosEndpoint, key: cosmosKey }); + } + + return { aiClient, dbClient }; +} + +/** + * Get Azure OpenAI and Cosmos DB clients using passwordless authentication (managed identity) + * This function uses DefaultAzureCredential for authentication instead of API keys + * + * @returns Object containing AzureOpenAI and CosmosClient instances or null if configuration is missing + */ +export function getClientsPasswordless(): { aiClient: AzureOpenAI | null; dbClient: CosmosClient | null } { + let aiClient: AzureOpenAI | null = null; + let 
dbClient: CosmosClient | null = null; + + // For Azure OpenAI with DefaultAzureCredential + const apiVersion = process.env.AZURE_OPENAI_EMBEDDING_API_VERSION!; + const endpoint = process.env.AZURE_OPENAI_EMBEDDING_ENDPOINT!; + const deployment = process.env.AZURE_OPENAI_EMBEDDING_MODEL!; + + if (apiVersion && endpoint && deployment) { + const credential = new DefaultAzureCredential(); + const scope = "https://cognitiveservices.azure.com/.default"; + const azureADTokenProvider = getBearerTokenProvider(credential, scope); + aiClient = new AzureOpenAI({ + apiVersion, + endpoint, + deployment, + azureADTokenProvider + }); + } + + // For Cosmos DB with DefaultAzureCredential + const cosmosEndpoint = process.env.AZURE_COSMOSDB_ENDPOINT!; + + if (cosmosEndpoint) { + const credential = new DefaultAzureCredential(); + + dbClient = new CosmosClient({ + endpoint: cosmosEndpoint, + aadCredentials: credential + }); + } + + return { aiClient, dbClient }; +} + +export async function readFileReturnJson(filePath: string): Promise { + + console.log(`Reading JSON file from ${filePath}`); + + const fileAsString = await fs.readFile(filePath, "utf-8"); + return JSON.parse(fileAsString); +} + +/** + * Check if a container has any documents + * @param container - Cosmos DB container reference + * @returns Number of documents in the container + */ +async function getDocumentCount(container: any): Promise { + const countResult = await container.items + .query('SELECT VALUE COUNT(1) FROM c') + .fetchAll(); + + return countResult.resources[0]; +} + +export async function insertData(container, data): Promise<{ total: number; inserted: number; failed: number; skipped: number; requestCharge: number }> { + // Check if container already has documents + const existingCount = await getDocumentCount(container); + + if (existingCount > 0) { + console.log(`Container already has ${existingCount} documents. 
/**
 * Validates a field name to ensure it's a safe identifier for use in queries.
 * Prevents NoSQL injection when using string interpolation in query construction.
 *
 * @param fieldName - Candidate field name.
 * @returns The same `fieldName`, unchanged, when it is valid.
 * @throws Error when `fieldName` is not a string, is empty, or contains anything other
 *   than letters, digits and underscores (or starts with a digit).
 */
export function validateFieldName(fieldName: string): string {
  const validIdentifierPattern = /^[A-Za-z_][A-Za-z0-9_]*$/;

  // RegExp.test() coerces its argument to a string, so without the typeof guard
  // `undefined` and `null` would be accepted as the identifiers "undefined"/"null".
  if (typeof fieldName !== 'string' || !validIdentifierPattern.test(fieldName)) {
    throw new Error(
      `Invalid field name: "${String(fieldName)}". ` +
      `Field names must start with a letter or underscore and contain only letters, numbers, and underscores.`
    );
  }

  return fieldName;
}
/** Format a similarity score with four decimals; anything that is not a number prints as "N/A". */
function formatSimilarityScore(score: unknown): string {
  return typeof score === 'number' ? score.toFixed(4) : 'N/A';
}

/**
 * Print search results in a consistent format
 *
 * @param searchResults - Rows carrying `HotelName` and `SimilarityScore`.
 * @param requestCharge - Optional RU charge; printed only when it is a number.
 */
export function printSearchResults(searchResults: any[], requestCharge?: number): void {
  console.log('\n--- Search Results ---');
  if (!searchResults || searchResults.length === 0) {
    console.log('No results found.');
    return;
  }

  searchResults.forEach((result, index) => {
    // A row without a numeric score must not abort the whole listing.
    console.log(`${index + 1}. ${result?.HotelName}, Score: ${formatSimilarityScore(result?.SimilarityScore)}`);
  });

  if (typeof requestCharge === 'number') {
    console.log(`\nVector Search Request Charge: ${requestCharge.toFixed(2)} RUs`);
  }
  console.log('');
}

/**
 * Find the activity id of a query response.
 *
 * @param queryResponse - Response object of a query; may be null/undefined.
 * @returns `queryResponse.activityId` when present, otherwise the first `activityId` found in
 *   `diagnostics.clientSideRequestStatistics.gatewayStatistics`, otherwise `undefined`.
 */
export function getQueryActivityId(queryResponse: any): string | undefined {
  if (!queryResponse) {
    return undefined;
  }

  const gatewayStats = queryResponse.diagnostics?.clientSideRequestStatistics?.gatewayStatistics;
  const gatewayActivityId = Array.isArray(gatewayStats)
    ? gatewayStats.find((entry: any) => entry?.activityId)?.activityId
    : undefined;

  return queryResponse.activityId ?? gatewayActivityId;
}

const REQUEST_CHARGE_HEADER = 'x-ms-request-charge';

/** Convert a raw charge value (number, numeric string, ...) to a finite number, or 0. */
function toRequestCharge(value: unknown): number {
  if (value == null) {
    return 0;
  }
  const parsed = typeof value === 'number' ? value : parseFloat(String(value));
  return Number.isFinite(parsed) ? parsed : 0;
}

/** Read the request-charge header from a plain-object or `get()`-style header bag. */
function readRequestChargeHeader(headers: any): unknown {
  if (headers == null) {
    return undefined;
  }
  return headers[REQUEST_CHARGE_HEADER]
    ?? (typeof headers.get === 'function' ? headers.get(REQUEST_CHARGE_HEADER) : undefined);
}

/** Return the first non-zero charge found on a single bulk result, or 0. */
function extractRequestCharge(result: any): number {
  if (result == null) {
    return 0;
  }

  const topLevel = result.requestCharge;
  const op = result.operationResponse;

  // Candidates in priority order: numeric fields first, then string fields, then headers.
  const candidates: unknown[] = [
    typeof topLevel === 'number' ? topLevel : undefined,
    result.response?.requestCharge,
    topLevel,
    readRequestChargeHeader(op?.headers),
    readRequestChargeHeader(op?._response?.headers),
    readRequestChargeHeader(result.headers),
    readRequestChargeHeader(result._response?.headers),
    readRequestChargeHeader(result.response?.headers),
  ];

  for (const candidate of candidates) {
    const charge = toRequestCharge(candidate);
    if (charge) {
      return charge;
    }
  }
  return 0;
}

/** Normalize the supported response shapes to a list of per-operation results. */
function toBulkResultItems(response: any): any[] | undefined {
  if (Array.isArray(response)) {
    return response;
  }
  for (const key of ['resources', 'results', 'result']) {
    if (Array.isArray(response[key])) {
      return response[key];
    }
  }
  return typeof response === 'object' ? [response] : undefined;
}

/** Log the keys of one sample result to help locate the charge when none was found. */
function warnNoRequestCharge(sample: any): void {
  try {
    console.warn('getBulkOperationRUs: no RUs found. Sample result keys:', sample ? Object.keys(sample) : []);
    if (sample && sample.response) {
      console.warn('  sample.response keys:', Object.keys(sample.response));
      const hdrs = sample.response.headers
        ?? sample.response._response?.headers
        ?? sample.response.operationResponse?.headers;
      console.warn('  sample.response headers sample:', hdrs ? Object.keys(hdrs) : hdrs);
    }
  } catch (e) {
    console.warn('Could not inspect sample result for debugging:', e);
  }
}

/**
 * Sum the request charge (RUs) over the results of a bulk operation.
 *
 * Accepts an array of results, an object wrapping one under `resources`, `results` or
 * `result`, or a single result object. For each result the charge is taken from
 * `requestCharge`, `response.requestCharge`, or an `x-ms-request-charge` header on
 * `operationResponse`, the result itself, or its `response`. Null entries and
 * unparseable values count as 0.
 *
 * @param response - Bulk operation response in any of the shapes above.
 * @returns Total request charge; 0 (with a console warning) when nothing is found.
 */
export function getBulkOperationRUs(response: any): number {
  if (!response) {
    console.warn('Empty response. Cannot calculate RUs from bulk operation.');
    return 0;
  }

  const items = toBulkResultItems(response);
  if (!items) {
    console.warn('Response does not contain bulk operation results.');
    return 0;
  }

  let totalRequestCharge = 0;
  for (const item of items) {
    totalRequestCharge += extractRequestCharge(item);
  }

  if (totalRequestCharge === 0) {
    warnNoRequestCharge(items[0]);
  }

  return totalRequestCharge;
}
/**
 * Print a side-by-side comparison table of vector search results across containers,
 * followed by the detailed result list of each container.
 *
 * The table shows one row per entry with the top hit (name cut to 20 characters), its
 * score, the RU charge and the latency. Missing names or non-numeric values print as
 * "N/A" instead of throwing. Border and row widths are derived from one column list so
 * they cannot drift apart.
 *
 * @param results - One entry per queried container.
 */
export function printComparisonTable(
  results: Array<{
    containerName: string;
    algorithm: string;
    distanceFunction: string;
    searchResults: any[];
    requestCharge: number;
    latencyMs: number;
  }>
): void {
  // [header label, column width]
  const columns: ReadonlyArray<readonly [string, number]> = [
    ['Algorithm', 16],
    ['Distance', 14],
    ['Top Result', 22],
    ['Score', 10],
    ['RU', 10],
    ['ms', 8],
  ];

  // Rows are rendered as '║ ' + cells + '║', so the inner width is one space plus all columns.
  const innerWidth = 1 + columns.reduce((sum, [, width]) => sum + width, 0);
  const rule = '═'.repeat(innerWidth);

  const formatRow = (cells: readonly string[]): string =>
    '║ ' + columns.map(([, width], i) => (cells[i] ?? '').padEnd(width)).join('') + '║';
  const fixed = (value: unknown, digits: number): string =>
    typeof value === 'number' ? value.toFixed(digits) : 'N/A';

  const title = 'Vector Algorithm Comparison Results';
  const leftPad = Math.max(0, Math.floor((innerWidth - title.length) / 2));

  console.log(`\n╔${rule}╗`);
  console.log('║' + (' '.repeat(leftPad) + title).padEnd(innerWidth) + '║');
  console.log(`╠${rule}╣`);
  console.log(formatRow(columns.map(([label]) => label)));
  console.log(`╠${rule}╣`);

  for (const r of results) {
    const topResult = r.searchResults?.[0];
    const topName = topResult?.HotelName != null
      ? String(topResult.HotelName).substring(0, 20)
      : 'N/A';

    console.log(
      formatRow([
        String(r.algorithm),
        String(r.distanceFunction),
        topName,
        fixed(topResult?.SimilarityScore, 4),
        fixed(r.requestCharge, 2),
        fixed(r.latencyMs, 0),
      ])
    );
  }

  console.log(`╚${rule}╝`);

  // Detailed results per container
  for (const r of results) {
    console.log(`\n--- ${r.algorithm} / ${r.distanceFunction} (${r.containerName}) ---`);
    const rows = r.searchResults ?? [];
    if (rows.length === 0) {
      console.log('  No results.');
      continue;
    }
    rows.forEach((item, i) => {
      console.log(`  ${i + 1}. ${item?.HotelName}, Score: ${fixed(item?.SimilarityScore, 4)}`);
    });
    console.log(`  RU: ${fixed(r.requestCharge, 2)} | Latency: ${fixed(r.latencyMs, 0)}ms`);
  }
}
type VectorAlgorithm = 'quantizedflat' | 'diskann';
type DistanceFunction = 'cosine' | 'dotproduct' | 'euclidean';

const ALGORITHMS: VectorAlgorithm[] = ['quantizedflat', 'diskann'];
const DISTANCE_FUNCTIONS: DistanceFunction[] = ['cosine', 'dotproduct', 'euclidean'];

/** Display label for each algorithm key. */
const ALGORITHM_LABELS: Record<VectorAlgorithm, string> = {
  quantizedflat: 'QuantizedFlat',
  diskann: 'DiskANN',
};

/** Narrow an arbitrary string to a known algorithm key. */
function isVectorAlgorithm(value: string): value is VectorAlgorithm {
  return ALGORITHMS.some(known => known === value);
}

/** Narrow an arbitrary string to a known distance function key. */
function isDistanceFunction(value: string): value is DistanceFunction {
  return DISTANCE_FUNCTIONS.some(known => known === value);
}

/**
 * Determine which containers to query based on VECTOR_ALGORITHM and VECTOR_DISTANCE_FUNCTION env vars.
 * Container naming pattern: hotels_{algorithm}_{distance_function}
 *
 * @param algorithmEnv - `all` or one algorithm key.
 * @param distanceEnv - `all` or one distance function key.
 * @returns One target per algorithm/distance combination, algorithms in the outer loop.
 * @throws Error when either value is neither `all` nor a known key; the algorithm is
 *   checked first.
 */
function getTargetContainers(
  algorithmEnv: string,
  distanceEnv: string
): Array<{ containerName: string; algorithm: VectorAlgorithm; distanceFunction: DistanceFunction }> {
  // Validate before use so no unchecked cast is needed.
  let algorithms: VectorAlgorithm[];
  if (algorithmEnv === 'all') {
    algorithms = ALGORITHMS;
  } else if (isVectorAlgorithm(algorithmEnv)) {
    algorithms = [algorithmEnv];
  } else {
    throw new Error(`Invalid VECTOR_ALGORITHM '${algorithmEnv}'. Must be one of: all, ${ALGORITHMS.join(', ')}`);
  }

  let distances: DistanceFunction[];
  if (distanceEnv === 'all') {
    distances = DISTANCE_FUNCTIONS;
  } else if (isDistanceFunction(distanceEnv)) {
    distances = [distanceEnv];
  } else {
    throw new Error(`Invalid VECTOR_DISTANCE_FUNCTION '${distanceEnv}'. Must be one of: all, ${DISTANCE_FUNCTIONS.join(', ')}`);
  }

  return algorithms.flatMap(algorithm =>
    distances.map(distanceFunction => ({
      containerName: `hotels_${algorithm}_${distanceFunction}`,
      algorithm,
      distanceFunction,
    }))
  );
}
/**
 * Entry point: run one vector search against every selected container and print a
 * comparison of results, RU charge and latency.
 *
 * Steps: build passwordless clients, resolve the target containers from
 * `VECTOR_ALGORITHM` / `VECTOR_DISTANCE_FUNCTION`, load the data file, create one query
 * embedding, then for each container insert the data (skipped when already populated)
 * and execute the same `VectorDistance` query.
 *
 * A failure on one container is logged and the remaining containers are still queried;
 * the process exit code is set to 1 when any container failed or when setup fails.
 */
async function main(): Promise<void> {
  const { aiClient, dbClient } = getClientsPasswordless();

  try {
    if (!aiClient) {
      throw new Error('Azure OpenAI client is not configured. Please check your environment variables.');
    }
    if (!dbClient) {
      throw new Error('Cosmos DB client is not configured. Please check your environment variables.');
    }

    // `||` (not `??`) on purpose: an empty env value should also fall back to the default.
    const dbName = process.env.AZURE_COSMOSDB_DATABASENAME || 'Hotels';
    const embeddedField = process.env.EMBEDDED_FIELD || 'DescriptionVector';
    const dataFile = process.env.DATA_FILE_WITH_VECTORS || '../data/HotelsData_toCosmosDB_Vector.json';
    const deployment = process.env.AZURE_OPENAI_EMBEDDING_MODEL;
    const algorithmEnv = (process.env.VECTOR_ALGORITHM || 'all').trim().toLowerCase();
    const distanceEnv = (process.env.VECTOR_DISTANCE_FUNCTION || 'cosine').trim().toLowerCase();
    const searchQuery = 'quintessential lodging near running trails, eateries, retail';

    if (!deployment) {
      throw new Error('Azure OpenAI client is not configured. Please check your environment variables.');
    }

    // Validate all configuration before any file or network work so bad input fails fast
    // (in particular before the embedding request is made).
    const targets = getTargetContainers(algorithmEnv, distanceEnv);
    const safeEmbeddedField = validateFieldName(embeddedField);
    const queryText = `SELECT TOP 5 c.HotelName, c.Description, c.Rating, VectorDistance(c.${safeEmbeddedField}, @embedding) AS SimilarityScore FROM c ORDER BY VectorDistance(c.${safeEmbeddedField}, @embedding)`;

    console.log(`\n🔬 Vector Algorithm Comparison`);
    console.log(`   Database: ${dbName}`);
    console.log(`   Algorithms: ${algorithmEnv}`);
    console.log(`   Distance functions: ${distanceEnv}`);
    console.log(`   Containers to query: ${targets.map(t => t.containerName).join(', ')}`);
    console.log(`   Search query: "${searchQuery}"\n`);

    const database = dbClient.database(dbName);

    // Load data once (shared across containers)
    const data = await readFileReturnJson(path.join(__dirname, '..', dataFile));

    // Generate query embedding once (reuse across containers)
    console.log('Generating query embedding...');
    const embeddingResponse = await aiClient.embeddings.create({
      model: deployment,
      input: [searchQuery],
    });
    const queryEmbedding = embeddingResponse.data[0].embedding;
    console.log(`Query embedding: ${queryEmbedding.length} dimensions\n`);

    const comparisonResults: Parameters<typeof printComparisonTable>[0] = [];
    const failedContainers: string[] = [];

    for (const target of targets) {
      const label = ALGORITHM_LABELS[target.algorithm];
      console.log(`\n━━━ ${label} / ${target.distanceFunction} ━━━`);
      console.log(`Container: ${target.containerName}`);

      try {
        const container = database.container(target.containerName);
        // Rejects (handled below) when the container does not exist.
        await container.read();

        // Insert data (skips if already populated)
        await insertData(container, data);

        // Run vector search; only the query itself is timed.
        console.log('Executing vector search...');
        const startTime = Date.now();

        const queryResponse = await container.items
          .query({
            query: queryText,
            parameters: [
              { name: '@embedding', value: queryEmbedding },
            ],
          })
          .fetchAll();

        const latencyMs = Date.now() - startTime;

        const activityId = getQueryActivityId(queryResponse);
        if (activityId) {
          console.log('Query activity ID:', activityId);
        }

        const { resources } = queryResponse;
        // Default once so the log line and the table always show the same number.
        const requestCharge = queryResponse.requestCharge ?? 0;

        comparisonResults.push({
          containerName: target.containerName,
          algorithm: label,
          distanceFunction: target.distanceFunction,
          searchResults: resources,
          requestCharge,
          latencyMs,
        });

        console.log(`✓ ${resources.length} results, ${requestCharge.toFixed(2)} RUs, ${latencyMs}ms`);
      } catch (error: unknown) {
        failedContainers.push(target.containerName);

        const code = (error as { code?: unknown } | null | undefined)?.code;
        if (code === 404) {
          console.error(`✗ Container '${target.containerName}' not found. Run scripts/create-resources.sh first.`);
        } else {
          const message = error instanceof Error ? error.message : String(error);
          console.error(`✗ Error querying ${target.containerName}:`, message);
        }
      }
    }

    // Print comparison table
    if (comparisonResults.length > 0) {
      printComparisonTable(comparisonResults);
    }

    if (failedContainers.length > 0) {
      console.error(
        `\n${failedContainers.length} of ${targets.length} container(s) could not be queried: ${failedContainers.join(', ')}`
      );
      process.exitCode = 1;
    }
  } catch (error) {
    console.error('App failed:', error);
    process.exitCode = 1;
  }
}

main().catch(error => {
  console.error('Unhandled error:', error);
  process.exitCode = 1;
});