diff --git a/README.md b/README.md index 329d71c..7391128 100644 --- a/README.md +++ b/README.md @@ -234,6 +234,30 @@ The arocapi provides the following endpoints: - `GET /file/:id` - Download or access file content - `POST /search` - Search entities using OpenSearch +## Customising OpenSearch query + +The OpenSearch query builder used by the `/search` endpoint can be customised to match the +structure of your OpenSearch data. By default, the `OpensearchQueryBuilder` class +builds every part of the search request, and an optional configuration can be passed +to customise its behaviour. + +### Query Builder (Optional) +Use the optional `queryBuilderClass` parameter to specify a custom builder class. +If not specified, the default `OpensearchQueryBuilder` is used. +The custom class should extend the `OpensearchQueryBuilder` class and +may override any or all of the following methods as required: + +- `buildQuery`: Generates the base search query. +- `buildAggregations`: Generates the facets (aggregations) part of the query. +- `buildSort`: Generates the ordering/sorting part of the query. + +### Query Builder Options (Optional) +The optional `queryBuilderOptions` parameter is passed as the constructor argument +when instantiating the `queryBuilderClass`. The default `OpensearchQueryBuilder` can be +configured with: + +- `aggregations`: Replaces the default facet aggregations and is used as the `aggs` field of the generated search query; a `geohash_grid` aggregation is added to it when both `geohashPrecision` and `boundingBox` are supplied in the search request.
+ ## Customising Entity Responses The API provides a flexible transformer system for customising entity responses diff --git a/src/app.ts b/src/app.ts index 4b06796..0bd99d5 100644 --- a/src/app.ts +++ b/src/app.ts @@ -19,6 +19,8 @@ import type { FileTransformer, } from './types/transformers.js'; import { createValidationError } from './utils/errors.js'; +import type { QueryBuilderOptions } from './utils/queryBuilder.js'; +import { OpensearchQueryBuilder } from './utils/queryBuilder.js'; export type { AuthorisedEntity, AuthorisedFile, StandardEntity, StandardFile } from './transformers/default.js'; export { AllPublicAccessTransformer, AllPublicFileAccessTransformer } from './transformers/default.js'; @@ -43,6 +45,8 @@ export type { FileTransformer, TransformerContext, } from './types/transformers.js'; +export { OpensearchQueryBuilder }; +export type { QueryBuilderOptions }; declare module 'fastify' { interface FastifyInstance { @@ -97,6 +101,8 @@ const setupSearch = async (fastify: FastifyInstance, opensearch: Client) => { export type Options = { prisma: PrismaClient; opensearch: Client; + queryBuilderClass?: typeof OpensearchQueryBuilder; + queryBuilderOptions?: QueryBuilderOptions; disableCors?: boolean; accessTransformer: AccessTransformer; entityTransformers?: EntityTransformer[]; @@ -109,6 +115,8 @@ const app: FastifyPluginAsync = async (fastify, options) => { const { prisma, opensearch, + queryBuilderClass, + queryBuilderOptions, disableCors = false, accessTransformer, entityTransformers, @@ -155,7 +163,7 @@ const app: FastifyPluginAsync = async (fastify, options) => { fastify.register(files, { fileAccessTransformer, fileTransformers }); fastify.register(file, { fileHandler }); fastify.register(crate, { roCrateHandler }); - fastify.register(search, { accessTransformer, entityTransformers }); + fastify.register(search, { accessTransformer, entityTransformers, queryBuilderClass, queryBuilderOptions }); }; export default fp(app); diff --git 
a/src/routes/search.ts b/src/routes/search.ts index f7e67d1..ddb468f 100644 --- a/src/routes/search.ts +++ b/src/routes/search.ts @@ -1,12 +1,12 @@ import type { MultiBucketAggregateBaseFiltersBucket } from '@opensearch-project/opensearch/api/_types/_common.aggregations.js'; -import type { BoolQuery } from '@opensearch-project/opensearch/api/_types/_common.query_dsl.js'; -import type { Search_Request, Search_RequestBody } from '@opensearch-project/opensearch/api/index.js'; +import type { Search_Request } from '@opensearch-project/opensearch/api/index.js'; import type { FastifyPluginAsync } from 'fastify'; import type { ZodTypeProvider } from 'fastify-type-provider-zod'; import { z } from 'zod/v4'; import { baseEntityTransformer, resolveEntityReferences } from '../transformers/default.js'; import type { AccessTransformer, EntityTransformer } from '../types/transformers.js'; import { createInternalError } from '../utils/errors.js'; +import { OpensearchQueryBuilder, type QueryBuilderOptions } from '../utils/queryBuilder.js'; const boundingBoxSchema = z.object({ topRight: z.object({ @@ -30,148 +30,22 @@ const searchParamsSchema = z.object({ sort: z.enum(['id', 'name', 'createdAt', 'updatedAt', 'relevance']).default('relevance'), order: z.enum(['asc', 'desc']).default('asc'), }); -type SearchParams = z.infer; - -const buildQuery = ( - searchType: SearchParams['searchType'], - query: SearchParams['query'], - filters: SearchParams['filters'], - boundingBox: SearchParams['boundingBox'], -) => { - const must: BoolQuery['must'] = []; - const filter: BoolQuery['filter'] = []; - - if (searchType === 'basic') { - must.push({ - multi_match: { - query, - fields: ['name^2', 'description'], - type: 'best_fields', - fuzziness: 'AUTO', - zero_terms_query: 'all', - }, - }); - } else { - must.push({ - query_string: { - query, - fields: ['name^2', 'description'], - default_operator: 'AND', - }, - }); - } - - if (filters) { - Object.entries(filters).forEach(([field, values]) => { - 
filter.push({ - terms: { - [field]: values, - }, - }); - }); - } - - if (boundingBox) { - filter.push({ - geo_bounding_box: { - location: { - top_left: { - lat: boundingBox.topRight.lat, - lon: boundingBox.bottomLeft.lng, - }, - bottom_right: { - lat: boundingBox.bottomLeft.lat, - lon: boundingBox.topRight.lng, - }, - }, - }, - }); - } - - return { - bool: { - must, - filter, - }, - }; -}; - -// TODO: Pull these from a config file -const buildAggregations = ( - geohashPrecision: SearchParams['geohashPrecision'], - boundingBox: SearchParams['boundingBox'], -) => { - const aggs: Search_RequestBody['aggs'] = { - inLanguage: { - terms: { - field: 'inLanguage.keyword', - size: 20, - }, - }, - mediaType: { - terms: { - field: 'mediaType.keyword', - size: 20, - }, - }, - communicationMode: { - terms: { - field: 'communicationMode.keyword', - size: 20, - }, - }, - entityType: { - terms: { - field: 'entityType.keyword', - size: 20, - }, - }, - }; - - // Add geohash aggregation if precision is specified - if (geohashPrecision && boundingBox) { - aggs.geohash_grid = { - geohash_grid: { - field: 'location', - precision: geohashPrecision, - bounds: { - top_left: { - lat: boundingBox.topRight.lat, - lon: boundingBox.bottomLeft.lng, - }, - bottom_right: { - lat: boundingBox.bottomLeft.lat, - lon: boundingBox.topRight.lng, - }, - }, - }, - }; - } - - return aggs; -}; - -const buildSort = (sort: SearchParams['sort'], order: SearchParams['order']) => { - if (sort === 'relevance') { - return; - } - - const sortField = sort === 'id' ? 
'rocrateId' : sort; - - if (sortField === 'name') { - return [{ 'name.keyword': order }]; - } - - return [{ [sortField]: order }]; -}; type SearchRouteOptions = { accessTransformer: AccessTransformer; entityTransformers?: EntityTransformer[]; + queryBuilderClass?: typeof OpensearchQueryBuilder; + queryBuilderOptions?: QueryBuilderOptions; }; const search: FastifyPluginAsync = async (fastify, opts) => { - const { accessTransformer, entityTransformers = [] } = opts; + const { + accessTransformer, + entityTransformers = [], + queryBuilderClass = OpensearchQueryBuilder, + queryBuilderOptions, + } = opts; + const queryBuilder = new queryBuilderClass(queryBuilderOptions); fastify.withTypeProvider().post( '/search', { @@ -186,20 +60,20 @@ const search: FastifyPluginAsync = async (fastify, opts) => const opensearchQuery: Search_Request = { index: 'entities', body: { - query: buildQuery(searchType, query, filters, boundingBox), - aggs: buildAggregations(geohashPrecision, boundingBox), + query: queryBuilder.buildQuery(searchType, query, filters, boundingBox), + aggs: queryBuilder.buildAggregations(geohashPrecision, boundingBox), highlight: { fields: { name: {}, description: {}, }, }, - sort: buildSort(sort, order), + sort: queryBuilder.buildSort(sort, order), from: offset, size: limit, }, }; - + fastify.log.debug(opensearchQuery); const response = await fastify.opensearch.search(opensearchQuery); if (!response.body?.hits?.hits) { diff --git a/src/utils/queryBuilder.ts b/src/utils/queryBuilder.ts new file mode 100644 index 0000000..93c4fb4 --- /dev/null +++ b/src/utils/queryBuilder.ts @@ -0,0 +1,134 @@ +import type { SortOrder } from '@opensearch-project/opensearch/api/_types/_common.js'; +import type { BoolQuery } from '@opensearch-project/opensearch/api/_types/_common.query_dsl.js'; +import type { Search_RequestBody } from '@opensearch-project/opensearch/api/index.js'; + +type Aggregations = Required['aggs']; + +type BoundingBox = { + topRight: { + lat: number; + lng: 
/** Rectangle given by its top-right and bottom-left corners (`lat`/`lng` pairs). */
type BoundingBox = {
  topRight: {
    lat: number;
    lng: number;
  };
  bottomLeft: {
    lat: number;
    lng: number;
  };
};

/** Constructor options accepted by the default {@link OpensearchQueryBuilder}. */
export type QueryBuilderOptions = {
  /** Replaces the default facet aggregations used as the base of the `aggs` field. */
  aggregations?: Aggregations;
};

/**
 * Converts a top-right/bottom-left box into the `top_left`/`bottom_right`
 * shape (with `lon` instead of `lng`) used by both the bounding-box filter
 * and the geohash grid bounds.
 */
const toGeoBounds = (boundingBox: BoundingBox) => ({
  top_left: {
    lat: boundingBox.topRight.lat,
    lon: boundingBox.bottomLeft.lng,
  },
  bottom_right: {
    lat: boundingBox.bottomLeft.lat,
    lon: boundingBox.topRight.lng,
  },
});

/**
 * Builds the `query`, `aggs` and `sort` parts of a search request body.
 *
 * Subclasses may override any of the `build*` methods to adapt the request
 * to a different index structure.
 */
export class OpensearchQueryBuilder {
  /** Base aggregations copied into every result of `buildAggregations`. */
  aggregations: Aggregations;

  /**
   * @param opts - Optional configuration. When `aggregations` is omitted, a
   *   `terms` aggregation (size 20) on `<name>.keyword` is created for
   *   `inLanguage`, `mediaType`, `communicationMode` and `entityType`.
   */
  constructor(opts?: QueryBuilderOptions) {
    this.aggregations =
      opts?.aggregations ??
      Object.fromEntries(
        ['inLanguage', 'mediaType', 'communicationMode', 'entityType'].map((name) => [
          name,
          { terms: { field: `${name}.keyword`, size: 20 } },
        ]),
      );
  }

  /**
   * Builds the `bool` query.
   *
   * @param searchType - `'basic'` produces a fuzzy `multi_match`; any other
   *   value produces a `query_string` query with `AND` as default operator.
   * @param query - Search text, matched against `name` (boosted) and `description`.
   * @param filters - Optional map of field name to accepted values; each entry
   *   becomes one `terms` filter.
   * @param boundingBox - Optional box; adds a `geo_bounding_box` filter on `location`.
   * @returns An object of the form `{ bool: { must, filter } }`.
   */
  buildQuery(searchType: string, query: string, filters?: Record<string, string[]>, boundingBox?: BoundingBox) {
    const must: BoolQuery['must'] = [];
    const filter: BoolQuery['filter'] = [];

    if (searchType === 'basic') {
      must.push({
        multi_match: {
          query,
          fields: ['name^2', 'description'],
          type: 'best_fields',
          fuzziness: 'AUTO',
          zero_terms_query: 'all',
        },
      });
    } else {
      must.push({
        query_string: {
          query,
          fields: ['name^2', 'description'],
          default_operator: 'AND',
        },
      });
    }

    if (filters) {
      for (const [field, values] of Object.entries(filters)) {
        filter.push({ terms: { [field]: values } });
      }
    }

    if (boundingBox) {
      filter.push({
        geo_bounding_box: {
          location: toGeoBounds(boundingBox),
        },
      });
    }

    return {
      bool: {
        must,
        filter,
      },
    };
  }

  /**
   * Builds the `aggs` part of the request.
   *
   * @param geohashPrecision - Precision of the optional geohash grid.
   * @param boundingBox - Bounds of the optional geohash grid.
   * @returns A shallow copy of `this.aggregations`; when both arguments are
   *   truthy, a `geohash_grid` entry on `location` is set on the copy
   *   (overwriting any configured entry with that key).
   */
  buildAggregations(geohashPrecision?: number, boundingBox?: BoundingBox) {
    // Copy so that adding the grid never mutates the configured aggregations.
    const aggs: Aggregations = { ...this.aggregations };

    if (geohashPrecision && boundingBox) {
      aggs.geohash_grid = {
        geohash_grid: {
          field: 'location',
          precision: geohashPrecision,
          bounds: toGeoBounds(boundingBox),
        },
      };
    }

    return aggs;
  }

  /**
   * Builds the `sort` part of the request.
   *
   * @param sort - `'relevance'` yields no sort clause; `'id'` sorts on
   *   `rocrateId`; `'name'` sorts on `name.keyword`; any other value is used
   *   as the field name unchanged.
   * @param order - Sort direction applied to the chosen field.
   * @returns A single-element sort array, or `undefined` for `'relevance'`.
   */
  buildSort(sort: string, order: SortOrder) {
    if (sort === 'relevance') {
      return;
    }

    const sortField = sort === 'id' ? 'rocrateId' : sort;

    if (sortField === 'name') {
      return [{ 'name.keyword': order }];
    }

    return [{ [sortField]: order }];
  }
}