Skip to content

Commit 5857f5c

Browse files
authored
Added pgvectorscale client (#355)
* pgvectorscale client added * added pgvectorscale dependencies to enable independent client installation * Bug fix vector type not found in the database.
1 parent f248805 commit 5857f5c

File tree

9 files changed

+540
-0
lines changed

9 files changed

+540
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ All the database client supported
3737
| elastic | `pip install vectordb-bench[elastic]` |
3838
| pgvector | `pip install vectordb-bench[pgvector]` |
3939
| pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
40+
| pgvectorscale | `pip install vectordb-bench[pgvectorscale]` |
4041
| redis | `pip install vectordb-bench[redis]` |
4142
| memorydb | `pip install vectordb-bench[memorydb]` |
4243
| chromadb | `pip install vectordb-bench[chromadb]` |

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ pinecone = [ "pinecone-client" ]
7171
weaviate = [ "weaviate-client" ]
7272
elastic = [ "elasticsearch" ]
7373
pgvector = [ "psycopg", "psycopg-binary", "pgvector" ]
74+
pgvectorscale = [ "psycopg", "psycopg-binary", "pgvector" ]
7475
pgvecto_rs = [ "pgvecto_rs[psycopg3]>=0.2.1" ]
7576
redis = [ "redis" ]
7677
memorydb = [ "memorydb" ]

vectordb_bench/backend/clients/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class DB(Enum):
3030
WeaviateCloud = "WeaviateCloud"
3131
PgVector = "PgVector"
3232
PgVectoRS = "PgVectoRS"
33+
PgVectorScale = "PgVectorScale"
3334
Redis = "Redis"
3435
MemoryDB = "MemoryDB"
3536
Chroma = "Chroma"
@@ -71,6 +72,10 @@ def init_cls(self) -> Type[VectorDB]:
7172
if self == DB.PgVectoRS:
7273
from .pgvecto_rs.pgvecto_rs import PgVectoRS
7374
return PgVectoRS
75+
76+
if self == DB.PgVectorScale:
77+
from .pgvectorscale.pgvectorscale import PgVectorScale
78+
return PgVectorScale
7479

7580
if self == DB.Redis:
7681
from .redis.redis import Redis
@@ -123,6 +128,10 @@ def config_cls(self) -> Type[DBConfig]:
123128
from .pgvecto_rs.config import PgVectoRSConfig
124129
return PgVectoRSConfig
125130

131+
if self == DB.PgVectorScale:
132+
from .pgvectorscale.config import PgVectorScaleConfig
133+
return PgVectorScaleConfig
134+
126135
if self == DB.Redis:
127136
from .redis.config import RedisConfig
128137
return RedisConfig
@@ -172,6 +181,10 @@ def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseCon
172181
from .aws_opensearch.config import AWSOpenSearchIndexConfig
173182
return AWSOpenSearchIndexConfig
174183

184+
if self == DB.PgVectorScale:
185+
from .pgvectorscale.config import _pgvectorscale_case_config
186+
return _pgvectorscale_case_config.get(index_type)
187+
175188
# DB.Pinecone, DB.Chroma, DB.Redis
176189
return EmptyDBCaseConfig
177190

vectordb_bench/backend/clients/api.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class MetricType(str, Enum):
1515
class IndexType(str, Enum):
1616
HNSW = "HNSW"
1717
DISKANN = "DISKANN"
18+
STREAMING_DISKANN = "DISKANN"
1819
IVFFlat = "IVF_FLAT"
1920
IVFSQ8 = "IVF_SQ8"
2021
Flat = "FLAT"
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
from abc import abstractmethod
2+
from typing import TypedDict
3+
from pydantic import BaseModel, SecretStr
4+
from typing_extensions import LiteralString
5+
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
6+
7+
# printf-style template for a PostgreSQL connection URL with four "%s" slots.
# NOTE(review): presumably filled as (user, password, host, dbname) — confirm.
# It is not referenced by any code visible in this module.
POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"
8+
9+
10+
class PgVectorScaleConfigDict(TypedDict):
    """Connection parameters in the shape psycopg expects.

    The keys are passed straight through to psycopg as connection keyword
    arguments, so every name must match the psycopg API exactly.
    """

    user: str
    password: str
    host: str
    port: int
    dbname: str
19+
20+
21+
class PgVectorScaleConfig(DBConfig):
    """Connection settings for the pgvectorscale client.

    ``user_name`` and ``password`` are stored as ``SecretStr``; ``password``
    and ``db_name`` have no default and must be supplied.
    """

    user_name: SecretStr = SecretStr("postgres")
    password: SecretStr
    host: str = "localhost"
    port: int = 5432
    db_name: str

    def to_dict(self) -> PgVectorScaleConfigDict:
        """Return the settings as a plain dict keyed with psycopg names.

        The secret fields are unwrapped, so the returned dict carries the
        user name and password in clear text.
        """
        return {
            "host": self.host,
            "port": self.port,
            "dbname": self.db_name,
            "user": self.user_name.get_secret_value(),
            "password": self.password.get_secret_value(),
        }
38+
39+
40+
class PgVectorScaleIndexConfig(BaseModel, DBCaseConfig):
    """Base case configuration shared by pgvectorscale index types.

    Only the cosine metric is translated by the ``parse_*`` helpers; any
    other metric (including ``None``) yields an empty string.
    """

    metric_type: MetricType | None = None
    create_index_before_load: bool = False
    create_index_after_load: bool = True

    def parse_metric(self) -> str:
        """Return ``"vector_cosine_ops"`` for cosine, else an empty string."""
        is_cosine = self.metric_type == MetricType.COSINE
        return "vector_cosine_ops" if is_cosine else ""

    def parse_metric_fun_op(self) -> LiteralString:
        """Return the ``"<=>"`` operator for cosine, else an empty string."""
        is_cosine = self.metric_type == MetricType.COSINE
        return "<=>" if is_cosine else ""

    def parse_metric_fun_str(self) -> str:
        """Return ``"cosine_distance"`` for cosine, else an empty string."""
        is_cosine = self.metric_type == MetricType.COSINE
        return "cosine_distance" if is_cosine else ""

    @abstractmethod
    def index_param(self) -> dict:
        """Return the parameters used when building the index."""

    @abstractmethod
    def search_param(self) -> dict:
        """Return the parameters used when running a search."""

    @abstractmethod
    def session_param(self) -> dict:
        """Return the per-session settings for this index type."""
71+
72+
73+
class PgVectorScaleStreamingDiskANNConfig(PgVectorScaleIndexConfig):
    """Case configuration for the pgvectorscale StreamingDiskANN index.

    The first six optional fields are emitted as index build options by
    ``index_param``; ``query_search_list_size`` and ``query_rescore`` are
    emitted as ``diskann.*`` session settings by ``session_param``.
    """

    index: IndexType = IndexType.STREAMING_DISKANN
    storage_layout: str | None
    num_neighbors: int | None
    search_list_size: int | None
    max_alpha: float | None
    num_dimensions: int | None
    num_bits_per_dimension: int | None
    query_search_list_size: int | None
    query_rescore: int | None

    def index_param(self) -> dict:
        """Return the metric, index type and build options for the index.

        Option values are passed through as configured and may be ``None``.
        NOTE(review): the consumer is presumably expected to skip ``None``
        options — confirm against the client that builds the statement.
        """
        return {
            "metric": self.parse_metric(),
            "index_type": self.index.value,
            "options": {
                "storage_layout": self.storage_layout,
                "num_neighbors": self.num_neighbors,
                "search_list_size": self.search_list_size,
                "max_alpha": self.max_alpha,
                "num_dimensions": self.num_dimensions,
                # Previously declared on the model but never emitted, so a
                # configured value was silently ignored at index build time.
                "num_bits_per_dimension": self.num_bits_per_dimension,
            },
        }

    def search_param(self) -> dict:
        """Return the metric name and distance operator used for queries."""
        return {
            "metric": self.parse_metric(),
            "metric_fun_op": self.parse_metric_fun_op(),
        }

    def session_param(self) -> dict:
        """Return the ``diskann.*`` query-time settings, keyed by name."""
        return {
            "diskann.query_search_list_size": self.query_search_list_size,
            "diskann.query_rescore": self.query_rescore,
        }
108+
109+
# Lookup table from index type to its case-config class.
# IndexType.STREAMING_DISKANN is declared with the same value ("DISKANN") as
# IndexType.DISKANN, so Enum treats it as an alias of that member and a
# lookup with either name resolves to this entry.
_pgvectorscale_case_config = {IndexType.STREAMING_DISKANN: PgVectorScaleStreamingDiskANNConfig}

0 commit comments

Comments
 (0)