branch: externals/vecdb
commit 39bfcd06858b7bb7992dd6aa23fb7b4a823c7f9f
Merge: 8847f872e7 e89d5a9a3a
Author: Andrew Hyatt <[email protected]>
Commit: Andrew Hyatt <[email protected]>
Merge branch 'main' into ci
---
Eldev | 3 +-
NEWS.org | 3 +-
README.org | 11 ++
vecdb-integration-test.el | 36 +++++--
vecdb-psql.el | 254 ++++++++++++++++++++++++++++++++++++++++++++++
vecdb.el | 4 +-
6 files changed, 300 insertions(+), 11 deletions(-)
diff --git a/Eldev b/Eldev
index ad2d86f3ce..43c29f47cf 100644
--- a/Eldev
+++ b/Eldev
@@ -1,4 +1,5 @@
-; -*- mode: emacs-lisp; lexical-binding: t -*-
+ ; -*- mode: emacs-lisp;
lexical-binding: t -*-
(eldev-use-package-archive 'gnu-elpa)
+(eldev-use-package-archive 'nongnu-elpa)
(eldev-use-plugin 'maintainer)
diff --git a/NEWS.org b/NEWS.org
index 195efd160d..c4cb5f1cec 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,3 +1,4 @@
+* Version 0.2
+- Add Postgres backend
* Version 0.1
- Initial version
-
diff --git a/README.org b/README.org
index 8643592f65..af9249d08a 100644
--- a/README.org
+++ b/README.org
@@ -86,3 +86,14 @@ However, the full set of options, here demonstrating the
equivalent settings to
:tenant "default"
:database "default"))
#+end_src
+** Postgres with pgvector
+The popular database Postgres has an extension that allows it to have vector
database functionality, [[https://github.com/pgvector/pgvector][pgvector]].
This needs the =pg-el= library.
+
+A provider defines a database, and the collection will define a table with the
collection name in that database.
+
+For example,
+#+begin_src emacs-lisp
+(defvar my-postgres-provider (make-vecdb-psql-provider :dbname "mydatabase"
+                                                       :username "myuser"))
+#+end_src
+
+The provider also accepts an optional =:password=. For now, it connects to
+localhost by default.
diff --git a/vecdb-integration-test.el b/vecdb-integration-test.el
index 35f670ba0d..f99ef5c295 100644
--- a/vecdb-integration-test.el
+++ b/vecdb-integration-test.el
@@ -33,6 +33,7 @@
;; CHROMA_DATABASE (optional, defaults to "default")
;; For Qdrant: QDRANT_URL (e.g., "http://localhost:6333")
;; QDRANT_API_KEY (e.g., "your-api-key")
+;; For Postgres: PSQL_DB (should exist already), PSQL_USERNAME,
PSQL_PASSWORD (optional)
;; 3. Execute from the command line:
;; emacs -batch -l ert -l vecdb-integration-test.el -f
ert-run-tests-batch-and-exit
;;
@@ -46,6 +47,7 @@
(require 'vecdb)
(require 'vecdb-chroma)
(require 'vecdb-qdrant)
+(require 'vecdb-psql)
(require 'cl-lib) ;; For cl-remove-if-not, cl-every
(declare-function chroma-ext--tmp-project-dir "ext-chroma")
@@ -75,7 +77,18 @@ Skips tests if no providers are configured."
(make-vecdb-qdrant-provider
:url qdrant-url
:api-key qdrant-api-key)
- (warn "QDRANT_URL is set, but QDRANT_API_KEY is missing.
Qdrant provider will not be configured.")))))))
+ (warn "QDRANT_URL is set, but QDRANT_API_KEY is missing.
Qdrant provider will not be configured.")))))
+
+ ;; Postgres Configuration
+ (let ((postgres-db (getenv "PSQL_DB"))
+ (postgres-username (getenv "PSQL_USERNAME"))
+ (postgres-password (getenv "PSQL_PASSWORD")))
+
+ (when postgres-username
+ (make-vecdb-psql-provider
+ :dbname postgres-db
+ :username postgres-username
+ :password postgres-password)))))
(progn
(ert-skip "No vector database provider environment variables set.
(CHROMA_URL or QDRANT_URL must be set)")
@@ -92,6 +105,7 @@ itself might globally skip if no providers at all are
configured)."
(declare (indent defun))
(let ((chroma-test-name (intern (format "%s-chroma" base-name)))
(qdrant-test-name (intern (format "%s-qdrant" base-name)))
+ (psql-test-name (intern (format "%s-psql" base-name)))
(base-doc (or docstring (format "Test %s for a vector database
provider." base-name))))
`(progn
(ert-deftest ,chroma-test-name ()
@@ -110,7 +124,15 @@ itself might globally skip if no providers at all are
configured)."
(vecdb-test--get-providers))))
(if current-provider
(funcall ,body-function current-provider)
- (ert-skip (format "Qdrant provider not configured for %s"
',qdrant-test-name))))))))
+ (ert-skip (format "Qdrant provider not configured for %s"
',qdrant-test-name)))))
+ (ert-deftest ,psql-test-name ()
+ ,(format "%s (Postgres)" base-doc)
+ (interactive)
+ (let ((current-provider (cl-find-if (lambda (p) (eq (type-of p)
'vecdb-psql-provider))
+ (vecdb-test--get-providers))))
+ (if current-provider
+ (funcall ,body-function current-provider)
+ (ert-skip (format "Postgres provider not configured for %s"
',psql-test-name))))))))
(defmacro with-test-collection (current-provider collection-var
collection-name-base options &rest body)
"Execute BODY with COLLECTION-VAR bound to a new collection.
@@ -122,11 +144,11 @@ The full collection name is generated by appending the
provider's name.
The collection is created before BODY and deleted afterwards."
(declare (indent 1) (debug t))
(let ((full-collection-name (gensym "full-collection-name-"))
- (vector-size-val (gensym "vector-size-"))
(default-vector-size 3))
`(let* ((,full-collection-name (format "%s-%s" ,collection-name-base
(vecdb-provider-name ,current-provider)))
- (,vector-size-val (or (plist-get ,options :vector-size)
,default-vector-size))
- (,collection-var (make-vecdb-collection :name
,full-collection-name :vector-size ,vector-size-val)))
+ (,collection-var (make-vecdb-collection :name ,full-collection-name
+ :vector-size (or
(plist-get ,options :vector-size) ,default-vector-size)
+ :payload-fields (plist-get
,options :payload-fields))))
(unwind-protect
(progn
(vecdb-create ,current-provider ,collection-var)
@@ -165,7 +187,7 @@ The collection is created before BODY and deleted
afterwards."
(make-vecdb-item :id 1 :vector [0 1 2] :payload '(:val 1))
(make-vecdb-item :id 2 :vector [0 1 2] :payload '(:val 2))
(make-vecdb-item :id 3 :vector [0 1 2] :payload '(:val 3)))))
- (with-test-collection current-provider current-collection collection-name
`(:vector-size ,vector-size)
+ (with-test-collection current-provider current-collection collection-name
`(:vector-size ,vector-size :payload-fields ((val . integer)))
(vecdb-upsert-items current-provider
current-collection items t)
(dolist (item items)
(let ((retrieved-item (vecdb-get-item
current-provider current-collection (vecdb-item-id item))))
@@ -190,7 +212,7 @@ The collection is created before BODY and deleted
afterwards."
(item2 (make-vecdb-item :id 2 :vector [0.4 0.5 0.6] :payload '(:val
2)))
(item3 (make-vecdb-item :id 3 :vector [0.7 0.8 0.9] :payload '(:val
3)))
(items (list item1 item2 item3)))
- (with-test-collection current-provider current-collection collection-name
`(:vector-size ,vector-size)
+ (with-test-collection current-provider current-collection collection-name
`(:vector-size ,vector-size :payload-fields ((val . integer)))
(vecdb-upsert-items current-provider
current-collection items t)
;; Search for a vector similar to item2
(let ((results (vecdb-search-by-vector
current-provider current-collection [0.41 0.51 0.61] 3)))
diff --git a/vecdb-psql.el b/vecdb-psql.el
new file mode 100644
index 0000000000..40dc662180
--- /dev/null
+++ b/vecdb-psql.el
@@ -0,0 +1,254 @@
+;;; vecdb-psql.el --- An interface to postgres with vector extension -*-
lexical-binding: t; -*-
+
+;; Copyright (c) 2025 Free Software Foundation, Inc.
+
+;; Author: Andrew Hyatt <[email protected]>
+;; Homepage: https://github.com/ahyatt/vecdb
+;; SPDX-License-Identifier: GPL-3.0-or-later
+;;
+;; This program is free software; you can redistribute it and/or
+;; modify it under the terms of the GNU General Public License as
+;; published by the Free Software Foundation; either version 3 of the
+;; License, or (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+;; This package provides an implementation of vecdb for use with Postgres.
+
+;;; Code:
+
+(require 'vecdb)
+(require 'pg)
+(require 'cl-lib)
+(require 'map)
+(require 'seq)
+
+(cl-defstruct (vecdb-psql-provider (:include vecdb-provider
+ (name "postgres")))
+ "Provider for a vector database stored in Postgres.
+DBNAME is the database name, which must have been created by the user.
+USERNAME is the database user to connect as.
+PASSWORD is that user's password; it defaults to the empty string."
+ ;; Name of an already existing database.
+ dbname
+ ;; User name handed to `pg-connect'.
+ username
+ ;; Password handed to `pg-connect'; empty unless supplied.
+ (password ""))
+
+(defvar vecdb-psql-connection-cache
+  (make-hash-table :test 'equal)
+  "Cache of open database connections.
+Each key is a list (DBNAME USERNAME) and each value is the connection
+object returned by `pg-connect'.  This is a `defvar' rather than a
+`defconst' because the table is mutated at run time, and so that
+reloading this file does not throw away connections that are
+already open.")
+
+(defun vecdb-psql-get-connection (provider)
+  "Return a connection to the database specified by PROVIDER.
+The connection is opened with `pg-connect' on first use and then
+cached in `vecdb-psql-connection-cache'.  The cache key combines the
+database name and the user name, so two providers that reach the
+same database as different users do not share a connection."
+  (let ((key (list (vecdb-psql-provider-dbname provider)
+                   (vecdb-psql-provider-username provider))))
+    (or (gethash key vecdb-psql-connection-cache)
+        ;; `puthash' returns the stored value, i.e. the new connection.
+        (puthash key
+                 (pg-connect
+                  (vecdb-psql-provider-dbname provider)
+                  (vecdb-psql-provider-username provider)
+                  (vecdb-psql-provider-password provider))
+                 vecdb-psql-connection-cache))))
+
+(defun vecdb-psql-table-name (collection-name)
+  "Return a table name derived from the string COLLECTION-NAME.
+The name is lower-cased, and every character other than an ASCII
+letter, a digit or an underscore is replaced by an underscore."
+  (let ((lowered (downcase collection-name)))
+    (replace-regexp-in-string "[^a-zA-Z0-9_]" "_" lowered)))
+
+(defun vecdb-psql-type (collection-type)
+  "Return the PostgreSQL column type name for COLLECTION-TYPE.
+COLLECTION-TYPE is one of the symbols `string', `integer' or `float'.
+Any other value signals an error."
+  (or (alist-get collection-type
+                 '((string . "TEXT")
+                   (integer . "INTEGER")
+                   (float . "FLOAT")))
+      (error "Unsupported field type: %s" collection-type)))
+
+(defun vecdb-psql-oid (collection-type)
+  "Return the PostgreSQL type name used to bind a COLLECTION-TYPE value.
+COLLECTION-TYPE is one of the symbols `string', `integer' or `float'.
+Any other value signals an error."
+  (cond ((eq collection-type 'string) "text")
+        ((eq collection-type 'integer) "int8")
+        ((eq collection-type 'float) "float8")
+        (t (error "Unsupported field type: %s" collection-type))))
+
+(cl-defmethod vecdb-create ((provider vecdb-psql-provider)
+                            (collection vecdb-collection))
+  "Create the table and indexes backing COLLECTION in database PROVIDER.
+The table has an integer `id' primary key, a `vector' column sized by
+the collection's vector size, and one nullable column per payload
+field.  An HNSW index is created on the vector column and a plain
+index on each payload column.  Every statement uses IF NOT EXISTS."
+  (let* ((conn (vecdb-psql-get-connection provider))
+         (table (vecdb-psql-table-name (vecdb-collection-name collection)))
+         (fields (vecdb-collection-payload-fields collection))
+         ;; One "NAME TYPE NULL" column definition per payload field.
+         (column-defs (mapconcat
+                       (lambda (field)
+                         (format "%s %s NULL"
+                                 (car field)
+                                 (vecdb-psql-type (cdr field))))
+                       fields
+                       ", ")))
+    (pg-exec conn
+             (format "CREATE TABLE IF NOT EXISTS %s (
+ id INTEGER PRIMARY KEY,
+ vector VECTOR(%d) NOT NULL%s
+ %s
+ );"
+                     table
+                     (vecdb-collection-vector-size collection)
+                     ;; The separating comma is needed only when payload
+                     ;; columns follow the vector column.
+                     (if fields "," "")
+                     column-defs))
+    (pg-exec conn
+             (format "CREATE INDEX IF NOT EXISTS %s_embedding_hnsw_idx ON %s USING hnsw (vector vector_cosine_ops)"
+                     table
+                     table))
+    (dolist (field fields)
+      (pg-exec conn
+               (format "CREATE INDEX IF NOT EXISTS %s_%s_idx ON %s (%s)"
+                       table
+                       (car field)
+                       table
+                       (car field))))
+    ;; The method's value is the payload field list.
+    fields))
+
+(cl-defmethod vecdb-delete ((provider vecdb-psql-provider)
+                            (collection vecdb-collection))
+  "Drop the table backing COLLECTION from database PROVIDER.
+The statement uses DROP TABLE IF EXISTS, so it can be issued
+whether or not the table is present."
+  (let* ((conn (vecdb-psql-get-connection provider))
+         (table (vecdb-psql-table-name (vecdb-collection-name collection))))
+    (pg-exec conn (format "DROP TABLE IF EXISTS %s;" table))))
+
+(cl-defmethod vecdb-exists ((provider vecdb-psql-provider)
+                            (collection vecdb-collection))
+  "Return non-nil if the table for COLLECTION exists in database PROVIDER.
+The check looks the table name up in information_schema.tables."
+  (let* ((conn (vecdb-psql-get-connection provider))
+         (query (format "SELECT EXISTS (
+ SELECT FROM information_schema.tables
+ WHERE table_name = '%s'
+ );"
+                        (vecdb-psql-table-name
+                         (vecdb-collection-name collection))))
+         (result (pg-exec conn query)))
+    (when result
+      ;; The single returned cell is the value of the EXISTS expression.
+      (eq (caar (pg-result result :tuples)) t))))
+
+(defun vecdb-psql--plist-keys (plist)
+  "Return the keys of PLIST as strings, each without its first character.
+For keyword keys this removes the leading colon, so :val becomes \"val\"."
+  (let ((tail plist)
+        (names nil))
+    (while tail
+      (push (substring (symbol-name (car tail)) 1) names)
+      ;; Step over the value to reach the next key.
+      (setq tail (cddr tail)))
+    (nreverse names)))
+
+(cl-defmethod vecdb-upsert-items ((provider vecdb-psql-provider)
+                                  (collection vecdb-collection)
+                                  data-list &optional _)
+  "Upsert the items in DATA-LIST into COLLECTION in database PROVIDER.
+All items in DATA-LIST must have the same payload keys; the keys of
+the first item decide which payload columns are written.  Every
+payload key must name a field in the collection's payload fields,
+because that is where its parameter type is looked up.
+
+Rows whose id already exists are updated in place (ON CONFLICT).
+Do nothing and return nil when DATA-LIST is empty."
+  (when data-list
+    (let* ((fields (vecdb-collection-payload-fields collection))
+           (payload-keys (map-keys (vecdb-item-payload (car data-list))))
+           ;; Column names are the payload keywords minus the leading colon.
+           (columns (mapcar (lambda (key) (substring (symbol-name key) 1))
+                            payload-keys))
+           ;; Each row binds id, vector, and one value per payload KEY.
+           ;; Counting the payload plist's length here would count the
+           ;; values too and emit more placeholders than arguments.
+           (row-width (+ 2 (length payload-keys)))
+           (arg-count 0))
+      (pg-exec-prepared
+       (vecdb-psql-get-connection provider)
+       (format "INSERT INTO %s (id, vector%s%s) VALUES %s
+ ON CONFLICT (id) DO UPDATE SET vector = EXCLUDED.vector%s%s;"
+               (vecdb-psql-table-name (vecdb-collection-name collection))
+               (if columns ", " "")
+               (mapconcat #'identity columns ", ")
+               ;; One "($i, $i+1, ...)" group per item, numbered
+               ;; consecutively across the whole statement.
+               (mapconcat
+                (lambda (_item)
+                  (format "(%s)"
+                          (mapconcat
+                           (lambda (_) (format "$%d" (cl-incf arg-count)))
+                           (number-sequence 1 row-width)
+                           ", ")))
+                data-list
+                ", ")
+               (if fields ", " "")
+               (mapconcat
+                (lambda (field)
+                  (format "%s = EXCLUDED.%s" (car field) (car field)))
+                fields
+                ", "))
+       ;; Flat argument list, in the same order as the placeholders.
+       (mapcan
+        (lambda (item)
+          (append
+           (list (cons (vecdb-item-id item) "int8")
+                 (cons (vecdb-item-vector item) "vector"))
+           (mapcar
+            (lambda (payload-key)
+              (cons (plist-get (vecdb-item-payload item) payload-key)
+                    (vecdb-psql-oid
+                     (assoc-default
+                      (intern (substring (symbol-name payload-key) 1))
+                      fields))))
+            payload-keys)))
+        data-list)))))
+
+(defun vecdb-psql--full-row-to-item (row collection)
+  "Convert a full database row ROW into a `vecdb-item' for COLLECTION.
+ROW is a list whose first element is the id, whose second element is
+the vector, and whose remaining elements are the payload values in
+the order of the collection's payload fields.
+
+The payload is returned as a plist keyed by keywords made from the
+field names.  A nil value stays in the plist as nil, and a value
+that is itself a list is kept intact, so keys and values cannot
+become misaligned."
+  (make-vecdb-item
+   :id (nth 0 row)
+   :vector (nth 1 row)
+   :payload
+   ;; Walk the fields and the payload cells of ROW in lockstep.
+   (cl-loop for field in (vecdb-collection-payload-fields collection)
+            for value in (nthcdr 2 row)
+            nconc (list (intern (format ":%s" (car field))) value))))
+
+(cl-defmethod vecdb-get-item ((provider vecdb-psql-provider)
+                              (collection vecdb-collection)
+                              id)
+  "Fetch the item with ID from COLLECTION.
+PROVIDER specifies the database that the collection is in.
+Return a `vecdb-item', or nil when the query yields no row."
+  (let* ((conn (vecdb-psql-get-connection provider))
+         (fields (vecdb-collection-payload-fields collection))
+         (sql (format "SELECT id, vector::vector%s %s FROM %s WHERE id = $1;"
+                      (if fields ", " "")
+                      (mapconcat (lambda (field)
+                                   (format "%s" (car field)))
+                                 fields
+                                 ", ")
+                      (vecdb-psql-table-name
+                       (vecdb-collection-name collection))))
+         (rows (pg-result
+                (pg-exec-prepared conn sql (list (cons id "int8")))
+                :tuples)))
+    (and rows
+         (vecdb-psql--full-row-to-item (car rows) collection))))
+
+(cl-defmethod vecdb-delete-items ((provider vecdb-psql-provider)
+                                  (collection vecdb-collection)
+                                  ids &optional _)
+  "Delete the items of COLLECTION whose ids are listed in IDS.
+PROVIDER is the database that the collection is in.
+Do nothing and return nil when IDS is empty."
+  ;; TODO: This should ideally be a prepared statement, but I don't know
+  ;; how to do this with psql.
+  (and ids
+       (let ((conn (vecdb-psql-get-connection provider))
+             (table (vecdb-psql-table-name (vecdb-collection-name collection)))
+             ;; `number-to-string' rejects anything that is not a number.
+             (id-list (mapconcat #'number-to-string ids ", ")))
+         (pg-exec conn
+                  (format "DELETE FROM %s WHERE id IN (%s);" table id-list)))))
+
+(cl-defmethod vecdb-search-by-vector ((provider vecdb-psql-provider)
+                                      (collection vecdb-collection)
+                                      vector
+                                      &optional limit)
+  "Search for the items in COLLECTION closest to VECTOR, nearest first.
+PROVIDER is the database that the collection is in.  LIMIT, when
+non-nil, caps the number of items returned.
+
+Rows are ordered by cosine distance, pgvector's `<=>' operator.
+That is the operator matching the `vector_cosine_ops' HNSW index
+that `vecdb-create' builds; an index of that operator class cannot
+serve an ORDER BY that uses the L2 operator `<->'."
+  (let* ((fields (vecdb-collection-payload-fields collection))
+         (limit-clause (if limit
+                           (format "LIMIT %d" limit)
+                         ""))
+         (sql (format "SELECT id, vector::vector%s %s FROM %s
+ ORDER BY vector <=> $1 %s;"
+                      (if fields ", " "")
+                      (mapconcat (lambda (field)
+                                   (format "%s" (car field)))
+                                 fields
+                                 ", ")
+                      (vecdb-psql-table-name
+                       (vecdb-collection-name collection))
+                      limit-clause)))
+    (mapcar (lambda (row)
+              (vecdb-psql--full-row-to-item row collection))
+            (pg-result
+             (pg-exec-prepared (vecdb-psql-get-connection provider)
+                               sql
+                               (list (cons vector "vector")))
+             :tuples))))
+
+(provide 'vecdb-psql)
+
+;;; vecdb-psql.el ends here
diff --git a/vecdb.el b/vecdb.el
index 78179c804b..a0bb0fae5f 100644
--- a/vecdb.el
+++ b/vecdb.el
@@ -4,8 +4,8 @@
;; Author: Andrew Hyatt <[email protected]>
;; Homepage: https://github.com/ahyatt/vecdb
-;; Package-Requires: ((emacs "29.1") (plz "0.8"))
-;; Package-Version: 0.1
+;; Package-Requires: ((emacs "29.1") (plz "0.8") (pg "0.56"))
+;; Package-Version: 0.2
;; SPDX-License-Identifier: GPL-3.0-or-later
;;
;; This program is free software; you can redistribute it and/or