branch: externals/vecdb
commit a296dd7075c6d3e4982ab8db49606f15616d7923
Merge: 169b7cb9e8 d7247e32cb
Author: Andrew Hyatt <ahy...@gmail.com>
Commit: GitHub <nore...@github.com>

    Add postgres implementation of vecdb with pgvector
---
 Eldev                     |   3 +-
 README.org                |  11 ++
 vecdb-integration-test.el |  36 +++++--
 vecdb-psql.el             | 254 ++++++++++++++++++++++++++++++++++++++++++++++
 vecdb.el                  |   2 +-
 5 files changed, 297 insertions(+), 9 deletions(-)

diff --git a/Eldev b/Eldev
index ad2d86f3ce..43c29f47cf 100644
--- a/Eldev
+++ b/Eldev
@@ -1,4 +1,5 @@
-; -*- mode: emacs-lisp; lexical-binding: t -*-
+                                        ; -*- mode: emacs-lisp; 
lexical-binding: t -*-
 
 (eldev-use-package-archive 'gnu-elpa)
+(eldev-use-package-archive 'nongnu-elpa)
 (eldev-use-plugin 'maintainer)
diff --git a/README.org b/README.org
index 8643592f65..af9249d08a 100644
--- a/README.org
+++ b/README.org
@@ -86,3 +86,14 @@ However, the full set of options, here demonstrating the 
equivalent settings to
                             :tenant "default"
                             :database "default"))
 #+end_src
+** Postgres with pgvector
+The popular database Postgres has an extension that allows it to have vector 
database functionality, [[https://github.com/pgvector/pgvector][pgvector]].  
This needs the =pg-el= library.
+
+A provider defines a database, and the collection will define a table with the 
collection name in that database.
+
+For example,
+#+begin_src emacs-lisp
+(defvar my-postgres-provider (make-vecdb-psqlprovider :dbname "mydatabase" 
:username "myuser"))
+#+end_src
+
+This also takes an optional password as well.  For now, this just uses 
localhost as a default.
diff --git a/vecdb-integration-test.el b/vecdb-integration-test.el
index 35f670ba0d..f99ef5c295 100644
--- a/vecdb-integration-test.el
+++ b/vecdb-integration-test.el
@@ -33,6 +33,7 @@
 ;;                 CHROMA_DATABASE (optional, defaults to "default")
 ;;     For Qdrant: QDRANT_URL (e.g., "http://localhost:6333";)
 ;;                 QDRANT_API_KEY (e.g., "your-api-key")
+;;     For Postgres: PSQL_DB (should exist already), PSQL_USERNAME, 
PSQL_PASSWORD (optional)
 ;;  3. Execute from the command line:
 ;;     emacs -batch -l ert -l vecdb-integration-test.el -f 
ert-run-tests-batch-and-exit
 ;;
@@ -46,6 +47,7 @@
 (require 'vecdb)
 (require 'vecdb-chroma)
 (require 'vecdb-qdrant)
+(require 'vecdb-psql)
 (require 'cl-lib) ;; For cl-remove-if-not, cl-every
 
 (declare-function chroma-ext--tmp-project-dir "ext-chroma")
@@ -75,7 +77,18 @@ Skips tests if no providers are configured."
                     (make-vecdb-qdrant-provider
                      :url qdrant-url
                      :api-key qdrant-api-key)
-                  (warn "QDRANT_URL is set, but QDRANT_API_KEY is missing. 
Qdrant provider will not be configured.")))))))
+                  (warn "QDRANT_URL is set, but QDRANT_API_KEY is missing. 
Qdrant provider will not be configured.")))))
+
+          ;; Postgres Configuration
+          (let ((postgres-db (getenv "PSQL_DB"))
+                (postgres-username (getenv "PSQL_USERNAME"))
+                (postgres-password (getenv "PSQL_PASSWORD")))
+
+            (when postgres-username
+              (make-vecdb-psql-provider
+               :dbname postgres-db
+               :username postgres-username
+               :password postgres-password)))))
 
    (progn
      (ert-skip "No vector database provider environment variables set. 
(CHROMA_URL or QDRANT_URL must be set)")
@@ -92,6 +105,7 @@ itself might globally skip if no providers at all are 
configured)."
   (declare (indent defun))
   (let ((chroma-test-name (intern (format "%s-chroma" base-name)))
         (qdrant-test-name (intern (format "%s-qdrant" base-name)))
+        (psql-test-name (intern (format "%s-psql" base-name)))
         (base-doc (or docstring (format "Test %s for a vector database 
provider." base-name))))
     `(progn
        (ert-deftest ,chroma-test-name ()
@@ -110,7 +124,15 @@ itself might globally skip if no providers at all are 
configured)."
                                              (vecdb-test--get-providers))))
            (if current-provider
                (funcall ,body-function current-provider)
-             (ert-skip (format "Qdrant provider not configured for %s" 
',qdrant-test-name))))))))
+             (ert-skip (format "Qdrant provider not configured for %s" 
',qdrant-test-name)))))
+       (ert-deftest ,psql-test-name ()
+         ,(format "%s (Postgres)" base-doc)
+         (interactive)
+         (let ((current-provider (cl-find-if (lambda (p) (eq (type-of p) 
'vecdb-psql-provider))
+                                             (vecdb-test--get-providers))))
+           (if current-provider
+               (funcall ,body-function current-provider)
+             (ert-skip (format "Postgres provider not configured for %s" 
',psql-test-name))))))))
 
 (defmacro with-test-collection (current-provider collection-var 
collection-name-base options &rest body)
   "Execute BODY with COLLECTION-VAR bound to a new collection.
@@ -122,11 +144,11 @@ The full collection name is generated by appending the 
provider's name.
 The collection is created before BODY and deleted afterwards."
   (declare (indent 1) (debug t))
   (let ((full-collection-name (gensym "full-collection-name-"))
-        (vector-size-val (gensym "vector-size-"))
         (default-vector-size 3))
     `(let* ((,full-collection-name (format "%s-%s" ,collection-name-base 
(vecdb-provider-name ,current-provider)))
-            (,vector-size-val (or (plist-get ,options :vector-size) 
,default-vector-size))
-            (,collection-var (make-vecdb-collection :name 
,full-collection-name :vector-size ,vector-size-val)))
+            (,collection-var (make-vecdb-collection :name ,full-collection-name
+                                                    :vector-size (or 
(plist-get ,options :vector-size) ,default-vector-size)
+                                                    :payload-fields (plist-get 
,options :payload-fields))))
        (unwind-protect
            (progn
              (vecdb-create ,current-provider ,collection-var)
@@ -165,7 +187,7 @@ The collection is created before BODY and deleted 
afterwards."
                  (make-vecdb-item :id 1 :vector [0 1 2] :payload '(:val 1))
                  (make-vecdb-item :id 2 :vector [0 1 2] :payload '(:val 2))
                  (make-vecdb-item :id 3 :vector [0 1 2] :payload '(:val 3)))))
-    (with-test-collection current-provider current-collection collection-name 
`(:vector-size ,vector-size)
+    (with-test-collection current-provider current-collection collection-name 
`(:vector-size ,vector-size :payload-fields ((val . integer)))
                           (vecdb-upsert-items current-provider 
current-collection items t)
                           (dolist (item items)
                             (let ((retrieved-item (vecdb-get-item 
current-provider current-collection (vecdb-item-id item))))
@@ -190,7 +212,7 @@ The collection is created before BODY and deleted 
afterwards."
          (item2 (make-vecdb-item :id 2 :vector [0.4 0.5 0.6] :payload '(:val 
2)))
          (item3 (make-vecdb-item :id 3 :vector [0.7 0.8 0.9] :payload '(:val 
3)))
          (items (list item1 item2 item3)))
-    (with-test-collection current-provider current-collection collection-name 
`(:vector-size ,vector-size)
+    (with-test-collection current-provider current-collection collection-name 
`(:vector-size ,vector-size :payload-fields ((val . integer)))
                           (vecdb-upsert-items current-provider 
current-collection items t)
                           ;; Search for a vector similar to item2
                           (let ((results (vecdb-search-by-vector 
current-provider current-collection [0.41 0.51 0.61] 3)))
diff --git a/vecdb-psql.el b/vecdb-psql.el
new file mode 100644
index 0000000000..40dc662180
--- /dev/null
+++ b/vecdb-psql.el
@@ -0,0 +1,254 @@
+;;; vecdb-psql.el --- An interface to postgres with vector extension -*- 
lexical-binding: t; -*-
+
+;; Copyright (c) 2025  Free Software Foundation, Inc.
+
+;; Author: Andrew Hyatt <ahy...@gmail.com>
+;; Homepage: https://github.com/ahyatt/vecdb
+;; SPDX-License-Identifier: GPL-3.0-or-later
+;;
+;; This program is free software; you can redistribute it and/or
+;; modify it under the terms of the GNU General Public License as
+;; published by the Free Software Foundation; either version 3 of the
+;; License, or (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+;; This package provides an implementation of vecdb for using with postgres.
+
+;;; Code:
+
+(require 'vecdb)
+(require 'pg)
+(require 'cl-lib)
+(require 'map)
+(require 'seq)
+
+(cl-defstruct (vecdb-psql-provider (:include vecdb-provider
+                                             (name "postgres")))
+  "Provider for the vector database.
+DBNAME is the database name, which must have been created by the user."
+  dbname
+  username
+  (password ""))
+
+(defconst vecdb-psql-connection-cache
+  (make-hash-table :test 'equal)
+  "Cache for database connections by db name.")
+
+(defun vecdb-psql-get-connection (provider)
+  "Get a connection to the database specified by PROVIDER."
+  (let* ((key (vecdb-psql-provider-dbname provider))
+         (connection (gethash key vecdb-psql-connection-cache)))
+    (unless connection
+      (setq connection
+            (pg-connect
+             (vecdb-psql-provider-dbname provider)
+             (vecdb-psql-provider-username provider)
+             (vecdb-psql-provider-password provider)))
+      (puthash key connection vecdb-psql-connection-cache))
+    connection))
+
+(defun vecdb-psql-table-name (collection-name)
+  "Turn COLLECTION-NAME into a safe table name."
+  (replace-regexp-in-string "[^a-zA-Z0-9_]" "_" (downcase collection-name)))
+
+(defun vecdb-psql-type (collection-type)
+  "Convert COLLECTION-TYPE to a PostgreSQL type string."
+  (pcase collection-type
+    ('string "TEXT")
+    ('integer "INTEGER")
+    ('float "FLOAT")
+    (_ (error "Unsupported field type: %s" collection-type))))
+
+(defun vecdb-psql-oid (collection-type)
+  "Convert COLLECTION-TYPE to a psql OID."
+  (pcase collection-type
+    ('string "text")
+    ('integer "int8")
+    ('float "float8")
+    (_ (error "Unsupported field type: %s" collection-type))))
+
+(cl-defmethod vecdb-create ((provider vecdb-psql-provider)
+                            (collection vecdb-collection))
+  "Create COLLECTION in database PROVIDER."
+  (pg-exec (vecdb-psql-get-connection provider)
+           (format "CREATE TABLE IF NOT EXISTS %s (
+                     id INTEGER PRIMARY KEY,
+                     vector VECTOR(%d) NOT NULL%s
+                     %s
+                   );"
+                   (vecdb-psql-table-name (vecdb-collection-name collection))
+                   (vecdb-collection-vector-size collection)
+                   (if (vecdb-collection-payload-fields collection) "," "")
+                   (mapconcat
+                    (lambda (field)
+                      (format "%s %s NULL"
+                              (car field)
+                              (vecdb-psql-type (cdr field))))
+                    (vecdb-collection-payload-fields collection)
+                    ", ")))
+  (pg-exec (vecdb-psql-get-connection provider)
+           (format "CREATE INDEX IF NOT EXISTS %s_embedding_hnsw_idx ON %s 
USING hnsw (vector vector_cosine_ops)"
+                   (vecdb-psql-table-name (vecdb-collection-name collection))
+                   (vecdb-psql-table-name (vecdb-collection-name collection))))
+  (mapc (lambda (field)
+          (pg-exec (vecdb-psql-get-connection provider)
+                   (format "CREATE INDEX IF NOT EXISTS %s_%s_idx ON %s (%s)"
+                           (vecdb-psql-table-name (vecdb-collection-name 
collection))
+                           (car field)
+                           (vecdb-psql-table-name (vecdb-collection-name 
collection))
+                           (car field))))
+        (vecdb-collection-payload-fields collection)))
+
+(cl-defmethod vecdb-delete ((provider vecdb-psql-provider)
+                            (collection vecdb-collection))
+  "Delete COLLECTION from database PROVIDER."
+  (pg-exec (vecdb-psql-get-connection provider)
+           (format "DROP TABLE IF EXISTS %s;"
+                   (vecdb-psql-table-name (vecdb-collection-name 
collection)))))
+
+(cl-defmethod vecdb-exists ((provider vecdb-psql-provider)
+                            (collection vecdb-collection))
+  "Check if the COLLECTION exists in the database specified by PROVIDER."
+  (let ((result
+         (pg-exec (vecdb-psql-get-connection provider)
+                  (format "SELECT EXISTS (
+                            SELECT FROM information_schema.tables
+                            WHERE table_name = '%s'
+                          );"
+                          (vecdb-psql-table-name (vecdb-collection-name 
collection))))))
+    (and result
+         (equal (caar (pg-result result :tuples)) t))))
+
+(defun vecdb-psql--plist-keys (plist)
+  "Return a list of keys from PLIST, as strings with the colon removed."
+  (cl-loop for (k _v) on plist by #'cddr
+           collect (substring (symbol-name k) 1)))
+
+(cl-defmethod vecdb-upsert-items ((provider vecdb-psql-provider)
+                                  (collection vecdb-collection)
+                                  data-list &optional _)
+  "Upsert items into the COLLECTION in the database PROVIDER.
+All items in DATA-LIST must have the same payloads."
+  (let ((arg-count 0))
+    (funcall #'pg-exec-prepared
+             (vecdb-psql-get-connection provider)
+             (format "INSERT INTO %s (id, vector%s%s) VALUES %s
+                    ON CONFLICT (id) DO UPDATE SET vector = 
EXCLUDED.vector%s%s;"
+                     (vecdb-psql-table-name (vecdb-collection-name collection))
+                     (if (vecdb-collection-payload-fields collection) ", " "")
+                     ;; We assume every vecdb-item has the same payload 
structure
+                     (mapconcat #'identity (vecdb-psql--plist-keys
+                                            (vecdb-item-payload (car 
data-list)))
+                                ", ")
+                     (mapconcat (lambda (item)
+                                  (format "(%s)"
+                                          (string-join (cl-loop for i from 1 
below (+ 2 (length (vecdb-item-payload item)))
+                                                                do (cl-incf 
arg-count)
+                                                                collect 
(format "$%d" arg-count))
+                                                       ", ")))
+                                data-list
+                                ", ")
+                     (if (vecdb-collection-payload-fields collection) ", " "")
+                     (mapconcat
+                      (lambda (field)
+                        (format "%s = EXCLUDED.%s" (car field) (car field)))
+                      (vecdb-collection-payload-fields collection)
+                      ", "))
+             (mapcan (lambda (item)
+                       (append
+                        (list
+                         (cons (vecdb-item-id item) "int8")
+                         (cons (vecdb-item-vector item) "vector"))
+                        (mapcar (lambda (payload-key)
+                                  (cons (plist-get (vecdb-item-payload item) 
payload-key)
+                                        (vecdb-psql-oid (assoc-default
+                                                         (intern (substring 
(symbol-name payload-key) 1))
+                                                         
(vecdb-collection-payload-fields collection)))))
+                                (map-keys (vecdb-item-payload (car 
data-list))))))
+                     data-list))))
+
+(defun vecdb-psql--full-row-to-item (row collection)
+  "Convert a full database row ROW into a vecdb-item for COLLECTION."
+  (make-vecdb-item
+   :id (nth 0 row)
+   :vector (nth 1 row)
+   :payload
+   (flatten-list (cl-loop for field in (vecdb-collection-payload-fields 
collection)
+                          collect
+                          (list (intern (format ":%s" (car field)))
+                                (nth (+ 2 (cl-position field
+                                                       
(vecdb-collection-payload-fields collection)
+                                                       :test #'equal))
+                                     row))))))
+
+(cl-defmethod vecdb-get-item ((provider vecdb-psql-provider)
+                              (collection vecdb-collection)
+                              id)
+  "Get an item from COLLECTION by ID.
+PROVIDER specifies the database that the collection is in."
+  (let ((result
+         (pg-result
+          (pg-exec-prepared (vecdb-psql-get-connection provider)
+                            (format "SELECT id, vector::vector%s %s FROM %s 
WHERE id = $1;"
+                                    (if (vecdb-collection-payload-fields 
collection) ", " "")
+                                    (mapconcat
+                                     (lambda (field)
+                                       (format "%s" (car field)))
+                                     (vecdb-collection-payload-fields 
collection)
+                                     ", ")
+                                    (vecdb-psql-table-name 
(vecdb-collection-name collection)))
+                            (list (cons id "int8")))
+          :tuples)))
+    (when result
+      (vecdb-psql--full-row-to-item (car result) collection))))
+
+(cl-defmethod vecdb-delete-items ((provider vecdb-psql-provider)
+                                  (collection vecdb-collection)
+                                  ids &optional _)
+  "Delete items from COLLECTION by IDs.
+PROVIDER is the database that the collection is in."
+  (when ids
+    ;; TODO: This should ideally be a prepared statement, but I dont know how 
to do
+    ;; this with psql.
+    (pg-exec (vecdb-psql-get-connection provider)
+             (format "DELETE FROM %s WHERE id IN (%s);"
+                     (vecdb-psql-table-name (vecdb-collection-name collection))
+                     (mapconcat #'number-to-string ids ", ")))))
+
+(cl-defmethod vecdb-search-by-vector ((provider vecdb-psql-provider)
+                                      (collection vecdb-collection)
+                                      vector
+                                      &optional limit)
+  "Search for items in COLLECTION by VECTOR.
+PROVIDER is the database that the collection is in."
+  (let ((limit-clause (if limit
+                          (format "LIMIT %d" limit)
+                        "")))
+    (mapcar (lambda (row)
+              (vecdb-psql--full-row-to-item row collection))
+            (pg-result
+             (pg-exec-prepared (vecdb-psql-get-connection provider)
+                               (format "SELECT id, vector::vector%s %s FROM %s
+                      ORDER BY vector <-> $1 %s;"
+                                       (if (vecdb-collection-payload-fields 
collection) ", " "")
+                                       (mapconcat
+                                        (lambda (field)
+                                          (format "%s" (car field)))
+                                        (vecdb-collection-payload-fields 
collection)
+                                        ", ")
+                                       (vecdb-psql-table-name 
(vecdb-collection-name collection))
+                                       limit-clause)
+                               (list (cons vector "vector")))
+             :tuples))))
+
+(provide 'vecdb-psql)
+
+;;; vecdb-psql.el ends here
diff --git a/vecdb.el b/vecdb.el
index 78179c804b..f56d7baf7b 100644
--- a/vecdb.el
+++ b/vecdb.el
@@ -4,7 +4,7 @@
 
 ;; Author: Andrew Hyatt <ahy...@gmail.com>
 ;; Homepage: https://github.com/ahyatt/vecdb
-;; Package-Requires: ((emacs "29.1") (plz "0.8"))
+;; Package-Requires: ((emacs "29.1") (plz "0.8") (pg "0.56"))
 ;; Package-Version: 0.1
 ;; SPDX-License-Identifier: GPL-3.0-or-later
 ;;

Reply via email to