branch: externals/vecdb commit c5e2c63b4596dc82a9fa09452d5ea882f0b864fa Author: Andrew Hyatt <ahy...@gmail.com> Commit: Andrew Hyatt <ahy...@gmail.com>
;;; vecdb-psql.el --- An interface to postgres with vector extension -*- lexical-binding: t; -*-

;; Copyright (c) 2025  Free Software Foundation, Inc.

;; Author: Andrew Hyatt <ahy...@gmail.com>
;; Homepage: https://github.com/ahyatt/vecdb
;; SPDX-License-Identifier: GPL-3.0-or-later
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 3 of the
;; License, or (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

;;; Commentary:
;; This package provides an implementation of vecdb for using with postgres.
;;
;; Collection names and payload field names are interpolated into the
;; generated SQL without identifier quoting, so they must be valid
;; unquoted SQL identifiers.  Payload values are emitted as escaped
;; string literals.

;;; Code:

(require 'vecdb)
(require 'pg)
(require 'cl-lib)
(require 'map)
(require 'seq)

(cl-defstruct (vecdb-psql (:include vecdb-provider
                                    (name "postgres")))
  "Provider for the vector database.
DBNAME is the database name, which must have been created by the user.
USERNAME and PASSWORD are passed to `pg-connect' along with DBNAME."
  dbname
  username
  (password ""))

;; This is a `defvar' rather than a `defconst' because the table is
;; mutated every time a new connection is opened.
(defvar vecdb-psql-connection-cache
  (make-hash-table :test 'equal)
  "Cache for database connections by db name.")

(defun vecdb-psql-get-connection (provider)
  "Get a connection to the database specified by PROVIDER.
The connection is opened with `pg-connect' on first use and then
reused from `vecdb-psql-connection-cache'."
  ;; NOTE(review): the cache is keyed by database name only, so two
  ;; providers naming the same database with different credentials
  ;; share whichever connection was opened first.
  (let ((key (vecdb-psql-dbname provider)))
    (or (gethash key vecdb-psql-connection-cache)
        (puthash key
                 (pg-connect key
                             (vecdb-psql-username provider)
                             (vecdb-psql-password provider))
                 vecdb-psql-connection-cache))))

(defun vecdb-psql--plist-keys (plist)
  "Return a list of keys from PLIST, as strings with the colon removed."
  (cl-loop for (k _v) on plist by #'cddr
           collect (substring (symbol-name k) 1)))

(defun vecdb-psql--field-names (collection)
  "Return the payload field names of COLLECTION as a list of strings.
Field names may be symbols or strings; both are printed with %s."
  (mapcar (lambda (field) (format "%s" (car field)))
          (vecdb-collection-payload-fields collection)))

(defun vecdb-psql--select-columns (collection)
  "Return the SELECT column list used to read full rows of COLLECTION.
The order is id, vector, then the payload fields in declaration
order, which is the layout `vecdb-psql--full-row-to-item' expects."
  ;; Joining a list avoids a dangling comma when there are no payload
  ;; fields.
  (mapconcat #'identity
             (append '("id" "vector::vector")
                     (vecdb-psql--field-names collection))
             ", "))

(defun vecdb-psql--vector-literal (vector)
  "Return the SQL text of a pgvector literal holding VECTOR.
VECTOR is any sequence of numbers."
  (format "'[%s]'::vector"
          (mapconcat (lambda (v) (format "%s" v)) vector ", ")))

(defun vecdb-psql--sql-literal (value)
  "Return VALUE rendered as SQL literal text.
A nil VALUE becomes the keyword NULL.  Any other value is printed
with %s and wrapped in single quotes, with embedded single quotes
doubled so the value cannot terminate the literal."
  (if (null value)
      "NULL"
    (concat "'"
            (replace-regexp-in-string "'" "''" (format "%s" value) t t)
            "'")))

(cl-defmethod vecdb-create ((provider vecdb-psql)
                            (collection vecdb-collection))
  "Create the table and indexes for COLLECTION using PROVIDER.
The table has an integer primary key, a vector column sized by the
collection, and one nullable column per payload field.  An HNSW
cosine index is created on the vector column and a plain index on
every payload column.  Signal an error for a payload field whose
type is not `string', `integer' or `float'."
  (let* ((connection (vecdb-psql-get-connection provider))
         (table (vecdb-collection-name collection))
         (fields (vecdb-collection-payload-fields collection))
         ;; Build the column definitions as a list and join them, so a
         ;; collection without payload fields yields valid SQL.
         (columns
          (append
           (list "id INTEGER PRIMARY KEY"
                 (format "vector VECTOR(%d) NOT NULL"
                         (vecdb-collection-vector-size collection)))
           (mapcar
            (lambda (field)
              (format "%s %s NULL"
                      (car field)
                      (pcase (cdr field)
                        ('string "TEXT")
                        ('integer "INTEGER")
                        ('float "FLOAT")
                        (_ (error "Unsupported field type: %s"
                                  (cdr field))))))
            fields))))
    (pg-exec connection
             (format "CREATE TABLE IF NOT EXISTS %s (%s);"
                     table
                     (mapconcat #'identity columns ", ")))
    (pg-exec connection
             (format "CREATE INDEX IF NOT EXISTS %s_embedding_hnsw_idx ON %s USING hnsw (vector vector_cosine_ops)"
                     table table))
    (mapc (lambda (field)
            (pg-exec connection
                     (format "CREATE INDEX IF NOT EXISTS %s_%s_idx ON %s (%s)"
                             table (car field) table (car field))))
          fields)))

(cl-defmethod vecdb-delete ((provider vecdb-psql)
                            (collection vecdb-collection))
  "Drop the table backing COLLECTION using PROVIDER, if it exists."
  (pg-exec (vecdb-psql-get-connection provider)
           (format "DROP TABLE IF EXISTS %s;"
                   (vecdb-collection-name collection))))

(cl-defmethod vecdb-exists ((provider vecdb-psql)
                            (collection vecdb-collection))
  "Check if the collection exists in the database.
Return t when a table named after COLLECTION is visible through
PROVIDER, nil otherwise."
  (let ((result
         (pg-exec (vecdb-psql-get-connection provider)
                  (format "SELECT EXISTS (
                     SELECT FROM information_schema.tables
                     WHERE table_name = %s
                   );"
                          ;; The table is created from an unquoted
                          ;; identifier, which Postgres folds to lower
                          ;; case, so compare against the folded name.
                          (vecdb-psql--sql-literal
                           (downcase
                            (format "%s"
                                    (vecdb-collection-name collection))))))))
    (and result
         (eq (caar (pg-result result :tuples)) t))))

(cl-defmethod vecdb-upsert-items ((provider vecdb-psql)
                                  (collection vecdb-collection)
                                  data-list &optional _)
  "Upsert items into the collection in the database.
All items in DATA-LIST must have the same payloads.  The payload
keys of the first item decide which columns are written; every
other item is read through those same keys, so a key missing from
an item is stored as NULL.  Do nothing when DATA-LIST is empty."
  (when data-list
    (let* ((keys (vecdb-psql--plist-keys
                  (vecdb-item-payload (car data-list))))
           (columns (append '("id" "vector") keys))
           (rows
            (mapconcat
             (lambda (item)
               (let ((payload (vecdb-item-payload item)))
                 (format
                  "(%s)"
                  (mapconcat
                   #'identity
                   (append
                    (list (format "%d" (vecdb-item-id item))
                          (vecdb-psql--vector-literal
                           (vecdb-item-vector item)))
                    ;; Always walk KEYS, never the item's own key
                    ;; order, so values line up with COLUMNS.
                    (mapcar
                     (lambda (key)
                       (vecdb-psql--sql-literal
                        (plist-get payload (intern (concat ":" key)))))
                     keys))
                   ", "))))
             data-list
             ", "))
           (updates
            (cons "vector = EXCLUDED.vector"
                  (mapcar (lambda (name)
                            (format "%s = EXCLUDED.%s" name name))
                          (vecdb-psql--field-names collection)))))
      (pg-exec (vecdb-psql-get-connection provider)
               (format "INSERT INTO %s (%s) VALUES %s
                        ON CONFLICT (id) DO UPDATE SET %s;"
                       (vecdb-collection-name collection)
                       (mapconcat #'identity columns ", ")
                       rows
                       (mapconcat #'identity updates ", "))))))

(defun vecdb-psql--full-row-to-item (row collection)
  "Convert a full database row ROW into a vecdb-item for COLLECTION.
ROW is a list holding the id, the vector, and then one value per
payload field of COLLECTION in declaration order."
  (make-vecdb-item
   :id (nth 0 row)
   :vector (nth 1 row)
   :payload
   ;; Build the plist pair by pair.  `flatten-list' must not be used
   ;; here: it removes nil values and splices list values, either of
   ;; which would misalign keys and values.
   (cl-loop for field in (vecdb-collection-payload-fields collection)
            for cell = (nthcdr 2 row) then (cdr cell)
            nconc (list (intern (format ":%s" (car field)))
                        (car cell)))))

(cl-defmethod vecdb-get-item ((provider vecdb-psql)
                              (collection vecdb-collection)
                              id)
  "Get an item from the collection by ID.
Return a vecdb-item, or nil when COLLECTION has no row with that
ID in the database of PROVIDER."
  (let ((rows
         (pg-result
          (pg-exec (vecdb-psql-get-connection provider)
                   (format "SELECT %s FROM %s WHERE id = %d;"
                           (vecdb-psql--select-columns collection)
                           (vecdb-collection-name collection)
                           id))
          :tuples)))
    (when rows
      (vecdb-psql--full-row-to-item (car rows) collection))))

(cl-defmethod vecdb-delete-items ((provider vecdb-psql)
                                  (collection vecdb-collection)
                                  ids &optional _)
  "Delete items from the collection by IDs.
IDS is a list of numeric ids.  Do nothing when IDS is empty."
  (when ids
    (pg-exec (vecdb-psql-get-connection provider)
             (format "DELETE FROM %s WHERE id IN (%s);"
                     (vecdb-collection-name collection)
                     (mapconcat #'number-to-string ids ", ")))))

(cl-defmethod vecdb-search-by-vector ((provider vecdb-psql)
                                      (collection vecdb-collection)
                                      vector
                                      &optional limit)
  "Search for items in the collection by VECTOR.
Return a list of vecdb-items from COLLECTION ordered by increasing
cosine distance to VECTOR, using PROVIDER.  When LIMIT is non-nil,
return at most LIMIT items."
  (let ((limit-clause (if limit
                          (format "LIMIT %d" limit)
                        "")))
    (mapcar (lambda (row)
              (vecdb-psql--full-row-to-item row collection))
            (pg-result
             (pg-exec (vecdb-psql-get-connection provider)
                      ;; `<=>' is the cosine distance operator.  It
                      ;; matches the vector_cosine_ops HNSW index made
                      ;; by `vecdb-create'; `<->' (L2 distance) could
                      ;; not use that index.
                      (format "SELECT %s FROM %s
                               ORDER BY vector <=> %s %s;"
                              (vecdb-psql--select-columns collection)
                              (vecdb-collection-name collection)
                              (vecdb-psql--vector-literal vector)
                              limit-clause))
             :tuples))))

(provide 'vecdb-psql)

;;; vecdb-psql.el ends here