Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-annoy for openSUSE:Factory checked in at 2023-01-02 16:38:24 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-annoy (Old) and /work/SRC/openSUSE:Factory/.python-annoy.new.1563 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-annoy" Mon Jan 2 16:38:24 2023 rev:12 rq:1046206 version:1.17.1 Changes: -------- --- /work/SRC/openSUSE:Factory/python-annoy/python-annoy.changes 2021-04-19 21:06:10.232044770 +0200 +++ /work/SRC/openSUSE:Factory/.python-annoy.new.1563/python-annoy.changes 2023-01-02 16:38:25.592868169 +0100 @@ -1,0 +2,6 @@ +Mon Jan 2 14:23:16 UTC 2023 - Dirk Müller <dmuel...@suse.com> + +- update to v1.17.1: + * Add -fpermissive plus some other minor things + +------------------------------------------------------------------- Old: ---- v1.17.0.tar.gz New: ---- v1.17.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-annoy.spec ++++++ --- /var/tmp/diff_new_pack.ND5n9H/_old 2023-01-02 16:38:26.052870643 +0100 +++ /var/tmp/diff_new_pack.ND5n9H/_new 2023-01-02 16:38:26.060870686 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-annoy # -# Copyright (c) 2021 SUSE LLC +# Copyright (c) 2023 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -19,7 +19,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} %define skip_python36 1 Name: python-annoy -Version: 1.17.0 +Version: 1.17.1 Release: 0 Summary: Approximation of Nearest Neighbors License: Apache-2.0 ++++++ denose.patch ++++++ --- /var/tmp/diff_new_pack.ND5n9H/_old 2023-01-02 16:38:26.084870815 +0100 +++ /var/tmp/diff_new_pack.ND5n9H/_new 2023-01-02 16:38:26.088870836 +0100 @@ -1,7 +1,7 @@ -Index: annoy-1.17.0/test/accuracy_test.py +Index: annoy-1.17.1/test/accuracy_test.py =================================================================== ---- annoy-1.17.0.orig/test/accuracy_test.py -+++ annoy-1.17.0/test/accuracy_test.py +--- annoy-1.17.1.orig/test/accuracy_test.py ++++ annoy-1.17.1/test/accuracy_test.py @@ -24,7 +24,6 @@ try: except ImportError: from urllib.request import urlretrieve # Python 3 @@ -10,10 +10,10 @@ class AccuracyTest(unittest.TestCase): def _get_index(self, dataset): -Index: annoy-1.17.0/test/hamming_index_test.py +Index: annoy-1.17.1/test/hamming_index_test.py =================================================================== ---- annoy-1.17.0.orig/test/hamming_index_test.py -+++ annoy-1.17.0/test/hamming_index_test.py +--- annoy-1.17.1.orig/test/hamming_index_test.py ++++ annoy-1.17.1/test/hamming_index_test.py @@ -12,11 +12,11 @@ # License for the specific language governing permissions and limitations under # the License. @@ -36,12 +36,12 @@ def test_zero_vectors(self): # Mentioned on the annoy-user list bitstrings = [ -Index: annoy-1.17.0/setup.py +Index: annoy-1.17.1/setup.py =================================================================== ---- annoy-1.17.0.orig/setup.py -+++ annoy-1.17.0/setup.py -@@ -84,6 +84,5 @@ setup(name='annoy', - 'Programming Language :: Python :: 3.6', +--- annoy-1.17.1.orig/setup.py ++++ annoy-1.17.1/setup.py +@@ -88,6 +88,5 @@ setup(name='annoy', + 'Programming Language :: Python :: 3.9', ], keywords='nns, approximate nearest neighbor search', - setup_requires=['nose>=1.0'], ++++++ reproducible.patch ++++++ --- /var/tmp/diff_new_pack.ND5n9H/_old 2023-01-02 16:38:26.100870901 +0100 +++ /var/tmp/diff_new_pack.ND5n9H/_new 2023-01-02 16:38:26.104870923 +0100 @@ -3,12 +3,12 @@ https://bugzilla.opensuse.org/show_bug.cgi?id=1100677 -Index: annoy-1.17.0/setup.py +Index: annoy-1.17.1/setup.py =================================================================== ---- annoy-1.17.0.orig/setup.py -+++ annoy-1.17.0/setup.py +--- annoy-1.17.1.orig/setup.py ++++ annoy-1.17.1/setup.py @@ -39,17 +39,6 @@ with codecs.open('README.rst', encoding= - extra_compile_args = ['-D_CRT_SECURE_NO_WARNINGS'] + extra_compile_args = ['-D_CRT_SECURE_NO_WARNINGS', '-fpermissive'] extra_link_args = [] -# Not all CPUs have march as a tuning parameter ++++++ v1.17.0.tar.gz -> v1.17.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/CMakeLists.txt new/annoy-1.17.1/CMakeLists.txt --- old/annoy-1.17.0/CMakeLists.txt 1970-01-01 01:00:00.000000000 +0100 +++ new/annoy-1.17.1/CMakeLists.txt 2022-08-08 11:31:20.000000000 +0200 @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.14) +project(Annoy + DESCRIPTION "Approximate Nearest Neighbors Oh Yeah" + VERSION 1.17.0 + LANGUAGES CXX +) + +add_library(Annoy INTERFACE) + +set(ANNOY_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/annoy) +file(MAKE_DIRECTORY ${ANNOY_INCLUDE_DIR}) +foreach (HEADER annoylib.h kissrandom.h mman.h) + file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/src/${HEADER} DESTINATION ${ANNOY_INCLUDE_DIR}) +endforeach() + +target_include_directories(Annoy INTERFACE include/) + +if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) + # Add Python set-up code here. +endif() + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/LICENSE new/annoy-1.17.1/LICENSE --- old/annoy-1.17.0/LICENSE 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/LICENSE 2022-08-08 11:31:20.000000000 +0200 @@ -187,7 +187,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] + Copyright 2021 (c) Spotify and its affiliates. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/README.rst new/annoy-1.17.1/README.rst --- old/annoy-1.17.0/README.rst 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/README.rst 2022-08-08 11:31:20.000000000 +0200 @@ -56,8 +56,9 @@ from annoy import AnnoyIndex import random - f = 40 - t = AnnoyIndex(f, 'angular') # Length of item vector that will be indexed + f = 40 # Length of item vector that will be indexed + + t = AnnoyIndex(f, 'angular') for i in range(1000): v = [random.gauss(0, 1) for z in range(f)] t.add_item(i, v) @@ -136,6 +137,7 @@ * `Boris Nagaev <https://github.com/starius>`__ wrote `Lua bindings <https://github.com/spotify/annoy/blob/master/README_Lua.md>`__. * During part of Spotify Hack Week 2016 (and a bit afterward), `Jim Kang <https://github.com/jimkang>`__ wrote `Node bindings <https://github.com/jimkang/annoy-node>`__ for Annoy. * `Min-Seok Kim <https://github.com/mskimm>`__ built a `Scala version <https://github.com/mskimm/ann4s>`__ of Annoy. +* `hanabi1224 <https://github.com/hanabi1224>`__ built a read-only `Rust version <https://github.com/hanabi1224/RuAnnoy>`__ of Annoy, together with **dotnet, jvm and dart** read-only bindings. * `Presentation from New York Machine Learning meetup <http://www.slideshare.net/erikbern/approximate-nearest-neighbor-methods-and-vector-models-nyc-ml-meetup>`__ about Annoy * Annoy is available as a `conda package <https://anaconda.org/conda-forge/python-annoy>`__ on Linux, OS X, and Windows. * `ann-benchmarks <https://github.com/erikbern/ann-benchmarks>`__ is a benchmark for several approximate nearest neighbor libraries. Annoy seems to be fairly competitive, especially at higher precisions: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/README_GO.rst new/annoy-1.17.1/README_GO.rst --- old/annoy-1.17.0/README_GO.rst 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/README_GO.rst 2022-08-08 11:31:20.000000000 +0200 @@ -7,6 +7,7 @@ mkdir -p $GOPATH/src/annoyindex cp src/annoygomodule_wrap.cxx src/annoyindex.go src/annoygomodule.h src/annoylib.h src/kissrandom.h test/annoy_test.go $GOPATH/src/annoyindex cd $GOPATH/src/annoyindex + go mod init annoyindex go get -t ... go test go build @@ -67,6 +68,6 @@ Discuss ------- -There might be some memory leaks. +There might be some memory leaks. See [this issue](https://github.com/swig/swig/issues/2292). Go glue written by Taneli Leppä (@rosmo). You can contact me via email (see https://github.com/rosmo). diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/annoy/__init__.pyi new/annoy-1.17.1/annoy/__init__.pyi --- old/annoy-1.17.0/annoy/__init__.pyi 1970-01-01 01:00:00.000000000 +0100 +++ new/annoy-1.17.1/annoy/__init__.pyi 2022-08-08 11:31:20.000000000 +0200 @@ -0,0 +1,45 @@ + +from typing import Sized, overload +from typing_extensions import Literal, Protocol + +class _Vector(Protocol, Sized): + def __getitem__(self, i: int) -> float: ... + +class AnnoyIndex: + f: int + def __init__(self, f: int, metric: Literal["angular", "euclidean", "manhattan", "hamming", "dot"]) -> None: ... + def load(self, fn: str, prefault: bool = ...) -> Literal[True]: ... + def save(self, fn: str, prefault: bool = ...) -> Literal[True]: ... + @overload + def get_nns_by_item(self, i: int, n: int, search_k: int = ..., include_distances: Literal[False] = ...) -> list[int]: ... + @overload + def get_nns_by_item( + self, i: int, n: int, search_k: int, include_distances: Literal[True] + ) -> tuple[list[int], list[float]]: ... + @overload + def get_nns_by_item( + self, i: int, n: int, search_k: int = ..., *, include_distances: Literal[True] + ) -> tuple[list[int], list[float]]: ... + @overload + def get_nns_by_vector( + self, vector: _Vector, n: int, search_k: int = ..., include_distances: Literal[False] = ... + ) -> list[int]: ... + @overload + def get_nns_by_vector( + self, vector: _Vector, n: int, search_k: int, include_distances: Literal[True] + ) -> tuple[list[int], list[float]]: ... + @overload + def get_nns_by_vector( + self, vector: _Vector, n: int, search_k: int = ..., *, include_distances: Literal[True] + ) -> tuple[list[int], list[float]]: ... + def get_item_vector(self, __i: int) -> list[float]: ... + def add_item(self, i: int, vector: _Vector) -> None: ... + def on_disk_build(self, fn: str) -> Literal[True]: ... + def build(self, n_trees: int, n_jobs: int = ...) -> Literal[True]: ... + def unbuild(self) -> Literal[True]: ... + def unload(self) -> Literal[True]: ... + def get_distance(self, __i: int, __j: int) -> float: ... + def get_n_items(self) -> int: ... + def get_n_trees(self) -> int: ... + def verbose(self, __v: bool) -> Literal[True]: ... + def set_seed(self, __s: int) -> None: ... diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/setup.py new/annoy-1.17.1/setup.py --- old/annoy-1.17.0/setup.py 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/setup.py 2022-08-08 11:31:20.000000000 +0200 @@ -36,7 +36,7 @@ long_description = readme_note + fobj.read() # Various platform-dependent extras -extra_compile_args = ['-D_CRT_SECURE_NO_WARNINGS'] +extra_compile_args = ['-D_CRT_SECURE_NO_WARNINGS', '-fpermissive'] extra_link_args = [] # Not all CPUs have march as a tuning parameter @@ -73,9 +73,10 @@ extra_link_args = manual_linker_args.split(',') setup(name='annoy', - version='1.17.0', + version='1.17.1', description='Approximate Nearest Neighbors in C++/Python optimized for memory usage and loading/saving to disk.', packages=['annoy'], + package_data={'annoy': ['__init__.pyi', 'py.typed']}, ext_modules=[ Extension( 'annoy.annoylib', ['src/annoymodule.cc'], @@ -98,6 +99,9 @@ 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', ], keywords='nns, approximate nearest neighbor search', setup_requires=['nose>=1.0'], diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/src/annoygomodule.h new/annoy-1.17.1/src/annoygomodule.h --- old/annoy-1.17.0/src/annoygomodule.h 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/src/annoygomodule.h 2022-08-08 11:31:20.000000000 +0200 @@ -1,6 +1,8 @@ #include "annoylib.h" #include "kissrandom.h" +using namespace Annoy; + namespace GoAnnoy { class AnnoyIndex { @@ -89,4 +91,12 @@ this->f = f; } }; + +class AnnoyIndexDotProduct : public AnnoyIndex { + public: + AnnoyIndexDotProduct(int f) { + ptr = new ::AnnoyIndex<int32_t, float, ::DotProduct, ::Kiss64Random, AnnoyIndexSingleThreadedBuildPolicy>(f); + this->f = f; + } +}; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/src/annoygomodule.i new/annoy-1.17.1/src/annoygomodule.i --- old/annoy-1.17.0/src/annoygomodule.i 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/src/annoygomodule.i 2022-08-08 11:31:20.000000000 +0200 @@ -1,5 +1,7 @@ %module annoyindex +namespace Annoy {} + %{ #include "annoygomodule.h" %} @@ -91,6 +93,4 @@ %feature("notabstract") GoAnnoyIndexAngular; %feature("notabstract") GoAnnoyIndexEuclidean; %feature("notabstract") GoAnnoyIndexManhattan; - - - +%feature("notabstract") GoAnnoyIndexDotProduct; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/src/annoylib.h new/annoy-1.17.1/src/annoylib.h --- old/annoy-1.17.0/src/annoylib.h 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/src/annoylib.h 2022-08-08 11:31:20.000000000 +0200 @@ -13,8 +13,8 @@ // the License. -#ifndef ANNOYLIB_H -#define ANNOYLIB_H +#ifndef ANNOY_ANNOYLIB_H +#define ANNOY_ANNOYLIB_H #include <stdio.h> #include <sys/stat.h> @@ -58,6 +58,10 @@ #include <queue> #include <limits> +#if __cplusplus >= 201103L +#include <type_traits> +#endif + #ifdef ANNOYLIB_MULTITHREADED_BUILD #include <thread> #include <mutex> @@ -72,9 +76,9 @@ // This allows others to supply their own logger / error printer without // requiring Annoy to import their headers. See RcppAnnoy for a use case. #ifndef __ERROR_PRINTER_OVERRIDE__ - #define showUpdate(...) { fprintf(stderr, __VA_ARGS__ ); } + #define annoylib_showUpdate(...) { fprintf(stderr, __VA_ARGS__ ); } #else - #define showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); } + #define annoylib_showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); } #endif // Portable alloc definition, cf Writing R Extensions, Section 1.6.4 @@ -87,40 +91,24 @@ # include <alloca.h> #endif -inline void set_error_from_errno(char **error, const char* msg) { - showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno); - if (error) { - *error = (char *)malloc(256); // TODO: win doesn't support snprintf - sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno); - } -} - -inline void set_error_from_string(char **error, const char* msg) { - showUpdate("%s\n", msg); - if (error) { - *error = (char *)malloc(strlen(msg) + 1); - strcpy(*error, msg); - } -} - // We let the v array in the Node struct take whatever space is needed, so this is a mostly insignificant number. // Compilers need *some* size defined for the v array, and some memory checking tools will flag for buffer overruns if this is set too low. -#define V_ARRAY_SIZE 65536 +#define ANNOYLIB_V_ARRAY_SIZE 65536 #ifndef _MSC_VER -#define popcount __builtin_popcountll +#define annoylib_popcount __builtin_popcountll #else // See #293, #358 -#define popcount cole_popcount +#define annoylib_popcount cole_popcount #endif #if !defined(NO_MANUAL_VECTORIZATION) && defined(__GNUC__) && (__GNUC__ >6) && defined(__AVX512F__) // See #402 -#define USE_AVX512 +#define ANNOYLIB_USE_AVX512 #elif !defined(NO_MANUAL_VECTORIZATION) && defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__) -#define USE_AVX +#define ANNOYLIB_USE_AVX #else #endif -#if defined(USE_AVX) || defined(USE_AVX512) +#if defined(ANNOYLIB_USE_AVX) || defined(ANNOYLIB_USE_AVX512) #if defined(_MSC_VER) #include <intrin.h> #elif defined(__GNUC__) @@ -129,11 +117,30 @@ #endif #if !defined(__MINGW32__) -#define FTRUNCATE_SIZE(x) static_cast<int64_t>(x) +#define ANNOYLIB_FTRUNCATE_SIZE(x) static_cast<int64_t>(x) #else -#define FTRUNCATE_SIZE(x) (x) +#define ANNOYLIB_FTRUNCATE_SIZE(x) (x) #endif +namespace Annoy { + +inline void set_error_from_errno(char **error, const char* msg) { + annoylib_showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno); + if (error) { + *error = (char *)malloc(256); // TODO: win doesn't support snprintf + sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno); + } +} + +inline void set_error_from_string(char **error, const char* msg) { + annoylib_showUpdate("%s\n", msg); + if (error) { + *error = (char *)malloc(strlen(msg) + 1); + strcpy(*error, msg); + } +} + + using std::vector; using std::pair; using std::numeric_limits; @@ -145,7 +152,7 @@ bool ok = ftruncate(_fd, new_size) != -1; #else munmap(*_ptr, old_size); - bool ok = ftruncate(_fd, FTRUNCATE_SIZE(new_size)) != -1; + bool ok = ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(new_size)) != -1; #ifdef MAP_POPULATE *_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0); #else @@ -194,7 +201,7 @@ return d; } -#ifdef USE_AVX +#ifdef ANNOYLIB_USE_AVX // Horizontal single sum of 256bit vector. inline float hsum256_ps_avx(__m256 v) { const __m128 x128 = _mm_add_ps(_mm256_extractf128_ps(v, 1), _mm256_castps256_ps128(v)); @@ -277,7 +284,7 @@ #endif -#ifdef USE_AVX512 +#ifdef ANNOYLIB_USE_AVX512 template<> inline float dot<float>(const float* x, const float *y, int f) { float result = 0; @@ -452,7 +459,7 @@ S children[2]; // Will possibly store more than 2 T norm; }; - T v[V_ARRAY_SIZE]; + T v[ANNOYLIB_V_ARRAY_SIZE]; }; template<typename S, typename T> static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) { @@ -523,7 +530,7 @@ S n_descendants; S children[2]; // Will possibly store more than 2 T dot_factor; - T v[V_ARRAY_SIZE]; + T v[ANNOYLIB_V_ARRAY_SIZE]; }; static const char* name() { @@ -630,7 +637,7 @@ struct Node { S n_descendants; S children[2]; - T v[V_ARRAY_SIZE]; + T v[ANNOYLIB_V_ARRAY_SIZE]; }; static const size_t max_iterations = 20; @@ -659,7 +666,7 @@ static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) { size_t dist = 0; for (int i = 0; i < f; i++) { - dist += popcount(x->v[i] ^ y->v[i]); + dist += annoylib_popcount(x->v[i] ^ y->v[i]); } return dist; } @@ -727,7 +734,7 @@ S n_descendants; T a; // need an extra constant term to determine the offset of the plane S children[2]; - T v[V_ARRAY_SIZE]; + T v[ANNOYLIB_V_ARRAY_SIZE]; }; template<typename S, typename T> static inline T margin(const Node<S, T>* n, const T* y, int f) { @@ -815,7 +822,7 @@ } }; -template<typename S, typename T> +template<typename S, typename T, typename R = uint64_t> class AnnoyIndexInterface { public: // Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL @@ -833,12 +840,18 @@ virtual S get_n_trees() const = 0; virtual void verbose(bool v) = 0; virtual void get_item(S item, T* v) const = 0; - virtual void set_seed(int q) = 0; + virtual void set_seed(R q) = 0; virtual bool on_disk_build(const char* filename, char** error=NULL) = 0; }; template<typename S, typename T, typename Distance, typename Random, class ThreadedBuildPolicy> - class AnnoyIndex : public AnnoyIndexInterface<S, T> { + class AnnoyIndex : public AnnoyIndexInterface<S, T, +#if __cplusplus >= 201103L + typename std::remove_const<decltype(Random::default_seed)>::type +#else + typename Random::seed_type +#endif + > { /* * We use random projection to build a forest of binary trees of all items. * Basically just split the hyperspace into two sides by a hyperplane, @@ -849,6 +862,11 @@ public: typedef Distance D; typedef typename D::template Node<S, T> Node; +#if __cplusplus >= 201103L + typedef typename std::remove_const<decltype(Random::default_seed)>::type R; +#else + typedef typename Random::seed_type R; +#endif protected: const int _f; @@ -859,8 +877,7 @@ S _nodes_size; vector<S> _roots; S _K; - bool _is_seeded; - int _seed; + R _seed; bool _loaded; bool _verbose; int _fd; @@ -868,7 +885,7 @@ bool _built; public: - AnnoyIndex(int f) : _f(f) { + AnnoyIndex(int f) : _f(f), _seed(Random::default_seed) { _s = offsetof(Node, v) + _f * sizeof(T); // Size of each node _verbose = false; _built = false; @@ -922,7 +939,7 @@ return false; } _nodes_size = 1; - if (ftruncate(_fd, FTRUNCATE_SIZE(_s) * FTRUNCATE_SIZE(_nodes_size)) == -1) { + if (ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(_s) * ANNOYLIB_FTRUNCATE_SIZE(_nodes_size)) == -1) { set_error_from_errno(error, "Unable to truncate"); return false; } @@ -958,7 +975,7 @@ memcpy(_get(_n_nodes + (S)i), _get(_roots[i]), _s); _n_nodes += _roots.size(); - if (_verbose) showUpdate("has %d nodes\n", _n_nodes); + if (_verbose) annoylib_showUpdate("has %d nodes\n", _n_nodes); if (_on_disk) { if (!remap_memory_and_truncate(&_nodes, _fd, @@ -1027,7 +1044,7 @@ _n_nodes = 0; _nodes_size = 0; _on_disk = false; - _is_seeded = false; + _seed = Random::default_seed; _roots.clear(); } @@ -1046,7 +1063,7 @@ } } reinitialize(); - if (_verbose) showUpdate("unloaded\n"); + if (_verbose) annoylib_showUpdate("unloaded\n"); } bool load(const char* filename, bool prefault=false, char** error=NULL) { @@ -1074,7 +1091,7 @@ #ifdef MAP_POPULATE flags |= MAP_POPULATE; #else - showUpdate("prefault is set to true, but MAP_POPULATE is not defined on this platform"); + annoylib_showUpdate("prefault is set to true, but MAP_POPULATE is not defined on this platform"); #endif } _nodes = (Node*)mmap(0, size, PROT_READ, flags, _fd, 0); @@ -1098,7 +1115,7 @@ _loaded = true; _built = true; _n_items = m; - if (_verbose) showUpdate("found %lu roots with degree %d\n", _roots.size(), m); + if (_verbose) annoylib_showUpdate("found %lu roots with degree %d\n", _roots.size(), m); return true; } @@ -1134,16 +1151,13 @@ memcpy(v, m->v, (_f) * sizeof(T)); } - void set_seed(int seed) { - _is_seeded = true; + void set_seed(R seed) { _seed = seed; } void thread_build(int q, int thread_idx, ThreadedBuildPolicy& threaded_build_policy) { - Random _random; // Each thread needs its own seed, otherwise each thread would be building the same tree(s) - int seed = _is_seeded ? _seed + thread_idx : thread_idx; - _random.set_seed(seed); + Random _random(_seed + thread_idx); vector<S> thread_roots; while (1) { @@ -1160,7 +1174,7 @@ } } - if (_verbose) showUpdate("pass %zd...\n", thread_roots.size()); + if (_verbose) annoylib_showUpdate("pass %zd...\n", thread_roots.size()); vector<S> indices; threaded_build_policy.lock_shared_nodes(); @@ -1190,14 +1204,14 @@ static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size), static_cast<size_t>(_s) * static_cast<size_t>(new_nodes_size)) && _verbose) - showUpdate("File truncation error\n"); + annoylib_showUpdate("File truncation error\n"); } else { _nodes = realloc(_nodes, _s * new_nodes_size); memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, (new_nodes_size - _nodes_size) * _s); } _nodes_size = new_nodes_size; - if (_verbose) showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes); + if (_verbose) annoylib_showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes); } void _allocate_size(S n, ThreadedBuildPolicy& threaded_build_policy) { @@ -1279,7 +1293,7 @@ bool side = D::side(m, n->v, _f, _random); children_indices[side].push_back(j); } else { - showUpdate("No node for index %d?\n", j); + annoylib_showUpdate("No node for index %d?\n", j); } } @@ -1291,7 +1305,7 @@ // If we didn't find a hyperplane, just randomize sides as a last option while (_split_imbalance(children_indices[0], children_indices[1]) > 0.99) { if (_verbose) - showUpdate("\tNo hyperplane found (left has %ld children, right has %ld children)\n", + annoylib_showUpdate("\tNo hyperplane found (left has %ld children, right has %ld children)\n", children_indices[0].size(), children_indices[1].size()); children_indices[0].clear(); @@ -1475,5 +1489,7 @@ }; #endif +} + #endif // vim: tabstop=2 shiftwidth=2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/src/annoyluamodule.cc new/annoy-1.17.1/src/annoyluamodule.cc --- old/annoy-1.17.0/src/annoyluamodule.cc 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/src/annoyluamodule.cc 2022-08-08 11:31:20.000000000 +0200 @@ -28,6 +28,8 @@ #define compat_rawlen lua_rawlen #endif +using namespace Annoy; + template<typename Distance> class LuaAnnoy { public: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/src/annoymodule.cc new/annoy-1.17.1/src/annoymodule.cc --- old/annoy-1.17.0/src/annoymodule.cc 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/src/annoymodule.cc 2022-08-08 11:31:20.000000000 +0200 @@ -24,9 +24,9 @@ #endif -#if defined(USE_AVX512) +#if defined(ANNOYLIB_USE_AVX512) #define AVX_INFO "Using 512-bit AVX instructions" -#elif defined(USE_AVX128) +#elif defined(ANNOYLIB_USE_AVX128) #define AVX_INFO "Using 128-bit AVX instructions" #else #define AVX_INFO "Not using AVX instructions" @@ -54,13 +54,15 @@ #define PyInt_FromLong PyLong_FromLong #endif +using namespace Annoy; + #ifdef ANNOYLIB_MULTITHREADED_BUILD typedef AnnoyIndexMultiThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy; #else typedef AnnoyIndexSingleThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy; #endif -template class AnnoyIndexInterface<int32_t, float>; +template class Annoy::AnnoyIndexInterface<int32_t, float>; class HammingWrapper : public AnnoyIndexInterface<int32_t, float> { // Wrapper class for Hamming distance, using composition. @@ -123,7 +125,7 @@ _index.get_item(item, &v_internal[0]); _unpack(&v_internal[0], v); }; - void set_seed(int q) { _index.set_seed(q); }; + void set_seed(uint64_t q) { _index.set_seed(q); }; bool on_disk_build(const char* filename, char** error) { return _index.on_disk_build(filename, error); }; }; @@ -236,21 +238,46 @@ PyObject* get_nns_to_python(const vector<int32_t>& result, const vector<float>& distances, int include_distances) { - PyObject* l = PyList_New(result.size()); - for (size_t i = 0; i < result.size(); i++) - PyList_SetItem(l, i, PyInt_FromLong(result[i])); + PyObject* l = NULL; + PyObject* d = NULL; + PyObject* t = NULL; + + if ((l = PyList_New(result.size())) == NULL) { + goto error; + } + for (size_t i = 0; i < result.size(); i++) { + PyObject* res = PyInt_FromLong(result[i]); + if (res == NULL) { + goto error; + } + PyList_SetItem(l, i, res); + } if (!include_distances) return l; - PyObject* d = PyList_New(distances.size()); - for (size_t i = 0; i < distances.size(); i++) - PyList_SetItem(d, i, PyFloat_FromDouble(distances[i])); - - PyObject* t = PyTuple_New(2); - PyTuple_SetItem(t, 0, l); - PyTuple_SetItem(t, 1, d); + if ((d = PyList_New(distances.size())) == NULL) { + goto error; + } + + for (size_t i = 0; i < distances.size(); i++) { + PyObject* dist = PyFloat_FromDouble(distances[i]); + if (dist == NULL) { + goto error; + } + PyList_SetItem(d, i, dist); + } + + if ((t = PyTuple_Pack(2, l, d)) == NULL) { + goto error; + } return t; + + error: + Py_XDECREF(l); + Py_XDECREF(d); + Py_XDECREF(t); + return NULL; } @@ -293,24 +320,31 @@ bool convert_list_to_vector(PyObject* v, int f, vector<float>* w) { - if (PyObject_Size(v) == -1) { - char buf[256]; - snprintf(buf, 256, "Expected an iterable, got an object of type \"%s\"", v->ob_type->tp_name); - PyErr_SetString(PyExc_ValueError, buf); + Py_ssize_t length = PyObject_Size(v); + if (length == -1) { return false; } - if (PyObject_Size(v) != f) { - char buf[128]; - snprintf(buf, 128, "Vector has wrong length (expected %d, got %ld)", f, PyObject_Size(v)); - PyErr_SetString(PyExc_IndexError, buf); + if (length != f) { + PyErr_Format(PyExc_IndexError, "Vector has wrong length (expected %d, got %ld)", f, length); return false; } + for (int z = 0; z < f; z++) { PyObject *key = PyInt_FromLong(z); + if (key == NULL) { + return false; + } PyObject *pf = PyObject_GetItem(v, key); - (*w)[z] = PyFloat_AsDouble(pf); Py_DECREF(key); + if (pf == NULL) { + return false; + } + double value = PyFloat_AsDouble(pf); Py_DECREF(pf); + if (value == -1.0 && PyErr_Occurred()) { + return false; + } + (*w)[z] = value; } return true; } @@ -357,11 +391,22 @@ vector<float> v(self->f); self->ptr->get_item(item, &v[0]); PyObject* l = PyList_New(self->f); + if (l == NULL) { + return NULL; + } for (int z = 0; z < self->f; z++) { - PyList_SetItem(l, z, PyFloat_FromDouble(v[z])); + PyObject* dist = PyFloat_FromDouble(v[z]); + if (dist == NULL) { + goto error; + } + PyList_SetItem(l, z, dist); } return l; + + error: + Py_XDECREF(l); + return NULL; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/src/kissrandom.h new/annoy-1.17.1/src/kissrandom.h --- old/annoy-1.17.0/src/kissrandom.h 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/src/kissrandom.h 2022-08-08 11:31:20.000000000 +0200 @@ -1,5 +1,5 @@ -#ifndef KISSRANDOM_H -#define KISSRANDOM_H +#ifndef ANNOY_KISSRANDOM_H +#define ANNOY_KISSRANDOM_H #if defined(_MSC_VER) && _MSC_VER == 1500 typedef unsigned __int32 uint32_t; @@ -8,6 +8,8 @@ #include <stdint.h> #endif +namespace Annoy { + // KISS = "keep it simple, stupid", but high quality random number generator // http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf -> "Use a good RNG and build it into your code" // http://mathforum.org/kb/message.jspa?messageID=6627731 @@ -20,8 +22,13 @@ uint32_t z; uint32_t c; + static const uint32_t default_seed = 123456789; +#if __cplusplus < 201103L + typedef uint32_t seed_type; +#endif + // seed must be != 0 - Kiss32Random(uint32_t seed = 123456789) { + Kiss32Random(uint32_t seed = default_seed) { x = seed; y = 362436000; z = 521288629; @@ -64,8 +71,13 @@ uint64_t z; uint64_t c; + static const uint64_t default_seed = 1234567890987654321ULL; +#if __cplusplus < 201103L + typedef uint64_t seed_type; +#endif + // seed must be != 0 - Kiss64Random(uint64_t seed = 1234567890987654321ULL) { + Kiss64Random(uint64_t seed = default_seed) { x = seed; y = 362436362436362436ULL; z = 1066149217761810ULL; @@ -97,10 +109,12 @@ // Draw random integer between 0 and n-1 where n is at most the number of data points you have return kiss() % n; } - inline void set_seed(uint32_t seed) { + inline void set_seed(uint64_t seed) { x = seed; } }; +} + #endif // vim: tabstop=2 shiftwidth=2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/test/angular_index_test.py new/annoy-1.17.1/test/angular_index_test.py --- old/annoy-1.17.0/test/angular_index_test.py 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/test/angular_index_test.py 2022-08-08 11:31:20.000000000 +0200 @@ -205,8 +205,8 @@ idx.save('foo.idx') idx = AnnoyIndex(100, 'angular') idx.load('foo.idx') - self.assertEquals(idx.get_n_items(), 1) - self.assertEquals(idx.get_nns_by_vector(vector=numpy.random.randn(100), n=50, include_distances=False), [0]) + self.assertEqual(idx.get_n_items(), 1) + self.assertEqual(idx.get_nns_by_vector(vector=numpy.random.randn(100), n=50, include_distances=False), [0]) def test_no_items(self): idx = AnnoyIndex(100, 'angular') @@ -214,8 +214,8 @@ idx.save('foo.idx') idx = AnnoyIndex(100, 'angular') idx.load('foo.idx') - self.assertEquals(idx.get_n_items(), 0) - self.assertEquals(idx.get_nns_by_vector(vector=numpy.random.randn(100), n=50, include_distances=False), []) + self.assertEqual(idx.get_n_items(), 0) + self.assertEqual(idx.get_nns_by_vector(vector=numpy.random.randn(100), n=50, include_distances=False), []) def test_single_vector(self): # https://github.com/spotify/annoy/issues/194 @@ -224,6 +224,6 @@ a.build(10) a.save('1.ann') indices, dists = a.get_nns_by_vector([1, 0, 0], 3, include_distances=True) - self.assertEquals(indices, [0]) + self.assertEqual(indices, [0]) self.assertAlmostEqual(dists[0] ** 2, 0.0) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/test/annoy_test.go new/annoy-1.17.1/test/annoy_test.go --- old/annoy-1.17.0/test/annoy_test.go 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/test/annoy_test.go 2022-08-08 11:31:20.000000000 +0200 @@ -107,17 +107,20 @@ func (suite *AnnoyTestSuite) TestOnDiskBuild() { index := annoyindex.NewAnnoyIndexAngular(3) index.OnDiskBuild("go_test.ann"); - + info, err := os.Stat("go_test.ann") if err != nil { assert.Fail(suite.T(), "Failed to create file, file not found") } - + if info.Size() == 0 { + assert.Fail(suite.T(), "Failed to create file, file size zero") + } + index.AddItem(0, []float32{0, 0, 1}) index.AddItem(1, []float32{0, 1, 0}) index.AddItem(2, []float32{1, 0, 0}) index.Build(10) - + index.Unload(); index.Load("go_test.ann"); @@ -132,7 +135,7 @@ assert.Equal(suite.T(), []int{2, 0, 1}, result) annoyindex.DeleteAnnoyIndexAngular(index) - + os.Remove("go_test.ann") } @@ -206,6 +209,18 @@ annoyindex.DeleteAnnoyIndexAngular(index) } +func (suite *AnnoyTestSuite) TestGetDotProductDistance() { + index := annoyindex.NewAnnoyIndexDotProduct(2) + index.AddItem(0, []float32{0, 1}) + index.AddItem(1, []float32{1, 1}) + index.Build(10) + + assert.True(suite.T(), + math.Abs(1.0-float64(index.GetDistance(0, 1))) < 0.00001) + + annoyindex.DeleteAnnoyIndexDotProduct(index) +} + func (suite *AnnoyTestSuite) TestLargeEuclideanIndex() { index := annoyindex.NewAnnoyIndexEuclidean(10) @@ -241,5 +256,3 @@ func TestAnnoyTestSuite(t *testing.T) { suite.Run(t, new(AnnoyTestSuite)) } - - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/test/hamming_index_test.py new/annoy-1.17.1/test/hamming_index_test.py --- old/annoy-1.17.0/test/hamming_index_test.py 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/test/hamming_index_test.py 2022-08-08 11:31:20.000000000 +0200 @@ -44,10 +44,10 @@ i.add_item(0, u) i.add_item(1, v) i.build(10) - self.assertEquals(i.get_nns_by_item(0, 99), [0, 1]) - self.assertEquals(i.get_nns_by_item(1, 99), [1, 0]) + self.assertEqual(i.get_nns_by_item(0, 99), [0, 1]) + self.assertEqual(i.get_nns_by_item(1, 99), [1, 0]) rs, ds = i.get_nns_by_item(0, 99, include_distances=True) - self.assertEquals(rs, [0, 1]) + self.assertEqual(rs, [0, 1]) self.assertAlmostEqual(ds[0], 0) self.assertAlmostEqual(ds[1], numpy.dot(u-v, u-v)) @@ -63,7 +63,7 @@ j = AnnoyIndex(f, 'hamming') j.load('blah.ann') rs, ds = j.get_nns_by_item(0, 99, include_distances=True) - self.assertEquals(rs, [0, 1]) + self.assertEqual(rs, [0, 1]) self.assertAlmostEqual(ds[0], 0) self.assertAlmostEqual(ds[1], numpy.dot(u-v, u-v)) @@ -114,5 +114,5 @@ idx = AnnoyIndex(f, 'hamming') idx.load('idx.ann') js, ds = idx.get_nns_by_item(0, 5, include_distances=True) - self.assertEquals(js[0], 0) - self.assertEquals(ds[:4], [0, 1, 1, 22]) + self.assertEqual(js[0], 0) + self.assertEqual(ds[:4], [0, 1, 1, 22]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/test/holes_test.py new/annoy-1.17.1/test/holes_test.py --- old/annoy-1.17.0/test/holes_test.py 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/test/holes_test.py 2022-08-08 11:31:20.000000000 +0200 @@ -43,7 +43,7 @@ annoy.add_item(base_i + i, numpy.random.normal(size=(f,))) annoy.build(100) res = annoy.get_nns_by_item(base_i, n) - self.assertEquals(set(res), set([base_i + i for i in range(n)])) + self.assertEqual(set(res), set([base_i + i for i in range(n)])) def test_root_one_child(self): # See https://github.com/spotify/annoy/issues/223 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/test/index_test.py new/annoy-1.17.1/test/index_test.py --- old/annoy-1.17.0/test/index_test.py 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/test/index_test.py 2022-08-08 11:31:20.000000000 +0200 @@ -244,4 +244,4 @@ self.assertLess(os.path.getsize(path), dangerous_size + 100e3) # Sanity check number of trees - self.assertEquals(m.get_n_trees(), n_trees) + self.assertEqual(m.get_n_trees(), n_trees) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/test/seed_test.py new/annoy-1.17.1/test/seed_test.py --- old/annoy-1.17.0/test/seed_test.py 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/test/seed_test.py 2022-08-08 11:31:20.000000000 +0200 @@ -34,6 +34,6 @@ indexes.append(index) for k in range(Y.shape[0]): - self.assertEquals(indexes[0].get_nns_by_vector(Y[k], 100), - indexes[1].get_nns_by_vector(Y[k], 100)) + self.assertEqual(indexes[0].get_nns_by_vector(Y[k], 100), + indexes[1].get_nns_by_vector(Y[k], 100)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.17.0/test/types_test.py new/annoy-1.17.1/test/types_test.py --- old/annoy-1.17.0/test/types_test.py 2020-09-18 18:03:55.000000000 +0200 +++ new/annoy-1.17.1/test/types_test.py 2022-08-08 11:31:20.000000000 +0200 @@ -13,6 +13,7 @@ # the License. import numpy +import sys import random from common import TestCase from annoy import AnnoyIndex @@ -56,3 +57,63 @@ self.assertRaises(IndexError, i.get_distance, 0, bad_index) self.assertRaises(IndexError, i.get_nns_by_item, bad_index, 1) self.assertRaises(IndexError, i.get_item_vector, bad_index) + + def test_missing_len(self): + """ + We should get a helpful error message if our vector doesn't have a + __len__ method. + """ + class FakeCollection: + pass + + i = AnnoyIndex(10, 'euclidean') + # Python 2.7 raises an AttributeError instead of a TypeError like newer versions of Python. + if sys.version_info.major == 2: + with self.assertRaises(AttributeError, msg="FakeCollection instance has no attribute '__len__'"): + i.add_item(1, FakeCollection()) + else: + with self.assertRaises(TypeError, msg="object of type 'FakeCollection' has no len()"): + i.add_item(1, FakeCollection()) + + def test_missing_getitem(self): + """ + We should get a helpful error message if our vector doesn't have a + __getitem__ method. + """ + class FakeCollection: + def __len__(self): + return 5 + + i = AnnoyIndex(5, 'euclidean') + # Python 2.7 raises an AttributeError instead of a TypeError like newer versions of Python. + if sys.version_info.major == 2: + with self.assertRaises(AttributeError, msg="FakeCollection instance has no attribute '__getitem__'"): + i.add_item(1, FakeCollection()) + else: + with self.assertRaises(TypeError, msg="'FakeCollection' object is not subscriptable"): + i.add_item(1, FakeCollection()) + + def test_short(self): + """ + Ensure we handle our vector not being long enough. + """ + class FakeCollection: + def __len__(self): + return 3 + + def __getitem__(self, i): + raise IndexError + + i = AnnoyIndex(3, 'euclidean') + with self.assertRaises(IndexError): + i.add_item(1, FakeCollection()) + + def test_non_float(self): + """ + We should error gracefully if non-floats are provided in our vector. + """ + array_strings = ["1", "2", "3"] + + i = AnnoyIndex(3, 'euclidean') + with self.assertRaises(TypeError, msg="must be real number, not str"): + i.add_item(1, array_strings)