Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package python-annoy for openSUSE:Factory 
checked in at 2021-01-20 18:29:26
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-annoy (Old)
 and      /work/SRC/openSUSE:Factory/.python-annoy.new.28504 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "python-annoy"

Wed Jan 20 18:29:26 2021 rev:10 rq:865041 version:1.17.0

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-annoy/python-annoy.changes        
2020-03-03 10:19:39.807075310 +0100
+++ /work/SRC/openSUSE:Factory/.python-annoy.new.28504/python-annoy.changes     
2021-01-20 18:29:36.527638055 +0100
@@ -1,0 +2,12 @@
+Wed Jan 20 15:12:43 UTC 2021 - Markéta Machová <mmach...@suse.com>
+
+- Update to 1.17.0
+  * multithreaded building + misc more
+- Add denose.patch to get rid of nose
+
+-------------------------------------------------------------------
+Wed Jan 20 15:09:53 UTC 2021 - John Vandenberg <jay...@gmail.com>
+
+- Skip one test flaky on Python 3.6
+
+-------------------------------------------------------------------

Old:
----
  v1.16.3.tar.gz

New:
----
  denose.patch
  v1.17.0.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-annoy.spec ++++++
--- /var/tmp/diff_new_pack.OuVL8C/_old  2021-01-20 18:29:37.175638676 +0100
+++ /var/tmp/diff_new_pack.OuVL8C/_new  2021-01-20 18:29:37.179638680 +0100
@@ -1,7 +1,7 @@
 #
 # spec file for package python-annoy
 #
-# Copyright (c) 2020 SUSE LLC
+# Copyright (c) 2021 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -18,7 +18,7 @@
 
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 Name:           python-annoy
-Version:        1.16.3
+Version:        1.17.0
 Release:        0
 Summary:        Approximation of Nearest Neighbors
 License:        Apache-2.0
@@ -27,9 +27,10 @@
 Source:         https://github.com/spotify/annoy/archive/v%{version}.tar.gz
 # PATCH-FIX-OPENSUSE boo#1100677
 Patch0:         reproducible.patch
+Patch1:         denose.patch
+BuildRequires:  %{python_module cached-property}
 BuildRequires:  %{python_module devel}
 BuildRequires:  %{python_module h5py}
-BuildRequires:  %{python_module nose >= 1.0}
 BuildRequires:  %{python_module numpy}
 BuildRequires:  %{python_module pytest}
 BuildRequires:  %{python_module setuptools}
@@ -48,6 +49,7 @@
 %prep
 %setup -q -n annoy-%{version}
 %patch0 -p1
+%patch1 -p1
 # fix testdata location
 sed -i -e "s:'test/test:'test:g" test/index_test.py
 
@@ -63,8 +65,9 @@
 # online tests: test_fashion_mnist, test_glove_25, test_nytimes_16
 # fails on 32bit: test_distance_consistency
 # fails on 32bit: test_very_large_index
+# flakey on Python 3.6: AngularIndexTest.test_include_dists
 cd test
-%pytest_arch -k 'not (test_fashion_mnist or test_glove_25 or test_nytimes_16 
or test_distance_consistency or test_very_large_index)'
+%pytest_arch -k 'not (test_fashion_mnist or test_glove_25 or test_nytimes_16 
or test_distance_consistency or test_very_large_index or (AngularIndexTest and 
test_include_dists))'
 
 %files %{python_files}
 %doc README.rst

++++++ denose.patch ++++++
Index: annoy-1.17.0/test/accuracy_test.py
===================================================================
--- annoy-1.17.0.orig/test/accuracy_test.py
+++ annoy-1.17.0/test/accuracy_test.py
@@ -24,7 +24,6 @@ try:
 except ImportError:
     from urllib.request import urlretrieve # Python 3
 import gzip
-from nose.plugins.attrib import attr
 
 class AccuracyTest(unittest.TestCase):
     def _get_index(self, dataset):
Index: annoy-1.17.0/test/hamming_index_test.py
===================================================================
--- annoy-1.17.0.orig/test/hamming_index_test.py
+++ annoy-1.17.0/test/hamming_index_test.py
@@ -12,11 +12,11 @@
 # License for the specific language governing permissions and limitations under
 # the License.
 
+import unittest
 import numpy
 import random
 from common import TestCase
 from annoy import AnnoyIndex
-from nose.plugins.skip import SkipTest
 
 
 class HammingIndexTest(TestCase):
@@ -85,7 +85,7 @@ class HammingIndexTest(TestCase):
         avg_dist = 1.0 * sum(dists) / len(dists)
         self.assertLessEqual(avg_dist, 0.42)
 
-    @SkipTest  # will fix later
+    @unittest.skip('will fix later')
     def test_zero_vectors(self):
         # Mentioned on the annoy-user list
         bitstrings = [
Index: annoy-1.17.0/setup.py
===================================================================
--- annoy-1.17.0.orig/setup.py
+++ annoy-1.17.0/setup.py
@@ -84,6 +84,5 @@ setup(name='annoy',
           'Programming Language :: Python :: 3.6',
       ],
       keywords='nns, approximate nearest neighbor search',
-      setup_requires=['nose>=1.0'],
       tests_require=['numpy', 'h5py']
       )
++++++ reproducible.patch ++++++
--- /var/tmp/diff_new_pack.OuVL8C/_old  2021-01-20 18:29:37.211638710 +0100
+++ /var/tmp/diff_new_pack.OuVL8C/_new  2021-01-20 18:29:37.215638714 +0100
@@ -3,11 +3,11 @@
 
 https://bugzilla.opensuse.org/show_bug.cgi?id=1100677
 
-Index: annoy-1.16.3/setup.py
+Index: annoy-1.17.0/setup.py
 ===================================================================
---- annoy-1.16.3.orig/setup.py
-+++ annoy-1.16.3/setup.py
-@@ -38,22 +38,6 @@ with codecs.open('README.rst', encoding=
+--- annoy-1.17.0.orig/setup.py
++++ annoy-1.17.0/setup.py
+@@ -39,17 +39,6 @@ with codecs.open('README.rst', encoding=
  extra_compile_args = ['-D_CRT_SECURE_NO_WARNINGS']
  extra_link_args = []
  
@@ -22,10 +22,17 @@
 -if os.name != 'nt':
 -    extra_compile_args += ['-O3', '-ffast-math', '-fno-associative-math']
 -
+ # Add multithreaded build flag for all platforms using Python 3 and
+ # for non-Windows Python 2 platforms
+ python_major_version = sys.version_info[0]
+@@ -59,11 +48,6 @@ if python_major_version == 3 or (python_
+     if os.name != 'nt':
+         extra_compile_args += ['-std=c++14']
+ 
 -# #349: something with OS X Mojave causes libstd not to be found
 -if platform.system() == 'Darwin':
--    extra_compile_args += ['-std=c++11', '-mmacosx-version-min=10.9']
--    extra_link_args += ['-stdlib=libc++', '-mmacosx-version-min=10.9']
+-    extra_compile_args += ['-mmacosx-version-min=10.12']
+-    extra_link_args += ['-stdlib=libc++', '-mmacosx-version-min=10.12']
 -
  # Manual configuration, you're on your own here.
  manual_compiler_args = os.environ.get('ANNOY_COMPILER_ARGS', None)

++++++ v1.16.3.tar.gz -> v1.17.0.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/README.rst new/annoy-1.17.0/README.rst
--- old/annoy-1.16.3/README.rst 2019-12-26 22:11:45.000000000 +0100
+++ new/annoy-1.17.0/README.rst 2020-09-18 18:03:55.000000000 +0200
@@ -78,7 +78,7 @@
 
 * ``AnnoyIndex(f, metric)`` returns a new index that's read-write and stores 
vector of ``f`` dimensions. Metric can be ``"angular"``, ``"euclidean"``, 
``"manhattan"``, ``"hamming"``, or ``"dot"``.
 * ``a.add_item(i, v)`` adds item ``i`` (any nonnegative integer) with vector 
``v``. Note that it will allocate memory for ``max(i)+1`` items.
-* ``a.build(n_trees)`` builds a forest of ``n_trees`` trees. More trees gives 
higher precision when querying. After calling ``build``, no more items can be 
added.
+* ``a.build(n_trees, n_jobs=-1)`` builds a forest of ``n_trees`` trees. More 
trees gives higher precision when querying. After calling ``build``, no more 
items can be added. ``n_jobs`` specifies the number of threads used to build 
the trees. ``n_jobs=-1`` uses all available CPU cores.
 * ``a.save(fn, prefault=False)`` saves the index to disk and loads it (see 
next function). After saving, no more items can be added.
 * ``a.load(fn, prefault=False)`` loads (mmaps) an index from disk. If 
`prefault` is set to `True`, it will pre-read the entire file into memory 
(using mmap with `MAP_POPULATE`). Default is `False`.
 * ``a.unload()`` unloads.
@@ -89,6 +89,7 @@
 * ``a.get_n_items()`` returns the number of items in the index.
 * ``a.get_n_trees()`` returns the number of trees in the index.
 * ``a.on_disk_build(fn)`` prepares annoy to build the index in the specified 
file instead of RAM (execute before adding items, no need to save after build)
+* ``a.set_seed(seed)`` will initialize the random number generator with the 
given seed.  Only used for building up the tree, i. e. only necessary to pass 
this before adding the items.  Will have no effect after calling 
`a.build(n_trees)` or `a.load(fn)`.
 
 Notes:
 
@@ -106,7 +107,7 @@
 * ``n_trees`` is provided during build time and affects the build time and the 
index size. A larger value will give more accurate results, but larger indexes.
 * ``search_k`` is provided in runtime and affects the search performance. A 
larger value will give more accurate results, but will take longer time to 
return.
 
-If ``search_k`` is not provided, it will default to ``n * n_trees * D`` where 
``n`` is the number of approximate nearest neighbors and ``D`` is a constant 
depending on the metric. Otherwise, ``search_k`` and ``n_trees`` are roughly 
independent, i.e. a the value of ``n_trees`` will not affect search time if 
``search_k`` is held constant and vice versa. Basically it's recommended to set 
``n_trees`` as large as possible given the amount of memory you can afford, and 
it's recommended to set ``search_k`` as large as possible given the time 
constraints you have for the queries.
+If ``search_k`` is not provided, it will default to ``n * n_trees`` where 
``n`` is the number of approximate nearest neighbors. Otherwise, ``search_k`` 
and ``n_trees`` are roughly independent, i.e. the value of ``n_trees`` will not 
affect search time if ``search_k`` is held constant and vice versa. Basically 
it's recommended to set ``n_trees`` as large as possible given the amount of 
memory you can afford, and it's recommended to set ``search_k`` as large as 
possible given the time constraints you have for the queries.
 
 You can also accept slower search times in favour of reduced loading times, 
memory usage, and disk IO. On supported platforms the index is prefaulted 
during ``load`` and ``save``, causing the file to be pre-emptively read from 
disk into memory. If you set ``prefault`` to ``False``, pages of the mmapped 
index are instead read from disk and cached in memory on-demand, as necessary 
for a search to complete. This can significantly increase early search times 
but may be better suited for systems with low memory compared to index size, 
when few queries are executed against a loaded index, and/or when large areas 
of the index are unlikely to be relevant to search queries.
 
@@ -130,12 +131,13 @@
 * `Dirk Eddelbuettel <https://github.com/eddelbuettel>`__ provides an `R 
version of Annoy <http://dirk.eddelbuettel.com/code/rcpp.annoy.html>`__.
 * `Andy Sloane <https://github.com/a1k0n>`__ provides a `Java version of Annoy 
<https://github.com/spotify/annoy-java>`__ although currently limited to cosine 
and read-only.
 * `Pishen Tsai <https://github.com/pishen>`__ provides a `Scala wrapper of 
Annoy <https://github.com/pishen/annoy4s>`__ which uses JNA to call the C++ 
library of Annoy.
+* `Atsushi Tatsuma <https://github.com/yoshoku>`__ provides `Ruby bindings for 
Annoy <https://github.com/yoshoku/annoy.rb>`__.
 * There is `experimental support for Go 
<https://github.com/spotify/annoy/blob/master/README_GO.rst>`__ provided by 
`Taneli Leppä <https://github.com/rosmo>`__.
 * `Boris Nagaev <https://github.com/starius>`__ wrote `Lua bindings 
<https://github.com/spotify/annoy/blob/master/README_Lua.md>`__.
 * During part of Spotify Hack Week 2016 (and a bit afterward), `Jim Kang 
<https://github.com/jimkang>`__ wrote `Node bindings 
<https://github.com/jimkang/annoy-node>`__ for Annoy.
 * `Min-Seok Kim <https://github.com/mskimm>`__ built a `Scala version 
<https://github.com/mskimm/ann4s>`__ of Annoy.
 * `Presentation from New York Machine Learning meetup 
<http://www.slideshare.net/erikbern/approximate-nearest-neighbor-methods-and-vector-models-nyc-ml-meetup>`__
 about Annoy
-* Radim Řehůřek's blog posts comparing Annoy to a couple of other similar 
Python libraries: `Intro 
<http://radimrehurek.com/2013/11/performance-shootout-of-nearest-neighbours-intro/>`__,
 `Contestants 
<http://radimrehurek.com/2013/12/performance-shootout-of-nearest-neighbours-contestants/>`__,
 `Querying 
<http://radimrehurek.com/2014/01/performance-shootout-of-nearest-neighbours-querying/>`__
+* Annoy is available as a `conda package 
<https://anaconda.org/conda-forge/python-annoy>`__ on Linux, OS X, and Windows.
 * `ann-benchmarks <https://github.com/erikbern/ann-benchmarks>`__ is a 
benchmark for several approximate nearest neighbor libraries. Annoy seems to be 
fairly competitive, especially at higher precisions:
 
 .. figure:: 
https://github.com/erikbern/ann-benchmarks/raw/master/results/glove-100-angular.png
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/README_GO.rst 
new/annoy-1.17.0/README_GO.rst
--- old/annoy-1.16.3/README_GO.rst      2019-12-26 22:11:45.000000000 +0100
+++ new/annoy-1.17.0/README_GO.rst      2020-09-18 18:03:55.000000000 +0200
@@ -58,7 +58,7 @@
 Full Go API
 ---------------
 
-See annoygomodule.h. Generally the same as Python API except some arguments 
are not optional. 
+See annoygomodule.h. Generally the same as Python API except some arguments 
are not optional. Go binding does not support multithreaded build.
 
 Tests
 -------
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/README_Lua.md 
new/annoy-1.17.0/README_Lua.md
--- old/annoy-1.16.3/README_Lua.md      2019-12-26 22:11:45.000000000 +0100
+++ new/annoy-1.17.0/README_Lua.md      2020-09-18 18:03:55.000000000 +0200
@@ -64,7 +64,7 @@
 Full Lua API
 ------------
 
-Lua API closely resembles Python API, see main README.
+Lua API closely resembles Python API, see main README. Lua binding does not 
support multithreaded build.
 
 
 Tests
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/examples/precision_test.cpp 
new/annoy-1.17.0/examples/precision_test.cpp
--- old/annoy-1.16.3/examples/precision_test.cpp        2019-12-26 
22:11:45.000000000 +0100
+++ new/annoy-1.17.0/examples/precision_test.cpp        2020-09-18 
18:03:55.000000000 +0200
@@ -25,7 +25,7 @@
 
        //******************************************************
        //Building the tree
-       AnnoyIndex<int, double, Angular, Kiss32Random> t = AnnoyIndex<int, 
double, Angular, Kiss32Random>(f);
+       AnnoyIndex<int, double, Angular, Kiss32Random, 
AnnoyIndexMultiThreadedBuildPolicy> t = AnnoyIndex<int, double, Angular, 
Kiss32Random, AnnoyIndexMultiThreadedBuildPolicy>(f);
 
        std::cout << "Building index ... be patient !!" << std::endl;
        std::cout << "\"Trees that are slow to grow bear the best fruit\" 
(Moliere)" << std::endl;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/examples/s_compile_cpp.sh 
new/annoy-1.17.0/examples/s_compile_cpp.sh
--- old/annoy-1.16.3/examples/s_compile_cpp.sh  2019-12-26 22:11:45.000000000 
+0100
+++ new/annoy-1.17.0/examples/s_compile_cpp.sh  2020-09-18 18:03:55.000000000 
+0200
@@ -2,6 +2,6 @@
 
 
 echo "compiling precision example..."
-cmd="g++ precision_test.cpp -o precision_test -std=c++11"
+cmd="g++ precision_test.cpp -DANNOYLIB_MULTITHREADED_BUILD -o precision_test 
-std=c++14 -pthread"
 eval $cmd
 echo "Done"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/setup.py new/annoy-1.17.0/setup.py
--- old/annoy-1.16.3/setup.py   2019-12-26 22:11:45.000000000 +0100
+++ new/annoy-1.17.0/setup.py   2020-09-18 18:03:55.000000000 +0200
@@ -19,6 +19,7 @@
 import codecs
 import os
 import platform
+import sys
 
 readme_note = """\
 .. note::
@@ -49,10 +50,19 @@
 if os.name != 'nt':
     extra_compile_args += ['-O3', '-ffast-math', '-fno-associative-math']
 
+# Add multithreaded build flag for all platforms using Python 3 and
+# for non-Windows Python 2 platforms
+python_major_version = sys.version_info[0]
+if python_major_version == 3 or (python_major_version == 2 and os.name != 
'nt'):
+    extra_compile_args += ['-DANNOYLIB_MULTITHREADED_BUILD']
+
+    if os.name != 'nt':
+        extra_compile_args += ['-std=c++14']
+
 # #349: something with OS X Mojave causes libstd not to be found
 if platform.system() == 'Darwin':
-    extra_compile_args += ['-std=c++11', '-mmacosx-version-min=10.9']
-    extra_link_args += ['-stdlib=libc++', '-mmacosx-version-min=10.9']
+    extra_compile_args += ['-mmacosx-version-min=10.12']
+    extra_link_args += ['-stdlib=libc++', '-mmacosx-version-min=10.12']
 
 # Manual configuration, you're on your own here.
 manual_compiler_args = os.environ.get('ANNOY_COMPILER_ARGS', None)
@@ -63,7 +73,7 @@
     extra_link_args = manual_linker_args.split(',')
 
 setup(name='annoy',
-      version='1.16.3',
+      version='1.17.0',
       description='Approximate Nearest Neighbors in C++/Python optimized for 
memory usage and loading/saving to disk.',
       packages=['annoy'],
       ext_modules=[
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/src/annoygomodule.h 
new/annoy-1.17.0/src/annoygomodule.h
--- old/annoy-1.16.3/src/annoygomodule.h        2019-12-26 22:11:45.000000000 
+0100
+++ new/annoy-1.17.0/src/annoygomodule.h        2020-09-18 18:03:55.000000000 
+0200
@@ -17,7 +17,7 @@
     ptr->add_item(item, w);
   };
   void build(int q) {
-    ptr->build(q);
+    ptr->build(q, 1);
   };
   bool save(const char* filename, bool prefault) {
     return ptr->save(filename, prefault);
@@ -69,7 +69,7 @@
 {
  public:
   AnnoyIndexAngular(int f) {
-    ptr = new ::AnnoyIndex<int32_t, float, ::Angular, ::Kiss64Random>(f);
+    ptr = new ::AnnoyIndex<int32_t, float, ::Angular, ::Kiss64Random, 
AnnoyIndexSingleThreadedBuildPolicy>(f);
     this->f = f;
   }
 };
@@ -77,7 +77,7 @@
 class AnnoyIndexEuclidean : public AnnoyIndex {
  public:
   AnnoyIndexEuclidean(int f) {
-    ptr = new ::AnnoyIndex<int32_t, float, ::Euclidean, ::Kiss64Random>(f);
+    ptr = new ::AnnoyIndex<int32_t, float, ::Euclidean, ::Kiss64Random, 
AnnoyIndexSingleThreadedBuildPolicy>(f);
     this->f = f;
   }
 };
@@ -85,7 +85,7 @@
 class AnnoyIndexManhattan : public AnnoyIndex {
  public:
   AnnoyIndexManhattan(int f) {
-    ptr = new ::AnnoyIndex<int32_t, float, ::Manhattan, ::Kiss64Random>(f);
+    ptr = new ::AnnoyIndex<int32_t, float, ::Manhattan, ::Kiss64Random, 
AnnoyIndexSingleThreadedBuildPolicy>(f);
     this->f = f;
   }
 };
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/src/annoylib.h 
new/annoy-1.17.0/src/annoylib.h
--- old/annoy-1.16.3/src/annoylib.h     2019-12-26 22:11:45.000000000 +0100
+++ new/annoy-1.17.0/src/annoylib.h     2020-09-18 18:03:55.000000000 +0200
@@ -36,7 +36,6 @@
 #include <stdint.h>
 #endif
 
-
 #if defined(_MSC_VER) || defined(__MINGW32__)
  // a bit hacky, but override some definitions to support 64 bit
  #define off_t int64_t
@@ -59,6 +58,12 @@
 #include <queue>
 #include <limits>
 
+#ifdef ANNOYLIB_MULTITHREADED_BUILD
+#include <thread>
+#include <mutex>
+#include <shared_mutex>
+#endif
+
 #ifdef _MSC_VER
 // Needed for Visual Studio to disable runtime checks for mempcy
 #pragma runtime_checks("s", off)
@@ -72,7 +77,17 @@
   #define showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); }
 #endif
 
-void set_error_from_errno(char **error, const char* msg) {
+// Portable alloc definition, cf Writing R Extensions, Section 1.6.4
+#ifdef __GNUC__
+  // Includes GCC, clang and Intel compilers
+  # undef alloca
+  # define alloca(x) __builtin_alloca((x))
+#elif defined(__sun) || defined(_AIX)
+  // this is necessary (and sufficient) for Solaris 10 and AIX 6:
+  # include <alloca.h>
+#endif
+
+inline void set_error_from_errno(char **error, const char* msg) {
   showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno);
   if (error) {
     *error = (char *)malloc(256);  // TODO: win doesn't support snprintf
@@ -80,7 +95,7 @@
   }
 }
 
-void set_error_from_string(char **error, const char* msg) {
+inline void set_error_from_string(char **error, const char* msg) {
   showUpdate("%s\n", msg);
   if (error) {
     *error = (char *)malloc(strlen(msg) + 1);
@@ -88,22 +103,21 @@
   }
 }
 
+// We let the v array in the Node struct take whatever space is needed, so 
this is a mostly insignificant number.
+// Compilers need *some* size defined for the v array, and some memory 
checking tools will flag for buffer overruns if this is set too low.
+#define V_ARRAY_SIZE 65536
 
 #ifndef _MSC_VER
 #define popcount __builtin_popcountll
 #else // See #293, #358
-#define isnan(x) _isnan(x)
 #define popcount cole_popcount
 #endif
 
 #if !defined(NO_MANUAL_VECTORIZATION) && defined(__GNUC__) && (__GNUC__ >6) && 
defined(__AVX512F__)  // See #402
-#pragma message "Using 512-bit AVX instructions"
 #define USE_AVX512
 #elif !defined(NO_MANUAL_VECTORIZATION) && defined(__AVX__) && defined 
(__SSE__) && defined(__SSE2__) && defined(__SSE3__)
-#pragma message "Using 128-bit AVX instructions"
 #define USE_AVX
 #else
-#pragma message "Using no AVX instructions"
 #endif
 
 #if defined(USE_AVX) || defined(USE_AVX512)
@@ -114,33 +128,31 @@
 #endif
 #endif
 
-#ifndef ANNOY_NODE_ATTRIBUTE
-    #ifndef _MSC_VER
-        #define ANNOY_NODE_ATTRIBUTE __attribute__((__packed__))
-        // TODO: this is turned on by default, but may not work for all 
architectures! Need to investigate.
-    #else
-        #define ANNOY_NODE_ATTRIBUTE
-    #endif
+#if !defined(__MINGW32__)
+#define FTRUNCATE_SIZE(x) static_cast<int64_t>(x)
+#else
+#define FTRUNCATE_SIZE(x) (x)
 #endif
 
-
 using std::vector;
 using std::pair;
 using std::numeric_limits;
 using std::make_pair;
 
-inline void* remap_memory(void* _ptr, int _fd, size_t old_size, size_t 
new_size) {
+inline bool remap_memory_and_truncate(void** _ptr, int _fd, size_t old_size, 
size_t new_size) {
 #ifdef __linux__
-  _ptr = mremap(_ptr, old_size, new_size, MREMAP_MAYMOVE);
+    *_ptr = mremap(*_ptr, old_size, new_size, MREMAP_MAYMOVE);
+    bool ok = ftruncate(_fd, new_size) != -1;
 #else
-  munmap(_ptr, old_size);
+    munmap(*_ptr, old_size);
+    bool ok = ftruncate(_fd, FTRUNCATE_SIZE(new_size)) != -1;
 #ifdef MAP_POPULATE
-  _ptr = mmap(_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED | 
MAP_POPULATE, _fd, 0);
+    *_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED | 
MAP_POPULATE, _fd, 0);
 #else
-  _ptr = mmap(_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0);
+    *_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0);
 #endif
 #endif
-  return _ptr;
+    return ok;
 }
 
 namespace {
@@ -420,7 +432,7 @@
 
 struct Angular : Base {
   template<typename S, typename T>
-  struct ANNOY_NODE_ATTRIBUTE Node {
+  struct Node {
     /*
      * We store a binary tree where each node has two things
      * - A vector associated with it
@@ -440,7 +452,7 @@
       S children[2]; // Will possibly store more than 2
       T norm;
     };
-    T v[1]; // We let this one overflow intentionally. Need to allocate at 
least 1 to make GCC happy
+    T v[V_ARRAY_SIZE];
   };
   template<typename S, typename T>
   static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
@@ -504,14 +516,14 @@
 
 struct DotProduct : Angular {
   template<typename S, typename T>
-  struct ANNOY_NODE_ATTRIBUTE Node {
+  struct Node {
     /*
      * This is an extension of the Angular node with an extra attribute for 
the scaled norm.
      */
     S n_descendants;
     S children[2]; // Will possibly store more than 2
     T dot_factor;
-    T v[1]; // We let this one overflow intentionally. Need to allocate at 
least 1 to make GCC happy
+    T v[V_ARRAY_SIZE];
   };
 
   static const char* name() {
@@ -587,8 +599,8 @@
     // Step one: compute the norm of each vector and store that in its extra 
dimension (f-1)
     for (S i = 0; i < node_count; i++) {
       Node* node = get_node_ptr<S, Node>(nodes, _s, i);
-      T norm = sqrt(dot(node->v, node->v, f));
-      if (isnan(norm)) norm = 0;
+      T d = dot(node->v, node->v, f);
+      T norm = d < 0 ? 0 : sqrt(d);
       node->dot_factor = norm;
     }
 
@@ -605,9 +617,8 @@
     for (S i = 0; i < node_count; i++) {
       Node* node = get_node_ptr<S, Node>(nodes, _s, i);
       T node_norm = node->dot_factor;
-
-      T dot_factor = sqrt(pow(max_norm, static_cast<T>(2.0)) - pow(node_norm, 
static_cast<T>(2.0)));
-      if (isnan(dot_factor)) dot_factor = 0;
+      T squared_norm_diff = pow(max_norm, static_cast<T>(2.0)) - 
pow(node_norm, static_cast<T>(2.0));
+      T dot_factor = squared_norm_diff < 0 ? 0 : sqrt(squared_norm_diff);
 
       node->dot_factor = dot_factor;
     }
@@ -616,10 +627,10 @@
 
 struct Hamming : Base {
   template<typename S, typename T>
-  struct ANNOY_NODE_ATTRIBUTE Node {
+  struct Node {
     S n_descendants;
     S children[2];
-    T v[1];
+    T v[V_ARRAY_SIZE];
   };
 
   static const size_t max_iterations = 20;
@@ -712,11 +723,11 @@
 
 struct Minkowski : Base {
   template<typename S, typename T>
-  struct ANNOY_NODE_ATTRIBUTE Node {
+  struct Node {
     S n_descendants;
     T a; // need an extra constant term to determine the offset of the plane
     S children[2];
-    T v[1];
+    T v[V_ARRAY_SIZE];
   };
   template<typename S, typename T>
   static inline T margin(const Node<S, T>* n, const T* y, int f) {
@@ -810,14 +821,14 @@
   // Note that the methods with an **error argument will allocate memory and 
write the pointer to that string if error is non-NULL
   virtual ~AnnoyIndexInterface() {};
   virtual bool add_item(S item, const T* w, char** error=NULL) = 0;
-  virtual bool build(int q, char** error=NULL) = 0;
+  virtual bool build(int q, int n_threads=-1, char** error=NULL) = 0;
   virtual bool unbuild(char** error=NULL) = 0;
   virtual bool save(const char* filename, bool prefault=false, char** 
error=NULL) = 0;
   virtual void unload() = 0;
   virtual bool load(const char* filename, bool prefault=false, char** 
error=NULL) = 0;
   virtual T get_distance(S i, S j) const = 0;
-  virtual void get_nns_by_item(S item, size_t n, size_t search_k, vector<S>* 
result, vector<T>* distances) const = 0;
-  virtual void get_nns_by_vector(const T* w, size_t n, size_t search_k, 
vector<S>* result, vector<T>* distances) const = 0;
+  virtual void get_nns_by_item(S item, size_t n, int search_k, vector<S>* 
result, vector<T>* distances) const = 0;
+  virtual void get_nns_by_vector(const T* w, size_t n, int search_k, 
vector<S>* result, vector<T>* distances) const = 0;
   virtual S get_n_items() const = 0;
   virtual S get_n_trees() const = 0;
   virtual void verbose(bool v) = 0;
@@ -826,7 +837,7 @@
   virtual bool on_disk_build(const char* filename, char** error=NULL) = 0;
 };
 
-template<typename S, typename T, typename Distance, typename Random>
+template<typename S, typename T, typename Distance, typename Random, class 
ThreadedBuildPolicy>
   class AnnoyIndex : public AnnoyIndexInterface<S, T> {
   /*
    * We use random projection to build a forest of binary trees of all items.
@@ -843,12 +854,13 @@
   const int _f;
   size_t _s;
   S _n_items;
-  Random _random;
   void* _nodes; // Could either be mmapped, or point to a memory buffer that 
we reallocate
   S _n_nodes;
   S _nodes_size;
   vector<S> _roots;
   S _K;
+  bool _is_seeded;
+  int _seed;
   bool _loaded;
   bool _verbose;
   int _fd;
@@ -856,7 +868,7 @@
   bool _built;
 public:
 
-   AnnoyIndex(int f) : _f(f), _random() {
+   AnnoyIndex(int f) : _f(f) {
     _s = offsetof(Node, v) + _f * sizeof(T); // Size of each node
     _verbose = false;
     _built = false;
@@ -910,7 +922,7 @@
       return false;
     }
     _nodes_size = 1;
-    if (ftruncate(_fd, _s * _nodes_size) == -1) {
+    if (ftruncate(_fd, FTRUNCATE_SIZE(_s) * FTRUNCATE_SIZE(_nodes_size)) == 
-1) {
       set_error_from_errno(error, "Unable to truncate");
       return false;
     }
@@ -922,7 +934,7 @@
     return true;
   }
     
-  bool build(int q, char** error=NULL) {
+  bool build(int q, int n_threads=-1, char** error=NULL) {
     if (_loaded) {
       set_error_from_string(error, "You can't build a loaded index");
       return false;
@@ -936,21 +948,8 @@
     D::template preprocess<T, S, Node>(_nodes, _s, _n_items, _f);
 
     _n_nodes = _n_items;
-    while (1) {
-      if (q == -1 && _n_nodes >= _n_items * 2)
-        break;
-      if (q != -1 && _roots.size() >= (size_t)q)
-        break;
-      if (_verbose) showUpdate("pass %zd...\n", _roots.size());
 
-      vector<S> indices;
-      for (S i = 0; i < _n_items; i++) {
-        if (_get(i)->n_descendants >= 1) // Issue #223
-          indices.push_back(i);
-      }
-
-      _roots.push_back(_make_tree(indices, true));
-    }
+    ThreadedBuildPolicy::template build<S, T>(this, q, n_threads);
 
     // Also, copy the roots into the last segment of the array
     // This way we can load them faster without reading the whole file
@@ -962,8 +961,9 @@
     if (_verbose) showUpdate("has %d nodes\n", _n_nodes);
     
     if (_on_disk) {
-      _nodes = remap_memory(_nodes, _fd, _s * _nodes_size, _s * _n_nodes);
-      if (ftruncate(_fd, _s * _n_nodes)) {
+      if (!remap_memory_and_truncate(&_nodes, _fd,
+          static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size),
+          static_cast<size_t>(_s) * static_cast<size_t>(_n_nodes))) {
         // TODO: this probably creates an index in a corrupt state... not sure 
what to do
         set_error_from_errno(error, "Unable to truncate");
         return false;
@@ -1027,6 +1027,7 @@
     _n_nodes = 0;
     _nodes_size = 0;
     _on_disk = false;
+    _is_seeded = false;
     _roots.clear();
   }
 
@@ -1064,7 +1065,7 @@
       return false;
     } else if (size % _s) {
       // Something is fishy with this index!
-      set_error_from_errno(error, "Index size is not a multiple of vector 
size");
+      set_error_from_errno(error, "Index size is not a multiple of vector 
size. Ensure you are opening using the same metric you used to create the 
index.");
       return false;
     }
 
@@ -1105,13 +1106,13 @@
     return D::normalized_distance(D::distance(_get(i), _get(j), _f));
   }
 
-  void get_nns_by_item(S item, size_t n, size_t search_k, vector<S>* result, 
vector<T>* distances) const {
+  void get_nns_by_item(S item, size_t n, int search_k, vector<S>* result, 
vector<T>* distances) const {
     // TODO: handle OOB
     const Node* m = _get(item);
     _get_all_nns(m->v, n, search_k, result, distances);
   }
 
-  void get_nns_by_vector(const T* w, size_t n, size_t search_k, vector<S>* 
result, vector<T>* distances) const {
+  void get_nns_by_vector(const T* w, size_t n, int search_k, vector<S>* 
result, vector<T>* distances) const {
     _get_all_nns(w, n, search_k, result, distances);
   }
 
@@ -1134,35 +1135,97 @@
   }
 
   void set_seed(int seed) {
+    _is_seeded = true;
+    _seed = seed;
+  }
+
+  void thread_build(int q, int thread_idx, ThreadedBuildPolicy& 
threaded_build_policy) {
+    Random _random;
+    // Each thread needs its own seed, otherwise each thread would be building 
the same tree(s)
+    int seed = _is_seeded ? _seed + thread_idx : thread_idx;
     _random.set_seed(seed);
+
+    vector<S> thread_roots;
+    while (1) {
+      if (q == -1) {
+        threaded_build_policy.lock_n_nodes();
+        if (_n_nodes >= 2 * _n_items) {
+          threaded_build_policy.unlock_n_nodes();
+          break;
+        }
+        threaded_build_policy.unlock_n_nodes();
+      } else {
+        if (thread_roots.size() >= (size_t)q) {
+          break;
+        }
+      }
+
+      if (_verbose) showUpdate("pass %zd...\n", thread_roots.size());
+
+      vector<S> indices;
+      threaded_build_policy.lock_shared_nodes();
+      for (S i = 0; i < _n_items; i++) {
+        if (_get(i)->n_descendants >= 1) { // Issue #223
+          indices.push_back(i);
+        }
+      }
+      threaded_build_policy.unlock_shared_nodes();
+
+      thread_roots.push_back(_make_tree(indices, true, _random, 
threaded_build_policy));
+    }
+
+    threaded_build_policy.lock_roots();
+    _roots.insert(_roots.end(), thread_roots.begin(), thread_roots.end());
+    threaded_build_policy.unlock_roots();
   }
 
 protected:
+  void _reallocate_nodes(S n) {
+    const double reallocation_factor = 1.3;
+    S new_nodes_size = std::max(n, (S) ((_nodes_size + 1) * 
reallocation_factor));
+    void *old = _nodes;
+    
+    if (_on_disk) {
+      if (!remap_memory_and_truncate(&_nodes, _fd, 
+          static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size), 
+          static_cast<size_t>(_s) * static_cast<size_t>(new_nodes_size)) && 
+          _verbose)
+          showUpdate("File truncation error\n");
+    } else {
+      _nodes = realloc(_nodes, _s * new_nodes_size);
+      memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, 
(new_nodes_size - _nodes_size) * _s);
+    }
+    
+    _nodes_size = new_nodes_size;
+    if (_verbose) showUpdate("Reallocating to %d nodes: old_address=%p, 
new_address=%p\n", new_nodes_size, old, _nodes);
+  }
+
+  void _allocate_size(S n, ThreadedBuildPolicy& threaded_build_policy) {
+    if (n > _nodes_size) {
+      threaded_build_policy.lock_nodes();
+      _reallocate_nodes(n);
+      threaded_build_policy.unlock_nodes();
+    }
+  }
+
   void _allocate_size(S n) {
     if (n > _nodes_size) {
-      const double reallocation_factor = 1.3;
-      S new_nodes_size = std::max(n, (S) ((_nodes_size + 1) * 
reallocation_factor));
-      void *old = _nodes;
-      
-      if (_on_disk) {
-        int rc = ftruncate(_fd, _s * new_nodes_size);
-        if (_verbose && rc) showUpdate("File truncation error\n");
-        _nodes = remap_memory(_nodes, _fd, _s * _nodes_size, _s * 
new_nodes_size);
-      } else {
-        _nodes = realloc(_nodes, _s * new_nodes_size);
-        memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, 
(new_nodes_size - _nodes_size) * _s);
-      }
-      
-      _nodes_size = new_nodes_size;
-      if (_verbose) showUpdate("Reallocating to %d nodes: old_address=%p, 
new_address=%p\n", new_nodes_size, old, _nodes);
+      _reallocate_nodes(n);
     }
   }
 
-  inline Node* _get(const S i) const {
+  Node* _get(const S i) const {
     return get_node_ptr<S, Node>(_nodes, _s, i);
   }
 
-  S _make_tree(const vector<S >& indices, bool is_root) {
+  double _split_imbalance(const vector<S>& left_indices, const vector<S>& 
right_indices) {
+    double ls = (float)left_indices.size();
+    double rs = (float)right_indices.size();
+    float f = ls / (ls + rs + 1e-9);  // Avoid 0/0
+    return std::max(f, 1-f);
+  }
+
+  S _make_tree(const vector<S>& indices, bool is_root, Random& _random, 
ThreadedBuildPolicy& threaded_build_policy) {
     // The basic rule is that if we have <= _K items, then it's a leaf node, 
otherwise it's a split node.
     // There's some regrettable complications caused by the problem that root 
nodes have to be "special":
     // 1. We identify root nodes by the arguable logic that _n_items == 
n->n_descendants, regardless of how many descendants they actually have
@@ -1172,8 +1235,12 @@
       return indices[0];
 
     if (indices.size() <= (size_t)_K && (!is_root || (size_t)_n_items <= 
(size_t)_K || indices.size() == 1)) {
-      _allocate_size(_n_nodes + 1);
+      threaded_build_policy.lock_n_nodes();
+      _allocate_size(_n_nodes + 1, threaded_build_policy);
       S item = _n_nodes++;
+      threaded_build_policy.unlock_n_nodes();
+
+      threaded_build_policy.lock_shared_nodes();
       Node* m = _get(item);
       m->n_descendants = is_root ? _n_items : (S)indices.size();
 
@@ -1183,9 +1250,12 @@
       // Only copy when necessary to avoid crash in MSVC 9. #293
       if (!indices.empty())
         memcpy(m->children, &indices[0], indices.size() * sizeof(S));
+
+      threaded_build_policy.unlock_shared_nodes();
       return item;
     }
 
+    threaded_build_policy.lock_shared_nodes();
     vector<Node*> children;
     for (size_t i = 0; i < indices.size(); i++) {
       S j = indices[i];
@@ -1196,26 +1266,33 @@
 
     vector<S> children_indices[2];
     Node* m = (Node*)alloca(_s);
-    D::create_split(children, _f, _s, _random, m);
 
-    for (size_t i = 0; i < indices.size(); i++) {
-      S j = indices[i];
-      Node* n = _get(j);
-      if (n) {
-        bool side = D::side(m, n->v, _f, _random);
-        children_indices[side].push_back(j);
-      } else {
-        showUpdate("No node for index %d?\n", j);
+    for (int attempt = 0; attempt < 3; attempt++) {
+      children_indices[0].clear();
+      children_indices[1].clear();
+      D::create_split(children, _f, _s, _random, m);
+
+      for (size_t i = 0; i < indices.size(); i++) {
+        S j = indices[i];
+        Node* n = _get(j);
+        if (n) {
+          bool side = D::side(m, n->v, _f, _random);
+          children_indices[side].push_back(j);
+        } else {
+          showUpdate("No node for index %d?\n", j);
+        }
       }
+
+      if (_split_imbalance(children_indices[0], children_indices[1]) < 0.95)
+        break;
     }
+    threaded_build_policy.unlock_shared_nodes();
 
     // If we didn't find a hyperplane, just randomize sides as a last option
-    while (children_indices[0].size() == 0 || children_indices[1].size() == 0) 
{
+    while (_split_imbalance(children_indices[0], children_indices[1]) > 0.99) {
       if (_verbose)
         showUpdate("\tNo hyperplane found (left has %ld children, right has 
%ld children)\n",
           children_indices[0].size(), children_indices[1].size());
-      if (_verbose && indices.size() > 100000)
-        showUpdate("Failed splitting %lu items\n", indices.size());
 
       children_indices[0].clear();
       children_indices[1].clear();
@@ -1236,17 +1313,22 @@
     m->n_descendants = is_root ? _n_items : (S)indices.size();
     for (int side = 0; side < 2; side++) {
       // run _make_tree for the smallest child first (for cache locality)
-      m->children[side^flip] = _make_tree(children_indices[side^flip], false);
+      m->children[side^flip] = _make_tree(children_indices[side^flip], false, 
_random, threaded_build_policy);
     }
 
-    _allocate_size(_n_nodes + 1);
+    threaded_build_policy.lock_n_nodes();
+    _allocate_size(_n_nodes + 1, threaded_build_policy);
     S item = _n_nodes++;
+    threaded_build_policy.unlock_n_nodes();
+
+    threaded_build_policy.lock_shared_nodes();
     memcpy(_get(item), m, _s);
+    threaded_build_policy.unlock_shared_nodes();
 
     return item;
   }
 
-  void _get_all_nns(const T* v, size_t n, size_t search_k, vector<S>* result, 
vector<T>* distances) const {
+  void _get_all_nns(const T* v, size_t n, int search_k, vector<S>* result, 
vector<T>* distances) const {
     Node* v_node = (Node *)alloca(_s);
     D::template zero_value<Node>(v_node);
     memcpy(v_node->v, v, sizeof(T) * _f);
@@ -1254,7 +1336,7 @@
 
     std::priority_queue<pair<T, S> > q;
 
-    if (search_k == (size_t)-1) {
+    if (search_k == -1) {
       search_k = n * _roots.size();
     }
 
@@ -1263,7 +1345,7 @@
     }
 
     std::vector<S> nns;
-    while (nns.size() < search_k && !q.empty()) {
+    while (nns.size() < (size_t)search_k && !q.empty()) {
       const pair<T, S>& top = q.top();
       T d = top.first;
       S i = top.second;
@@ -1287,7 +1369,7 @@
     vector<pair<T, S> > nns_dist;
     S last = -1;
     for (size_t i = 0; i < nns.size(); i++) {
-      S j = nns[i];
+      S j = nns[i]; 
       if (j == last)
         continue;
       last = j;
@@ -1306,5 +1388,92 @@
   }
 };
 
+class AnnoyIndexSingleThreadedBuildPolicy {
+public:
+  template<typename S, typename T, typename D, typename Random>
+  static void build(AnnoyIndex<S, T, D, Random, 
AnnoyIndexSingleThreadedBuildPolicy>* annoy, int q, int n_threads) {
+    AnnoyIndexSingleThreadedBuildPolicy threaded_build_policy;
+    annoy->thread_build(q, 0, threaded_build_policy);
+  }
+
+  void lock_n_nodes() {}
+  void unlock_n_nodes() {}
+
+  void lock_nodes() {}
+  void unlock_nodes() {}
+
+  void lock_shared_nodes() {}
+  void unlock_shared_nodes() {}
+
+  void lock_roots() {}
+  void unlock_roots() {}
+};
+
+#ifdef ANNOYLIB_MULTITHREADED_BUILD
+class AnnoyIndexMultiThreadedBuildPolicy {
+private:
+  std::shared_timed_mutex nodes_mutex;
+  std::mutex n_nodes_mutex;
+  std::mutex roots_mutex;
+
+public:
+  template<typename S, typename T, typename D, typename Random>
+  static void build(AnnoyIndex<S, T, D, Random, 
AnnoyIndexMultiThreadedBuildPolicy>* annoy, int q, int n_threads) {
+    AnnoyIndexMultiThreadedBuildPolicy threaded_build_policy;
+    if (n_threads == -1) {
+      // If the hardware_concurrency() value is not well defined or not 
computable, it returns 0.
+      // We guard against this by using at least 1 thread.
+      n_threads = std::max(1, (int)std::thread::hardware_concurrency());
+    }
+
+    vector<std::thread> threads(n_threads);
+
+    for (int thread_idx = 0; thread_idx < n_threads; thread_idx++) {
+      int trees_per_thread = q == -1 ? -1 : (int)floor((q + thread_idx) / 
n_threads);
+
+      threads[thread_idx] = std::thread(
+        &AnnoyIndex<S, T, D, Random, 
AnnoyIndexMultiThreadedBuildPolicy>::thread_build,
+        annoy,
+        trees_per_thread,
+        thread_idx,
+        std::ref(threaded_build_policy)
+      );
+    }
+
+    for (auto& thread : threads) {
+      thread.join();
+    }
+  }
+
+  void lock_n_nodes() {
+    n_nodes_mutex.lock();
+  }
+  void unlock_n_nodes() {
+    n_nodes_mutex.unlock();
+  }
+
+  void lock_nodes() {
+    nodes_mutex.lock();
+  }
+  void unlock_nodes() {
+    nodes_mutex.unlock();
+  }
+
+  void lock_shared_nodes() {
+    nodes_mutex.lock_shared();
+  }
+  void unlock_shared_nodes() {
+    nodes_mutex.unlock_shared();
+  }
+
+  void lock_roots() {
+    roots_mutex.lock();
+  }
+  void unlock_roots() {
+    roots_mutex.unlock();
+  }
+};
+#endif
+
 #endif
 // vim: tabstop=2 shiftwidth=2
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/src/annoyluamodule.cc 
new/annoy-1.17.0/src/annoyluamodule.cc
--- old/annoy-1.16.3/src/annoyluamodule.cc      2019-12-26 22:11:45.000000000 
+0100
+++ new/annoy-1.17.0/src/annoyluamodule.cc      2020-09-18 18:03:55.000000000 
+0200
@@ -33,7 +33,7 @@
 public:
   typedef int32_t AnnoyS;
   typedef float AnnoyT;
-  typedef AnnoyIndex<AnnoyS, AnnoyT, Distance, Kiss64Random> Impl;
+  typedef AnnoyIndex<AnnoyS, AnnoyT, Distance, Kiss64Random, 
AnnoyIndexSingleThreadedBuildPolicy> Impl;
   typedef LuaAnnoy<Distance> ThisClass;
 
   class LuaArrayProxy {
@@ -118,9 +118,10 @@
   }
 
   static int build(lua_State* L) {
+    int nargs = lua_gettop(L);
     Impl* self = getAnnoy(L, 1);
     int n_trees = luaL_checkinteger(L, 2);
-    self->build(n_trees);
+    self->build(n_trees, 1);
     lua_pushboolean(L, true);
     return 1;
   }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/src/annoymodule.cc 
new/annoy-1.17.0/src/annoymodule.cc
--- old/annoy-1.16.3/src/annoymodule.cc 2019-12-26 22:11:45.000000000 +0100
+++ new/annoy-1.17.0/src/annoymodule.cc 2020-09-18 18:03:55.000000000 +0200
@@ -24,6 +24,24 @@
 #endif
 
 
+#if defined(USE_AVX512)
+#define AVX_INFO "Using 512-bit AVX instructions"
+#elif defined(USE_AVX128)
+#define AVX_INFO "Using 128-bit AVX instructions"
+#else
+#define AVX_INFO "Not using AVX instructions"
+#endif
+
+#if defined(_MSC_VER)
+#define COMPILER_INFO "Compiled using MSC"
+#elif defined(__GNUC__)
+#define COMPILER_INFO "Compiled on GCC"
+#else
+#define COMPILER_INFO "Compiled on unknown platform"
+#endif
+
+#define ANNOY_DOC (COMPILER_INFO ". " AVX_INFO ".")
+
 #if PY_MAJOR_VERSION >= 3
 #define IS_PY3K
 #endif
@@ -36,6 +54,11 @@
     #define PyInt_FromLong PyLong_FromLong 
 #endif
 
+#ifdef ANNOYLIB_MULTITHREADED_BUILD
+  typedef AnnoyIndexMultiThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy;
+#else
+  typedef AnnoyIndexSingleThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy;
+#endif
 
 template class AnnoyIndexInterface<int32_t, float>;
 
@@ -45,7 +68,7 @@
   // This is questionable from a performance point of view. Should reconsider 
this solution.
 private:
   int32_t _f_external, _f_internal;
-  AnnoyIndex<int32_t, uint64_t, Hamming, Kiss64Random> _index;
+  AnnoyIndex<int32_t, uint64_t, Hamming, Kiss64Random, 
AnnoyIndexThreadedBuildPolicy> _index;
   void _pack(const float* src, uint64_t* dst) const {
     for (int32_t i = 0; i < _f_internal; i++) {
       dst[i] = 0;
@@ -66,13 +89,13 @@
     _pack(w, &w_internal[0]);
     return _index.add_item(item, &w_internal[0], error);
   };
-  bool build(int q, char** error) { return _index.build(q, error); };
+  bool build(int q, int n_threads, char** error) { return _index.build(q, 
n_threads, error); };
   bool unbuild(char** error) { return _index.unbuild(error); };
   bool save(const char* filename, bool prefault, char** error) { return 
_index.save(filename, prefault, error); };
   void unload() { _index.unload(); };
   bool load(const char* filename, bool prefault, char** error) { return 
_index.load(filename, prefault, error); };
   float get_distance(int32_t i, int32_t j) const { return 
_index.get_distance(i, j); };
-  void get_nns_by_item(int32_t item, size_t n, size_t search_k, 
vector<int32_t>* result, vector<float>* distances) const {
+  void get_nns_by_item(int32_t item, size_t n, int search_k, vector<int32_t>* 
result, vector<float>* distances) const {
     if (distances) {
       vector<uint64_t> distances_internal;
       _index.get_nns_by_item(item, n, search_k, result, &distances_internal);
@@ -81,7 +104,7 @@
       _index.get_nns_by_item(item, n, search_k, result, NULL);
     }
   };
-  void get_nns_by_vector(const float* w, size_t n, size_t search_k, 
vector<int32_t>* result, vector<float>* distances) const {
+  void get_nns_by_vector(const float* w, size_t n, int search_k, 
vector<int32_t>* result, vector<float>* distances) const {
     vector<uint64_t> w_internal(_f_internal, 0);
     _pack(w, &w_internal[0]);
     if (distances) {
@@ -127,17 +150,17 @@
     // This keeps coming up, see #368 etc
     PyErr_WarnEx(PyExc_FutureWarning, "The default argument for metric will be 
removed "
                 "in future version of Annoy. Please pass metric='angular' 
explicitly.", 1);
-    self->ptr = new AnnoyIndex<int32_t, float, Angular, Kiss64Random>(self->f);
+    self->ptr = new AnnoyIndex<int32_t, float, Angular, Kiss64Random, 
AnnoyIndexThreadedBuildPolicy>(self->f);
   } else if (!strcmp(metric, "angular")) {
-   self->ptr = new AnnoyIndex<int32_t, float, Angular, Kiss64Random>(self->f);
+   self->ptr = new AnnoyIndex<int32_t, float, Angular, Kiss64Random, 
AnnoyIndexThreadedBuildPolicy>(self->f);
   } else if (!strcmp(metric, "euclidean")) {
-    self->ptr = new AnnoyIndex<int32_t, float, Euclidean, 
Kiss64Random>(self->f);
+    self->ptr = new AnnoyIndex<int32_t, float, Euclidean, Kiss64Random, 
AnnoyIndexThreadedBuildPolicy>(self->f);
   } else if (!strcmp(metric, "manhattan")) {
-    self->ptr = new AnnoyIndex<int32_t, float, Manhattan, 
Kiss64Random>(self->f);
+    self->ptr = new AnnoyIndex<int32_t, float, Manhattan, Kiss64Random, 
AnnoyIndexThreadedBuildPolicy>(self->f);
   } else if (!strcmp(metric, "hamming")) {
     self->ptr = new HammingWrapper(self->f);
   } else if (!strcmp(metric, "dot")) {
-    self->ptr = new AnnoyIndex<int32_t, float, DotProduct, 
Kiss64Random>(self->f);
+    self->ptr = new AnnoyIndex<int32_t, float, DotProduct, Kiss64Random, 
AnnoyIndexThreadedBuildPolicy>(self->f);
   } else {
     PyErr_SetString(PyExc_ValueError, "No such metric");
     return NULL;
@@ -390,16 +413,17 @@
 static PyObject *
 py_an_build(py_annoy *self, PyObject *args, PyObject *kwargs) {
   int q;
+  int n_jobs = -1;
   if (!self->ptr) 
     return NULL;
-  static char const * kwlist[] = {"n_trees", NULL};
-  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i", (char**)kwlist, &q))
+  static char const * kwlist[] = {"n_trees", "n_jobs", NULL};
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", (char**)kwlist, &q, 
&n_jobs))
     return NULL;
 
   bool res;
   char* error;
   Py_BEGIN_ALLOW_THREADS;
-  res = self->ptr->build(q, &error);
+  res = self->ptr->build(q, n_jobs, &error);
   Py_END_ALLOW_THREADS;
   if (!res) {
     PyErr_SetString(PyExc_Exception, error);
@@ -509,7 +533,7 @@
   {"get_item_vector",(PyCFunction)py_an_get_item_vector, METH_VARARGS, 
"Returns the vector for item `i` that was previously added."},
   {"add_item",(PyCFunction)py_an_add_item, METH_VARARGS | METH_KEYWORDS, "Adds 
item `i` (any nonnegative integer) with vector `v`.\n\nNote that it will 
allocate memory for `max(i)+1` items."},
   {"on_disk_build",(PyCFunction)py_an_on_disk_build, METH_VARARGS | 
METH_KEYWORDS, "Build will be performed with storage on disk instead of RAM."},
-  {"build",(PyCFunction)py_an_build, METH_VARARGS | METH_KEYWORDS, "Builds a 
forest of `n_trees` trees.\n\nMore trees give higher precision when querying. 
After calling `build`,\nno more items can be added."},
+  {"build",(PyCFunction)py_an_build, METH_VARARGS | METH_KEYWORDS, "Builds a 
forest of `n_trees` trees.\n\nMore trees give higher precision when querying. 
After calling `build`,\nno more items can be added. `n_jobs` specifies the 
number of threads used to build the trees. `n_jobs=-1` uses all available CPU 
cores."},
   {"unbuild",(PyCFunction)py_an_unbuild, METH_NOARGS, "Unbuilds the tree in 
order to allows adding new items.\n\nbuild() has to be called again afterwards 
in order to\nrun queries."},
   {"unload",(PyCFunction)py_an_unload, METH_NOARGS, "Unloads an index from 
disk."},
   {"get_distance",(PyCFunction)py_an_get_distance, METH_VARARGS, "Returns the 
distance between items `i` and `j`."},
@@ -542,7 +566,7 @@
   0,                      /*tp_setattro*/
   0,                      /*tp_as_buffer*/
   Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
-  "annoy objects",        /* tp_doc */
+  ANNOY_DOC,              /* tp_doc */
   0,                      /* tp_traverse */
   0,                      /* tp_clear */
   0,                      /* tp_richcompare */
@@ -570,7 +594,7 @@
   static struct PyModuleDef moduledef = {
     PyModuleDef_HEAD_INIT,
     "annoylib",          /* m_name */
-    "",                  /* m_doc */
+    ANNOY_DOC,           /* m_doc */
     -1,                  /* m_size */
     module_methods,      /* m_methods */
     NULL,                /* m_reload */
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/src/mman.h new/annoy-1.17.0/src/mman.h
--- old/annoy-1.16.3/src/mman.h 2019-12-26 22:11:45.000000000 +0100
+++ new/annoy-1.17.0/src/mman.h 2020-09-18 18:03:55.000000000 +0200
@@ -210,17 +210,22 @@
 }
 
 #if !defined(__MINGW32__)
-inline int ftruncate(int fd, unsigned int size) {
+inline int ftruncate(const int fd, const int64_t size) {
     if (fd < 0) {
         errno = EBADF;
         return -1;
     }
 
-    HANDLE h = (HANDLE)_get_osfhandle(fd);
-    unsigned int cur = SetFilePointer(h, 0, NULL, FILE_CURRENT);
-    if (cur == ~0 || SetFilePointer(h, size, NULL, FILE_BEGIN) == ~0 || 
!SetEndOfFile(h)) {
-        int error = GetLastError();
-        switch (GetLastError()) {
+    HANDLE h = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
+    LARGE_INTEGER li_start, li_size;
+    li_start.QuadPart = static_cast<int64_t>(0);
+    li_size.QuadPart = size;
+    if (SetFilePointerEx(h, li_start, NULL, FILE_CURRENT) == ~0 ||
+        SetFilePointerEx(h, li_size, NULL, FILE_BEGIN) == ~0 ||
+        !SetEndOfFile(h)) {
+        unsigned long error = GetLastError();
+        fprintf(stderr, "I/O error while truncating: %lu\n", error);
+        switch (error) {
             case ERROR_INVALID_HANDLE:
                 errno = EBADF;
                 break;
@@ -229,8 +234,7 @@
                 break;
         }
         return -1;
-    }
-
+    }        
     return 0;
 }
 #endif
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/test/accuracy_test.py 
new/annoy-1.17.0/test/accuracy_test.py
--- old/annoy-1.16.3/test/accuracy_test.py      2019-12-26 22:11:45.000000000 
+0100
+++ new/annoy-1.17.0/test/accuracy_test.py      2020-09-18 18:03:55.000000000 
+0200
@@ -36,7 +36,7 @@
             print('downloading', url, '->', vectors_fn)
             urlretrieve(url, vectors_fn)
 
-        dataset_f = h5py.File(vectors_fn)
+        dataset_f = h5py.File(vectors_fn, 'r')
         distance = dataset_f.attrs['distance']
         f = dataset_f['train'].shape[1]
         annoy = AnnoyIndex(f, distance)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/test/multithreaded_build_test.py 
new/annoy-1.17.0/test/multithreaded_build_test.py
--- old/annoy-1.16.3/test/multithreaded_build_test.py   1970-01-01 
01:00:00.000000000 +0100
+++ new/annoy-1.17.0/test/multithreaded_build_test.py   2020-09-18 
18:03:55.000000000 +0200
@@ -0,0 +1,26 @@
+import numpy
+import unittest
+from annoy import AnnoyIndex
+
+
+class MultithreadedBuildTest(unittest.TestCase):
+    def _test_building_with_threads(self, n_jobs):
+        n, f = 10000, 10
+        n_trees = 31
+        i = AnnoyIndex(f, 'euclidean')
+        for j in range(n):
+            i.add_item(j, numpy.random.normal(size=f))
+        self.assertTrue(i.build(n_trees, n_jobs=n_jobs))
+        self.assertEqual(n_trees, i.get_n_trees())
+
+    def test_one_thread(self):
+        self._test_building_with_threads(1)
+
+    def test_two_threads(self):
+        self._test_building_with_threads(2)
+
+    def test_four_threads(self):
+        self._test_building_with_threads(4)
+    
+    def test_eight_threads(self):
+        self._test_building_with_threads(8)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/annoy-1.16.3/tox.ini new/annoy-1.17.0/tox.ini
--- old/annoy-1.16.3/tox.ini    2019-12-26 22:11:45.000000000 +0100
+++ new/annoy-1.17.0/tox.ini    2020-09-18 18:03:55.000000000 +0200
@@ -7,7 +7,7 @@
 commands =
   pip install numpy h5py
   pip install .
-  python setup.py nosetests
+  python setup.py nosetests --verbosity=3
 
 [testenv:go]
 setenv =

Reply via email to