On 2019-05-07 15:28, amirou...@hyper.dev wrote:
I am pleased to announce the immediate availability of
guile-snowball-stemmer.
This is binding library that allows to compute the stem of words in
various
languages. The list of supported language is available in the following
REPL
run.
This is a binding library. The official website is at
https://snowballstem.org/
It is mostly useful in the context of information retrieval.
The code is at https://git.sr.ht/~amz3/guile-snowball-stemmer
The libstemmer shared library path is hardcoded as guix path of the
library.
A guix package definition of the C library is available in my guix
channel at:
https://git.sr.ht/~amz3/guix-amz3-channel
That said there is no guix package for the bindings. Just include the
file
attached to this mail in you project.
Here is a demo:
scheme@(guile-user)> (import (snowball-stemmer))
scheme@(guile-user)> (stemmers)
$1 = ("turkish" "swedish" "spanish" "russian" "romanian" "portuguese"
"porter" "norwegian" "italian" "hungarian" "german" "french" "finnish"
"english" "dutch" "danish")
scheme@(guile-user)> (make-stemmer "amazigh")
ERROR: In procedure scm-error:
ERROR: snowball-stemmer "Oops! Stemmer not found" "amazigh"
scheme@(guile-user)> (define english (make-stemmer "english"))
scheme@(guile-user)> (stem english "cycling")
$2 = "cycl"
scheme@(guile-user)> (stem english "ecology")
$3 = "ecolog"
scheme@(guile-user)> (stem english "library")
$4 = "librari"
scheme@(guile-user)> (stem english "virtual")
$5 = "virtual"
scheme@(guile-user)> (stem english "environment")
$6 = "environ"
scheme@(guile-user)> (define french (make-stemmer "french"))
scheme@(guile-user)> (stem french "environnement")
$7 = "environ"
scheme@(guile-user)> (stem french "bibliotheque")
$8 = "bibliothequ"
scheme@(guile-user)> (stem french "gazette")
$9 = "gazet"
scheme@(guile-user)> (stem french "constituant")
$10 = "constitu"
Small update, I forgot to actually guard the stemmer.
Here is the patch:
diff --git a/snowball-stemmer.scm b/snowball-stemmer.scm
index b754808..603a97e 100644
--- a/snowball-stemmer.scm
+++ b/snowball-stemmer.scm
@@ -67,6 +67,7 @@
(let ((out (proc (string->pointer algorithm) NULL)))
(when(eq? out NULL)
(error 'snowball-stemmer "Oops! Stemmer not found"
algorithm))
+ (stemmers-guardian out)
out))))
(define (reap-stemmers)
You will find attached to this mail the fixed version.
;; guile-snowball-stemmer
;; Copyright (C) 2019 Amirouche Boubekki <amirou...@hyper.dev>
;; This library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public
;; License as published by the Free Software Foundation; either
;; version 3 of the License, or (at your option) any later version.
;; This library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; Lesser General Public License for more details.
;; You should have received a copy of the GNU Lesser General Public License
;; along with this library; if not, write to the Free Software Foundation,
;; Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
(define-module (snowball-stemmer))
(import (system foreign))
(import (only (rnrs bytevectors)
bytevector-length
string->utf8
utf8->string))
(export stemmers make-stemmer stem)
;;; ffi helpers
(define NULL %null-pointer)
(define POINTER '*)
;; XXX: only use that procedure in your project if you don't need to
;; access static variables
(define (dynamic-link* library-name)
(let ((shared-object (dynamic-link library-name)))
(lambda (return-value function-name . arguments)
(let ((function (dynamic-func function-name shared-object)))
(pointer->procedure return-value function arguments)))))
;; bindings
(define snowball-stemmer
(dynamic-link*
"/gnu/store/rzvlish3vsidfmvv74f74s2854wn8yii-stemmer-0.0.0/lib/libstemmer.so"))
(define stemmers
(let ((proc (snowball-stemmer POINTER "sb_stemmer_list")))
(lambda ()
(let ((array (pointer-address (proc))))
(let loop ((out '())
(index 0))
(let ((pointer (dereference-pointer (make-pointer (+ array (* 8
index))))))
(if (eq? pointer NULL)
out
(loop (cons (pointer->string pointer) out)
(+ index 1)))))))))
(define %stemmer-delete
(let ((proc (snowball-stemmer void "sb_stemmer_delete" POINTER)))
(lambda (stemmer)
(proc stemmer))))
(define stemmers-guardian (make-guardian))
(define make-stemmer
(let ((proc (snowball-stemmer POINTER "sb_stemmer_new" POINTER POINTER)))
(lambda (algorithm)
(let ((out (proc (string->pointer algorithm) NULL)))
(when (eq? out NULL)
(error 'snowball-stemmer "Oops! Stemmer not found" algorithm))
(stemmers-guardian out)
out))))
(define (reap-stemmers)
(let loop ()
(let ((stemmer (stemmers-guardian)))
(when stemmer
(%stemmer-delete stemmer)
(loop)))))
(add-hook! after-gc-hook reap-stemmers)
(define %stemmer-length
(let ((proc (snowball-stemmer int "sb_stemmer_length" POINTER)))
(lambda (stemmer)
(proc stemmer))))
(define stem
(let ((proc (snowball-stemmer POINTER "sb_stemmer_stem" POINTER POINTER int)))
(lambda (stemmer word)
(let ((bv (string->utf8 word)))
(let ((pointer (proc stemmer (bytevector->pointer bv)
(bytevector-length bv))))
(utf8->string (pointer->bytevector pointer (%stemmer-length
stemmer))))))))