On 2019-05-07 15:28, amirou...@hyper.dev wrote:
I am pleased to announce the immediate availability of guile-snowball-stemmer.

This is binding library that allows to compute the stem of words in various languages. The list of supported language is available in the following REPL
run.

This is a binding library. The official website is at https://snowballstem.org/

It is mostly useful in the context of information retrieval.

The code is at https://git.sr.ht/~amz3/guile-snowball-stemmer

The libstemmer shared library path is hardcoded as guix path of the library. A guix package definition of the C library is available in my guix channel at:

  https://git.sr.ht/~amz3/guix-amz3-channel

That said there is no guix package for the bindings. Just include the file
attached to this mail in you project.

Here is a demo:

scheme@(guile-user)> (import (snowball-stemmer))

scheme@(guile-user)> (stemmers)
$1 = ("turkish" "swedish" "spanish" "russian" "romanian" "portuguese"
"porter" "norwegian" "italian" "hungarian" "german" "french" "finnish"
"english" "dutch" "danish")

scheme@(guile-user)> (make-stemmer "amazigh")
ERROR: In procedure scm-error:
ERROR: snowball-stemmer "Oops! Stemmer not found" "amazigh"

scheme@(guile-user)> (define english (make-stemmer "english"))
scheme@(guile-user)> (stem english "cycling")
$2 = "cycl"
scheme@(guile-user)> (stem english "ecology")
$3 = "ecolog"
scheme@(guile-user)> (stem english "library")
$4 = "librari"
scheme@(guile-user)> (stem english "virtual")
$5 = "virtual"
scheme@(guile-user)> (stem english "environment")
$6 = "environ"

scheme@(guile-user)> (define french (make-stemmer "french"))
scheme@(guile-user)> (stem french "environnement")
$7 = "environ"
scheme@(guile-user)> (stem french "bibliotheque")
$8 = "bibliothequ"
scheme@(guile-user)> (stem french "gazette")
$9 = "gazet"
scheme@(guile-user)> (stem french "constituant")
$10 = "constitu"


Small update, I forgot to actually guard the stemmer.

Here is the patch:

diff --git a/snowball-stemmer.scm b/snowball-stemmer.scm
index b754808..603a97e 100644
--- a/snowball-stemmer.scm
+++ b/snowball-stemmer.scm
@@ -67,6 +67,7 @@
       (let ((out (proc (string->pointer algorithm) NULL)))
         (when(eq? out NULL)
(error 'snowball-stemmer "Oops! Stemmer not found" algorithm))
+        (stemmers-guardian out)
         out))))

 (define (reap-stemmers)

You will find attached to this mail the fixed version.
;; guile-snowball-stemmer
;; Copyright (C) 2019 Amirouche Boubekki <amirou...@hyper.dev>

;; This library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public
;; License as published by the Free Software Foundation; either
;; version 3 of the License, or (at your option) any later version.

;; This library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; Lesser General Public License for more details.

;; You should have received a copy of the GNU Lesser General Public License
;; along with this library; if not, write to the Free Software Foundation,
;; Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
(define-module (snowball-stemmer))

(import (system foreign))
(import (only (rnrs bytevectors)
              bytevector-length
              string->utf8
              utf8->string))

(export stemmers make-stemmer stem)

;;; ffi helpers

(define NULL %null-pointer)
(define POINTER '*)

;; XXX: only use that procedure in your project if you don't need to
;; access static variables
(define (dynamic-link* library-name)
  (let ((shared-object (dynamic-link library-name)))
    (lambda (return-value function-name . arguments)
      (let ((function (dynamic-func function-name shared-object)))
        (pointer->procedure return-value function arguments)))))

;; bindings

(define snowball-stemmer
  (dynamic-link* 
"/gnu/store/rzvlish3vsidfmvv74f74s2854wn8yii-stemmer-0.0.0/lib/libstemmer.so"))

(define stemmers
  (let ((proc (snowball-stemmer POINTER "sb_stemmer_list")))
    (lambda ()
      (let ((array (pointer-address (proc))))
        (let loop ((out '())
                   (index 0))
          (let ((pointer (dereference-pointer (make-pointer (+ array (* 8 
index))))))
            (if (eq? pointer NULL)
                out
                (loop (cons (pointer->string pointer) out)
                      (+ index 1)))))))))

(define %stemmer-delete
  (let ((proc (snowball-stemmer void "sb_stemmer_delete" POINTER)))
    (lambda (stemmer)
      (proc stemmer))))

(define stemmers-guardian (make-guardian))

(define make-stemmer
  (let ((proc (snowball-stemmer POINTER "sb_stemmer_new" POINTER POINTER)))
    (lambda (algorithm)
      (let ((out (proc (string->pointer algorithm) NULL)))
        (when (eq? out NULL)
          (error 'snowball-stemmer "Oops! Stemmer not found" algorithm))
        (stemmers-guardian out)
        out))))

(define (reap-stemmers)
  (let loop ()
    (let ((stemmer (stemmers-guardian)))
      (when stemmer
        (%stemmer-delete stemmer)
        (loop)))))

(add-hook! after-gc-hook reap-stemmers)

(define %stemmer-length
  (let ((proc (snowball-stemmer int "sb_stemmer_length" POINTER)))
    (lambda (stemmer)
      (proc stemmer))))

(define stem
  (let ((proc (snowball-stemmer POINTER "sb_stemmer_stem" POINTER POINTER int)))
    (lambda (stemmer word)
      (let ((bv (string->utf8 word)))
        (let ((pointer (proc stemmer (bytevector->pointer bv) 
(bytevector-length bv))))
          (utf8->string (pointer->bytevector pointer (%stemmer-length 
stemmer))))))))

Reply via email to