Changeset: fabfd34343c3 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=fabfd34343c3
Added Files:
        gdk/gdk_strimps.c
        gdk/gdk_strimps.h
Modified Files:
        gdk/CMakeLists.txt
        gdk/gdk.h
        monetdb5/modules/mal/01_calc.mal
        monetdb5/modules/mal/batcalc.c
Branch: string_imprints
Log Message:

Count the total number of digrams in a string bat


diffs (190 lines):

diff --git a/gdk/CMakeLists.txt b/gdk/CMakeLists.txt
--- a/gdk/CMakeLists.txt
+++ b/gdk/CMakeLists.txt
@@ -78,6 +78,7 @@ target_sources(bat
   gdk_analytic_func.c
   gdk_analytic.h
   gdk_tracer.c gdk_tracer.h
+  gdk_strimps.c gdk_strimps.h
   PUBLIC
   ${gdk_public_headers})
 
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2113,4 +2113,9 @@ gdk_export BAT *BATsample_with_seed(BAT 
  */
 #define MAXPARAMS      32
 
+/*
+ * String Imprints Development/Testing. TODO: remove the following.
+ */
+
+#include "gdk_strimps.h"
 #endif /* _GDK_H_ */
diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c
new file mode 100644
--- /dev/null
+++ b/gdk/gdk_strimps.c
@@ -0,0 +1,55 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.  If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
+ */
+
+#include "monetdb_config.h"
+#include "gdk.h"
+#include "gdk_private.h"
+
+/* Count how many unicode codepoints the given NUL-terminated UTF-8
+ * string contains.
+ *
+ * Every lead byte starts a new codepoint; the continuation bytes
+ * (10xxxxxx) that follow it belong to the same codepoint and are not
+ * counted. A continuation byte without a preceding lead byte
+ * (malformed input) is counted as one character of its own, so no
+ * non-empty string ever has length 0.
+ */
+static size_t
+GDKstrimp_strlen(const char *s)
+{
+       /* Inspect the bytes as unsigned so the bit masks below do not
+        * depend on the signedness of plain char. */
+       const unsigned char *p = (const unsigned char *)s;
+       size_t ncodepoints = 0;
+
+       while (*p != 0) {
+               /* Consume the first byte of the next character. */
+               p++;
+               ncodepoints++;
+               /* Skip the continuation bytes of that character. The
+                * terminating NUL never matches, so this stops at the
+                * end of the string. */
+               while ((*p & 0xC0) == 0x80)
+                       p++;
+       }
+
+       return ncodepoints;
+}
+
+/* Given a BAT return the number of digrams in it. The observation is
+ * that a string of n characters contains n - 1 digrams:
+ *
+ * 1 digram starting at character 1
+ * 1 digram starting at character 2
+ * [...]
+ * 1 digram starting at character n - 1
+ *
+ * Strings with fewer than two characters contain no digrams and do
+ * not contribute to the total.
+ *
+ * b must be a string BAT (checked with an assert). The total over all
+ * strings in b is stored in *n. Always returns GDK_SUCCEED.
+ */
+gdk_return
+GDKstrimp_ndigrams(BAT *b, size_t *n)
+{
+       BUN i;
+       BATiter bi;
+       size_t total = 0;
+
+       assert(b->ttype == TYPE_str);
+
+       bi = bat_iterator(b);
+       for (i = 0; i < b->batCount; i++) {
+               size_t len = GDKstrimp_strlen((const char *)BUNtail(bi, i));
+
+               /* size_t is unsigned: len - 1 would wrap around to a
+                * huge value for an empty string, so only strings that
+                * actually hold a digram are counted. */
+               if (len > 1)
+                       total += len - 1;
+       }
+       *n = total;
+
+       return GDK_SUCCEED;
+}
diff --git a/gdk/gdk_strimps.h b/gdk/gdk_strimps.h
new file mode 100644
--- /dev/null
+++ b/gdk/gdk_strimps.h
@@ -0,0 +1,27 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.  If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
+ */
+
+#ifndef _GDK_STRIMPS_H_
+#define _GDK_STRIMPS_H_
+
+#include <stdint.h>
+
+#define HISTSIZE 64 /* number of counters held by a Histogram */
+
+typedef struct {
+       uint64_t counts[HISTSIZE]; /* HISTSIZE 64-bit counters */
+       char foo; /* NOTE(review): not used anywhere in this changeset; looks like a placeholder -- confirm */
+} Histogram;
+
+typedef struct {
+       Histogram* hist; /* pointer to a Histogram; presumably the one backing this string imprint -- confirm ownership */
+} Strimp;
+
+gdk_export gdk_return GDKstrimp_ndigrams(BAT *b, size_t *n);
+
+#endif /* _GDK_STRIMPS_H_ */
diff --git a/monetdb5/modules/mal/01_calc.mal b/monetdb5/modules/mal/01_calc.mal
--- a/monetdb5/modules/mal/01_calc.mal
+++ b/monetdb5/modules/mal/01_calc.mal
@@ -5593,3 +5593,9 @@ comment "Calculate aggregate string conc
 pattern 
str_group_concat(b:bat[:str],sep:bat[:str],s:bat[:oid],nil_if_empty:bit) :str
 address CMDBATstr_group_concat
 comment "Calculate aggregate string concatenate of B with candidate list and 
separator SEP.";
+
+
+# String imprints dev/testing: iterate over a string BAT.
+# NOTE(review): the address CMDstr_iterate_bat is not defined in this
+# changeset; confirm that it exists before relying on this pattern.
+pattern str_iterate_bat(b:bat[:str]) :void
+address CMDstr_iterate_bat
+comment "iterate through a bat";
diff --git a/monetdb5/modules/mal/batcalc.c b/monetdb5/modules/mal/batcalc.c
--- a/monetdb5/modules/mal/batcalc.c
+++ b/monetdb5/modules/mal/batcalc.c
@@ -1368,6 +1368,39 @@ CMDifthen(Client cntxt, MalBlkPtr mb, Ma
        return MAL_SUCCEED;
 }
 
+
+/*
+ * String imprints dev/testing. TODO: remove.
+ */
+/* MAL wrapper around GDKstrimp_ndigrams, registered as
+ * batcalc.count_digrams.
+ *
+ * Argument 1 is the id of a string BAT; the number of digrams found
+ * in it is stored as a lng in return slot 0.
+ *
+ * Returns MAL_SUCCEED, or throws a MAL exception when the BAT cannot
+ * be loaded or the count fails.
+ */
+static str
+CMDstrimp_ndigrams(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       bat bid;
+       BAT *b;
+       size_t n;
+       gdk_return rc;
+
+       (void)cntxt;
+       (void)mb;
+
+       bid = *getArgReference_bat(stk, pci, 1);
+       if ((b = BATdescriptor(bid)) == NULL)
+               throw(MAL, "batcalc.count_digrams", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+
+       rc = GDKstrimp_ndigrams(b, &n);
+       /* The BAT is no longer needed once the count is known: give
+        * back the reference taken by BATdescriptor on both the
+        * success and the failure path. */
+       BBPunfix(b->batCacheid);
+       if (rc != GDK_SUCCEED)
+               throw(MAL, "batcalc.count_digrams", SQLSTATE(HY002) OPERATION_FAILED);
+
+       *getArgReference_lng(stk, pci, 0) = (lng) n;
+
+       return MAL_SUCCEED;
+}
+
+
+/*
+ * String imprints dev/testing. TODO: end remove.
+ */
+
 #include "mel.h"
 
 static str
@@ -2187,7 +2220,17 @@ static mel_func batcalc_init_funcs[] = {
  pattern("batcalc", "ifthenelse", CMDifthen, false, "If-then-else operation to 
assemble a conditional result", args(1,4, 
batargany("",1),batarg("b",bit),batargany("b1",1),argany("v2",1))),
  pattern("batcalc", "ifthenelse", CMDifthen, false, "If-then-else operation to 
assemble a conditional result", args(1,4, 
batargany("",1),batarg("b",bit),argany("v1",1),batargany("b2",1))),
  pattern("batcalc", "ifthenelse", CMDifthen, false, "If-then-else operation to 
assemble a conditional result", args(1,4, 
batargany("",1),batarg("b",bit),batargany("b1",1),batargany("b2",1))),
+
+ /*
+  * String imprints dev/testing. TODO: remove.
+  */
+ pattern("batcalc", "count_digrams", CMDstrimp_ndigrams, false, "count digrams 
in a string bat", args(1, 2, arg("",lng), batarg("b", str))),
+
+ /*
+  * String imprints dev/testing. TODO: end remove.
+  */
  { .imp=NULL }
+
 };
 #include "mal_import.h"
 #ifdef _MSC_VER
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to