glib/demo/text.c     |  182 +++++++++++++++++++++++++++++++++++++++++++++------
 glib/poppler-page.cc |   91 +++++++++++++++++++++++++
 glib/poppler-page.h  |    4 -
 3 files changed, 258 insertions(+), 19 deletions(-)

New commits:
commit 35e87d2062b1d82db0d765de5a6187122a0fa99c
Author: Carlos Garcia Campos <[email protected]>
Date:   Wed Jun 16 11:52:25 2010 +0200

    [glib-demo] Add demo for poppler_page_get_text_layout()

diff --git a/glib/demo/text.c b/glib/demo/text.c
index e119082..b7a5c91 100644
--- a/glib/demo/text.c
+++ b/glib/demo/text.c
@@ -20,11 +20,22 @@
 
 #include "text.h"
 
+enum {
+       TEXT_X1_COLUMN,     /* rectangle x1, stored as a "%.2f" string */
+       TEXT_Y1_COLUMN,     /* rectangle y1, stored as a "%.2f" string */
+       TEXT_X2_COLUMN,     /* rectangle x2, stored as a "%.2f" string */
+       TEXT_Y2_COLUMN,     /* rectangle y2, stored as a "%.2f" string */
+       TEXT_OFFSET_COLUMN, /* index of the rectangle, stored as a "%d" string */
+       TEXT_OFFPTR_COLUMN, /* same index packed with GINT_TO_POINTER; read back on selection */
+       N_COLUMNS           /* number of columns passed to gtk_list_store_new () */
+};
+
 typedef struct {
         PopplerDocument *doc;
 
         GtkWidget       *timer_label;
         GtkTextBuffer   *buffer;
+       GtkListStore    *model;        /* one row per rectangle from poppler_page_get_text_layout () */
 
         gint             page;
 } PgdTextDemo;
@@ -45,6 +56,11 @@ pgd_text_free (PgdTextDemo *demo)
                demo->buffer = NULL;
        }
 
+       if (demo->model) {
+               g_object_unref (demo->model);
+               demo->model = NULL;
+       }
+
        g_free (demo);
 }
 
@@ -52,16 +68,21 @@ static void
 pgd_text_get_text (GtkWidget   *button,
                   PgdTextDemo *demo)
 {
-       PopplerPage     *page;
-       PopplerRectangle rect;
-       gdouble          width, height;
-       gchar           *text;
-       GTimer          *timer;
+       PopplerPage      *page;
+       PopplerRectangle  rect;
+       PopplerRectangle *recs = NULL;
+       guint             n_recs;
+       gdouble           width, height;
+       gchar            *text;
+       GTimer           *timer;
+       gint              i;
 
        page = poppler_document_get_page (demo->doc, demo->page);
        if (!page)
                return;
 
+       gtk_list_store_clear (demo->model);
+
        poppler_page_get_size (page, &width, &height);
        rect.x1 = rect.y1 = 0;
        rect.x2 = width;
@@ -72,10 +93,17 @@ pgd_text_get_text (GtkWidget   *button,
        g_timer_stop (timer);
 
        if (text) {
-               gchar *str;
+               gchar  *str;
+               gdouble text_elapsed;
 
-               str = g_strdup_printf ("<i>got text in %.4f seconds</i>",
-                                      g_timer_elapsed (timer, NULL));
+               text_elapsed = g_timer_elapsed (timer, NULL);
+
+               g_timer_start (timer);
+               poppler_page_get_text_layout (page, &recs, &n_recs);
+               g_timer_stop (timer);
+
+               str = g_strdup_printf ("<i>got text in %.4f seconds, text 
layout in %.4f seconds</i>",
+                                      text_elapsed, g_timer_elapsed (timer, 
NULL));
                gtk_label_set_markup (GTK_LABEL (demo->timer_label), str);
                g_free (str);
        } else {
@@ -89,8 +117,62 @@ pgd_text_get_text (GtkWidget   *button,
                gtk_text_buffer_set_text (demo->buffer, text, strlen (text));
                g_free (text);
        }
+
+       for (i = 0; i < n_recs; i++) {
+               GtkTreeIter iter;
+               gchar      *x1, *y1, *x2, *y2;
+               gchar      *offset;
+
+               x1 = g_strdup_printf ("%.2f", recs[i].x1);
+               y1 = g_strdup_printf ("%.2f", recs[i].y1);
+               x2 = g_strdup_printf ("%.2f", recs[i].x2);
+               y2 = g_strdup_printf ("%.2f", recs[i].y2);
+
+               offset = g_strdup_printf ("%d", i);
+
+               gtk_list_store_append (demo->model, &iter);
+               gtk_list_store_set (demo->model, &iter,
+                                   TEXT_X1_COLUMN, x1,
+                                   TEXT_Y1_COLUMN, y1,
+                                   TEXT_X2_COLUMN, x2,
+                                   TEXT_Y2_COLUMN, y2,
+                                   TEXT_OFFSET_COLUMN, offset,
+                                   TEXT_OFFPTR_COLUMN, GINT_TO_POINTER (i),
+                                   -1);
+
+               g_free (x1);
+               g_free (y1);
+               g_free (x2);
+               g_free (y2);
+               g_free (offset);
+       }
+
+       g_free (recs);
+}
+
+static void
+pgd_text_selection_changed (GtkTreeSelection *treeselection,
+                           PgdTextDemo      *demo)
+{
+       GtkTreeModel *store;
+       GtkTreeIter   row;
+       GtkTextIter   start, stop;
+       gpointer      packed_offset;
+
+       /* Nothing to highlight unless a row is selected. */
+       if (!gtk_tree_selection_get_selected (treeselection, &store, &row))
+               return;
+
+       /* Each row stores its character offset packed into a pointer. */
+       gtk_tree_model_get (store, &row, TEXT_OFFPTR_COLUMN, &packed_offset, -1);
+       /* Select the single character that starts at that offset. */
+       gtk_text_buffer_get_iter_at_offset (demo->buffer, &start, GPOINTER_TO_INT (packed_offset));
+       stop = start;
+       gtk_text_iter_forward_char (&stop);
+       gtk_text_buffer_select_range (demo->buffer, &start, &stop);
 }
 
+
 static void
 pgd_text_page_selector_value_changed (GtkSpinButton *spinbutton,
                                      PgdTextDemo   *demo)
@@ -101,14 +183,17 @@ pgd_text_page_selector_value_changed (GtkSpinButton 
*spinbutton,
 GtkWidget *
 pgd_text_create_widget (PopplerDocument *document)
 {
-       PgdTextDemo *demo;
-       GtkWidget   *label;
-       GtkWidget   *vbox;
-       GtkWidget   *hbox, *page_selector;
-       GtkWidget   *button;
-       GtkWidget   *swindow, *textview;
-       gchar       *str;
-       gint         n_pages;
+       PgdTextDemo      *demo;
+       GtkWidget        *label;
+       GtkWidget        *vbox;
+       GtkWidget        *hbox, *page_selector;
+       GtkWidget        *button;
+       GtkWidget        *swindow, *textview, *treeview;
+       GtkTreeSelection *selection;
+       GtkWidget        *hpaned;
+       GtkCellRenderer  *renderer;
+       gchar            *str;
+       gint              n_pages;
 
        demo = g_new0 (PgdTextDemo, 1);
 
@@ -153,20 +238,81 @@ pgd_text_create_widget (PopplerDocument *document)
        gtk_box_pack_start (GTK_BOX (vbox), demo->timer_label, FALSE, TRUE, 0);
        gtk_widget_show (demo->timer_label);
 
+       hpaned = gtk_hpaned_new ();
+       gtk_paned_set_position (GTK_PANED (hpaned), 300);
+
+       swindow = gtk_scrolled_window_new (NULL, NULL);
+       gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow),
+                                       GTK_POLICY_AUTOMATIC,
+                                       GTK_POLICY_AUTOMATIC);
+
+       demo->model = gtk_list_store_new (N_COLUMNS,
+                                         G_TYPE_STRING,
+                                         G_TYPE_STRING, G_TYPE_STRING,
+                                         G_TYPE_STRING, G_TYPE_STRING,
+                                         G_TYPE_POINTER);
+       treeview = gtk_tree_view_new_with_model (GTK_TREE_MODEL (demo->model));
+
+       renderer = gtk_cell_renderer_text_new ();
+       gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+                                                    TEXT_X1_COLUMN, "X1",
+                                                    renderer,
+                                                    "text", TEXT_X1_COLUMN,
+                                                    NULL);
+       renderer = gtk_cell_renderer_text_new ();
+       gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+                                                    TEXT_Y1_COLUMN, "Y1",
+                                                    renderer,
+                                                    "text", TEXT_Y1_COLUMN,
+                                                    NULL);
+       renderer = gtk_cell_renderer_text_new ();
+       gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+                                                    TEXT_X2_COLUMN, "X2",
+                                                    renderer,
+                                                    "text", TEXT_X2_COLUMN,
+                                                    NULL);
+       renderer = gtk_cell_renderer_text_new ();
+       gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+                                                    TEXT_Y2_COLUMN, "Y2",
+                                                    renderer,
+                                                    "text", TEXT_Y2_COLUMN,
+                                                    NULL);
+
+       renderer = gtk_cell_renderer_text_new ();
+       gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+                                                    TEXT_OFFSET_COLUMN, 
"Offset",
+                                                    renderer,
+                                                    "text", TEXT_OFFSET_COLUMN,
+                                                    NULL);
+
+       selection = gtk_tree_view_get_selection (GTK_TREE_VIEW (treeview));
+       g_signal_connect (selection, "changed",
+                         G_CALLBACK (pgd_text_selection_changed),
+                         (gpointer) demo);
+
+       gtk_container_add (GTK_CONTAINER (swindow), treeview);
+       gtk_widget_show (treeview);
+
+       gtk_paned_add1 (GTK_PANED (hpaned), swindow);
+       gtk_widget_show (swindow);
+
        swindow = gtk_scrolled_window_new (NULL, NULL);
        gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow),
                                        GTK_POLICY_AUTOMATIC,
                                        GTK_POLICY_AUTOMATIC);
-       
+
        demo->buffer = gtk_text_buffer_new (NULL);
        textview = gtk_text_view_new_with_buffer (demo->buffer);
 
        gtk_container_add (GTK_CONTAINER (swindow), textview);
        gtk_widget_show (textview);
 
-       gtk_box_pack_start (GTK_BOX (vbox), swindow, TRUE, TRUE, 0);
+       gtk_paned_add2 (GTK_PANED (hpaned), swindow);
        gtk_widget_show (swindow);
 
+       gtk_box_pack_start (GTK_BOX (vbox), hpaned, TRUE, TRUE, 0);
+       gtk_widget_show (hpaned);
+
        g_object_weak_ref (G_OBJECT (vbox),
                           (GWeakNotify)pgd_text_free,
                           demo);
commit ddcea568b3a7334e062d6214f43d0a2c2ec95be4
Author: Daniel Garcia <[email protected]>
Date:   Tue Jun 15 16:57:32 2010 +0200

    [glib] Add poppler_page_get_text_layout()
    
    Returns an array of PopplerRectangle items, where each rectangle is
    the position of one text character.
    
    The position in this array represents the offset in the text returned
    by poppler_page_get_text().

diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index 19ea941..01d5540 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -1736,3 +1736,94 @@ poppler_page_get_crop_box (PopplerPage *page, 
PopplerRectangle *rect)
   rect->y2 = cropBox->y2;
 }
 
+/**
+ * poppler_page_get_text_layout:
+ * @page: A #PopplerPage
+ * @rectangles: return location for an array of #PopplerRectangle
+ * @n_rectangles: length of returned array
+ *
+ * Obtains the layout of the text as an array of #PopplerRectangle: one
+ * entry per character, plus one separator entry after each word (the gap
+ * up to the next word, or an empty rectangle when there is no next word).
+ * Free the array with g_free () when done. If the page has no text,
+ * @rectangles is set to %NULL and @n_rectangles to 0. The position in the
+ * array represents an offset in the text returned by poppler_page_get_text ().
+ *
+ * Return value: %TRUE if the page contains text, %FALSE otherwise
+ **/
+gboolean
+poppler_page_get_text_layout (PopplerPage       *page,
+                              PopplerRectangle **rectangles,
+                              guint             *n_rectangles)
+{
+  TextPage *text;
+  TextWordList *wordlist;
+  TextWord *word, *nextword;
+  PopplerRectangle *rect;
+  int i, j, offset = 0;
+  gdouble x1, y1, x2, y2;
+  gdouble x3, y3, x4, y4;
+
+  g_return_val_if_fail (POPPLER_IS_PAGE (page), FALSE);
+
+  // Give both outputs defined values before the early return below.
+  *rectangles = NULL;
+  *n_rectangles = 0;
+
+  text = poppler_page_get_text_page (page);
+  wordlist = text->makeWordList (gFalse);
+
+  if (wordlist->getLength () <= 0)
+    {
+      // The word list is deleted by this function, so free it here too.
+      delete wordlist;
+      return FALSE;
+    }
+
+  // Getting the array size: one slot per character plus one per word
+  for (i = 0; i < wordlist->getLength (); i++)
+    {
+      word = wordlist->get (i);
+      *n_rectangles += word->getLength () + 1;
+    }
+
+  *rectangles = g_new (PopplerRectangle, *n_rectangles);
+
+  // Calculating each char position
+  for (i = 0; i < wordlist->getLength (); i++)
+    {
+      word = wordlist->get (i);
+      for (j = 0; j < word->getLength (); j++)
+        {
+          rect = *rectangles + offset;
+          word->getCharBBox (j, &(rect->x1), &(rect->y1),
+                             &(rect->x2), &(rect->y2));
+          offset++;
+        }
+
+      // adding spaces and break lines
+      rect = *rectangles + offset;
+      word->getBBox (&x1, &y1, &x2, &y2);
+      nextword = word->getNext ();
+      if (nextword)
+        {
+          nextword->getBBox (&x3, &y3, &x4, &y4);
+          // space is from one word to other and with the same height as
+          // first word.
+          rect->x1 = x2;
+          rect->y1 = y1;
+          rect->x2 = x3;
+          rect->y2 = y2;
+        }
+      else
+        {
+          // end of line: empty rectangle at the word's (x2, y2) corner
+          rect->x1 = rect->x2 = x2;
+          rect->y1 = rect->y2 = y2;
+        }
+      offset++;
+    }
+
+  delete wordlist;
+  return TRUE;
+}
diff --git a/glib/poppler-page.h b/glib/poppler-page.h
index 20dc20f..3a31acd 100644
--- a/glib/poppler-page.h
+++ b/glib/poppler-page.h
@@ -114,7 +114,9 @@ GList                 *poppler_page_get_annot_mapping    
(PopplerPage        *pa
 void                   poppler_page_free_annot_mapping   (GList              
*list);
 void                 poppler_page_get_crop_box          (PopplerPage        
*page,
                                                          PopplerRectangle   
*rect);
-
+gboolean               poppler_page_get_text_layout      (PopplerPage        
*page,
+                                                          PopplerRectangle  
**rectangles,
+                                                          guint              
*n_rectangles);
 
 /* A rectangle on a page, with coordinates in PDF points. */
 #define POPPLER_TYPE_RECTANGLE             (poppler_rectangle_get_type ())
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to