glib/demo/text.c | 182 +++++++++++++++++++++++++++++++++++++++++++++------ glib/poppler-page.cc | 91 +++++++++++++++++++++++++ glib/poppler-page.h | 4 - 3 files changed, 258 insertions(+), 19 deletions(-)
New commits: commit 35e87d2062b1d82db0d765de5a6187122a0fa99c Author: Carlos Garcia Campos <[email protected]> Date: Wed Jun 16 11:52:25 2010 +0200 [glib-demo] Add demo for poppler_page_get_text_layout() diff --git a/glib/demo/text.c b/glib/demo/text.c index e119082..b7a5c91 100644 --- a/glib/demo/text.c +++ b/glib/demo/text.c @@ -20,11 +20,22 @@ #include "text.h" +enum { + TEXT_X1_COLUMN, + TEXT_Y1_COLUMN, + TEXT_X2_COLUMN, + TEXT_Y2_COLUMN, + TEXT_OFFSET_COLUMN, + TEXT_OFFPTR_COLUMN, + N_COLUMNS +}; + typedef struct { PopplerDocument *doc; GtkWidget *timer_label; GtkTextBuffer *buffer; + GtkListStore *model; gint page; } PgdTextDemo; @@ -45,6 +56,11 @@ pgd_text_free (PgdTextDemo *demo) demo->buffer = NULL; } + if (demo->model) { + g_object_unref (demo->model); + demo->model = NULL; + } + g_free (demo); } @@ -52,16 +68,21 @@ static void pgd_text_get_text (GtkWidget *button, PgdTextDemo *demo) { - PopplerPage *page; - PopplerRectangle rect; - gdouble width, height; - gchar *text; - GTimer *timer; + PopplerPage *page; + PopplerRectangle rect; + PopplerRectangle *recs = NULL; + guint n_recs; + gdouble width, height; + gchar *text; + GTimer *timer; + gint i; page = poppler_document_get_page (demo->doc, demo->page); if (!page) return; + gtk_list_store_clear (demo->model); + poppler_page_get_size (page, &width, &height); rect.x1 = rect.y1 = 0; rect.x2 = width; @@ -72,10 +93,17 @@ pgd_text_get_text (GtkWidget *button, g_timer_stop (timer); if (text) { - gchar *str; + gchar *str; + gdouble text_elapsed; - str = g_strdup_printf ("<i>got text in %.4f seconds</i>", - g_timer_elapsed (timer, NULL)); + text_elapsed = g_timer_elapsed (timer, NULL); + + g_timer_start (timer); + poppler_page_get_text_layout (page, &recs, &n_recs); + g_timer_stop (timer); + + str = g_strdup_printf ("<i>got text in %.4f seconds, text layout in %.4f seconds</i>", + text_elapsed, g_timer_elapsed (timer, NULL)); gtk_label_set_markup (GTK_LABEL (demo->timer_label), str); g_free (str); } else { @@ -89,8 
+117,62 @@ pgd_text_get_text (GtkWidget *button, gtk_text_buffer_set_text (demo->buffer, text, strlen (text)); g_free (text); } + + for (i = 0; i < n_recs; i++) { + GtkTreeIter iter; + gchar *x1, *y1, *x2, *y2; + gchar *offset; + + x1 = g_strdup_printf ("%.2f", recs[i].x1); + y1 = g_strdup_printf ("%.2f", recs[i].y1); + x2 = g_strdup_printf ("%.2f", recs[i].x2); + y2 = g_strdup_printf ("%.2f", recs[i].y2); + + offset = g_strdup_printf ("%d", i); + + gtk_list_store_append (demo->model, &iter); + gtk_list_store_set (demo->model, &iter, + TEXT_X1_COLUMN, x1, + TEXT_Y1_COLUMN, y1, + TEXT_X2_COLUMN, x2, + TEXT_Y2_COLUMN, y2, + TEXT_OFFSET_COLUMN, offset, + TEXT_OFFPTR_COLUMN, GINT_TO_POINTER (i), + -1); + + g_free (x1); + g_free (y1); + g_free (x2); + g_free (y2); + g_free (offset); + } + + g_free (recs); +} + +static void +pgd_text_selection_changed (GtkTreeSelection *treeselection, + PgdTextDemo *demo) +{ + GtkTreeModel *model; + GtkTreeIter iter; + + if (gtk_tree_selection_get_selected (treeselection, &model, &iter)) { + gpointer offset; + GtkTextIter begin_iter, end_iter; + + gtk_tree_model_get (model, &iter, + TEXT_OFFPTR_COLUMN, &offset, + -1); + + gtk_text_buffer_get_iter_at_offset (demo->buffer, &begin_iter, GPOINTER_TO_INT (offset)); + end_iter = begin_iter; + gtk_text_iter_forward_char (&end_iter); + gtk_text_buffer_select_range (demo->buffer, &begin_iter, &end_iter); + } } + static void pgd_text_page_selector_value_changed (GtkSpinButton *spinbutton, PgdTextDemo *demo) @@ -101,14 +183,17 @@ pgd_text_page_selector_value_changed (GtkSpinButton *spinbutton, GtkWidget * pgd_text_create_widget (PopplerDocument *document) { - PgdTextDemo *demo; - GtkWidget *label; - GtkWidget *vbox; - GtkWidget *hbox, *page_selector; - GtkWidget *button; - GtkWidget *swindow, *textview; - gchar *str; - gint n_pages; + PgdTextDemo *demo; + GtkWidget *label; + GtkWidget *vbox; + GtkWidget *hbox, *page_selector; + GtkWidget *button; + GtkWidget *swindow, *textview, *treeview; + 
GtkTreeSelection *selection; + GtkWidget *hpaned; + GtkCellRenderer *renderer; + gchar *str; + gint n_pages; demo = g_new0 (PgdTextDemo, 1); @@ -153,20 +238,81 @@ pgd_text_create_widget (PopplerDocument *document) gtk_box_pack_start (GTK_BOX (vbox), demo->timer_label, FALSE, TRUE, 0); gtk_widget_show (demo->timer_label); + hpaned = gtk_hpaned_new (); + gtk_paned_set_position (GTK_PANED (hpaned), 300); + + swindow = gtk_scrolled_window_new (NULL, NULL); + gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow), + GTK_POLICY_AUTOMATIC, + GTK_POLICY_AUTOMATIC); + + demo->model = gtk_list_store_new (N_COLUMNS, + G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_POINTER); + treeview = gtk_tree_view_new_with_model (GTK_TREE_MODEL (demo->model)); + + renderer = gtk_cell_renderer_text_new (); + gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview), + TEXT_X1_COLUMN, "X1", + renderer, + "text", TEXT_X1_COLUMN, + NULL); + renderer = gtk_cell_renderer_text_new (); + gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview), + TEXT_Y1_COLUMN, "Y1", + renderer, + "text", TEXT_Y1_COLUMN, + NULL); + renderer = gtk_cell_renderer_text_new (); + gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview), + TEXT_X2_COLUMN, "X2", + renderer, + "text", TEXT_X2_COLUMN, + NULL); + renderer = gtk_cell_renderer_text_new (); + gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview), + TEXT_Y2_COLUMN, "Y2", + renderer, + "text", TEXT_Y2_COLUMN, + NULL); + + renderer = gtk_cell_renderer_text_new (); + gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview), + TEXT_OFFSET_COLUMN, "Offset", + renderer, + "text", TEXT_OFFSET_COLUMN, + NULL); + + selection = gtk_tree_view_get_selection (GTK_TREE_VIEW (treeview)); + g_signal_connect (selection, "changed", + G_CALLBACK (pgd_text_selection_changed), + (gpointer) demo); + + gtk_container_add (GTK_CONTAINER (swindow), treeview); + 
gtk_widget_show (treeview); + + gtk_paned_add1 (GTK_PANED (hpaned), swindow); + gtk_widget_show (swindow); + swindow = gtk_scrolled_window_new (NULL, NULL); gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow), GTK_POLICY_AUTOMATIC, GTK_POLICY_AUTOMATIC); - + demo->buffer = gtk_text_buffer_new (NULL); textview = gtk_text_view_new_with_buffer (demo->buffer); gtk_container_add (GTK_CONTAINER (swindow), textview); gtk_widget_show (textview); - gtk_box_pack_start (GTK_BOX (vbox), swindow, TRUE, TRUE, 0); + gtk_paned_add2 (GTK_PANED (hpaned), swindow); gtk_widget_show (swindow); + gtk_box_pack_start (GTK_BOX (vbox), hpaned, TRUE, TRUE, 0); + gtk_widget_show (hpaned); + g_object_weak_ref (G_OBJECT (vbox), (GWeakNotify)pgd_text_free, demo); commit ddcea568b3a7334e062d6214f43d0a2c2ec95be4 Author: Daniel Garcia <[email protected]> Date: Tue Jun 15 16:57:32 2010 +0200 [glib] Add poppler_page_get_text_layout() Returns an array of PopplerRectangle items, where each rectangle is the position of one text character. The position in this array represents the offset in the text returned by poppler_page_get_text(). diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc index 19ea941..01d5540 100644 --- a/glib/poppler-page.cc +++ b/glib/poppler-page.cc @@ -1736,3 +1736,94 @@ poppler_page_get_crop_box (PopplerPage *page, PopplerRectangle *rect) rect->y2 = cropBox->y2; } +/** + * poppler_page_get_text_layout: + * @page: A #PopplerPage + * @rectangles: return location for an array of #PopplerRectangle + * @n_rectangles: length of returned array + * + * Obtains the layout of the text as a list of #PopplerRectangle + * This array must be freed with g_free () when done. 
+ * + * The position in the array represents an offset in the text returned by + * poppler_page_get_text + * + * Return value: %TRUE if the page contains text, %FALSE otherwise + **/ +gboolean +poppler_page_get_text_layout (PopplerPage *page, + PopplerRectangle **rectangles, + guint *n_rectangles) +{ + TextPage *text; + TextWordList *wordlist; + TextWord *word, *nextword; + PopplerRectangle *rect; + int i, j, offset = 0; + gdouble x1, y1, x2, y2; + gdouble x3, y3, x4, y4; + + g_return_val_if_fail (POPPLER_IS_PAGE (page), FALSE); + + *n_rectangles = 0; + + text = poppler_page_get_text_page (page); + wordlist = text->makeWordList (gFalse); + + if (wordlist->getLength () <= 0) + return FALSE; + + // Getting the array size + for (i = 0; i < wordlist->getLength (); i++) + { + word = wordlist->get (i); + *n_rectangles += word->getLength () + 1; + } + + *rectangles = g_new (PopplerRectangle, *n_rectangles); + + // Calculating each char position + for (i = 0; i < wordlist->getLength (); i++) + { + word = wordlist->get (i); + for (j = 0; j < word->getLength (); j++) + { + rect = *rectangles + offset; + word->getCharBBox (j, + &(rect->x1), + &(rect->y1), + &(rect->x2), + &(rect->y2)); + offset++; + } + + // adding spaces and break lines + rect = *rectangles + offset; + word->getBBox (&x1, &y1, &x2, &y2); + + nextword = word->getNext (); + if (nextword) + { + nextword->getBBox (&x3, &y3, &x4, &y4); + // space is from one word to other and with the same height as + // first word. 
+ rect->x1 = x2; + rect->y1 = y1; + rect->x2 = x3; + rect->y2 = y2; + } + else + { + // end of line + rect->x1 = x2; + rect->y1 = y2; + rect->x2 = x2; + rect->y2 = y2; + } + offset++; + } + + delete wordlist; + + return TRUE; +} diff --git a/glib/poppler-page.h b/glib/poppler-page.h index 20dc20f..3a31acd 100644 --- a/glib/poppler-page.h +++ b/glib/poppler-page.h @@ -114,7 +114,9 @@ GList *poppler_page_get_annot_mapping (PopplerPage *pa void poppler_page_free_annot_mapping (GList *list); void poppler_page_get_crop_box (PopplerPage *page, PopplerRectangle *rect); - +gboolean poppler_page_get_text_layout (PopplerPage *page, + PopplerRectangle **rectangles, + guint *n_rectangles); /* A rectangle on a page, with coordinates in PDF points. */ #define POPPLER_TYPE_RECTANGLE (poppler_rectangle_get_type ()) _______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
