Milimetric has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/382733 )

Change subject: Correct bugs in mediawiki_history scripts
......................................................................


Correct bugs in mediawiki_history scripts

A bug was introduced in the mediawiki-history table
creation schema, and another one in the mediawiki-history
Druid loading job.
This patch corrects both.

Change-Id: I4db0a630b3a0e7dc13c4f18158ef1b0210b447a0
---
M hive/mediawiki/history/create_mediawiki_history_table.hql
M oozie/mediawiki/history/druid/generate_json_mediawiki_history.hql
2 files changed, 17 insertions(+), 7 deletions(-)

Approvals:
  Milimetric: Verified; Looks good to me, approved



diff --git a/hive/mediawiki/history/create_mediawiki_history_table.hql 
b/hive/mediawiki/history/create_mediawiki_history_table.hql
index 74e9e5d..f707b26 100644
--- a/hive/mediawiki/history/create_mediawiki_history_table.hql
+++ b/hive/mediawiki/history/create_mediawiki_history_table.hql
@@ -1,5 +1,10 @@
 -- Creates table statement for mediawiki_history table.
 --
+-- WARNING: Timestamp fields are declared as string in this file
+-- (their timestamp declarations are commented out) because our
+-- version of hive doesn't support them. Once we upgrade to hive 1.2
+-- or higher, the fields should be switched back to timestamp.
+--
 -- Parameters:
 --     <none>
 --
@@ -12,7 +17,8 @@
   `wiki_db`                                       string        COMMENT 
'enwiki, dewiki, eswiktionary, etc.',
   `event_entity`                                  string        COMMENT 
'revision, user or page',
   `event_type`                                    string        COMMENT 
'create, move, delete, etc.  Detailed explanation in the docs under 
#Event_types',
-  `event_timestamp`                               timestamp     COMMENT 'When 
this event ocurred',
+  `event_timestamp`                               string        COMMENT 'When 
this event ocurred',
+  --`event_timestamp`                               timestamp     COMMENT 
'When this event ocurred',
   `event_comment`                                 string        COMMENT 
'Comment related to this event, sourced from log_comment, rev_comment, etc.',
   `event_user_id`                                 bigint        COMMENT 'Id of 
the user that caused the event',
   `event_user_text`                               string        COMMENT 
'Historical text of the user that caused the event',
@@ -26,7 +32,8 @@
   `event_user_is_created_by_peer`                 boolean       COMMENT 
'Whether the event_user account was created by another user',
   `event_user_is_anonymous`                       boolean       COMMENT 
'Whether the event_user is not registered',
   `event_user_is_bot_by_name`                     boolean       COMMENT 
'Whether the event_user\'s name matches patterns we use to identify bots',
-  `event_user_creation_timestamp`                 timestamp     COMMENT 
'Registration timestamp of the user that caused the event',
+  --`event_user_creation_timestamp`                 timestamp     COMMENT 
'Registration timestamp of the user that caused the event',
+  `event_user_creation_timestamp`                 string        COMMENT 
'Registration timestamp of the user that caused the event',
   `event_user_revision_count`                     bigint        COMMENT 
'Cumulative revision count per user for the current event_user_id (only 
available in revision-create events so far)',
   `event_user_seconds_since_previous_revision`    bigint        COMMENT 'In 
revision events: seconds elapsed since the previous revision made by the 
current event_user_id (only available in revision-create events so far)',
 
@@ -38,7 +45,8 @@
   `page_namespace_latest`                         int           COMMENT 'In 
revision/page events: current namespace of the page',
   `page_namespace_is_content_latest`              boolean       COMMENT 'In 
revision/page events: current namespace of the page is categorized as content',
   `page_is_redirect_latest`                       boolean       COMMENT 'In 
revision/page events: whether the page is currently a redirect',
-  `page_creation_timestamp`                       timestamp     COMMENT 'In 
revision/page events: creation timestamp of the page',
+  --`page_creation_timestamp`                       timestamp     COMMENT 'In 
revision/page events: creation timestamp of the page',
+  `page_creation_timestamp`                       string        COMMENT 'In 
revision/page events: creation timestamp of the page',
   `page_revision_count`                           bigint        COMMENT 'In 
revision/page events: Cumulative revision count per page for the current 
page_id (only available in revision-create events so far)',
   `page_seconds_since_previous_revision`          bigint        COMMENT 'In 
revision/page events: seconds elapsed since the previous revision made on the 
current page_id (only available in revision-create events so far)',
 
@@ -54,7 +62,8 @@
   `user_is_created_by_peer`                       boolean       COMMENT 'In 
user events: whether the user account was created by another user',
   `user_is_anonymous`                             boolean       COMMENT 'In 
user events: whether the user is not registered',
   `user_is_bot_by_name`                           boolean       COMMENT 'In 
user events: whether the user\'s name matches patterns we use to identify bots',
-  `user_creation_timestamp`                       timestamp     COMMENT 'In 
user events: registration timestamp of the user.',
+  --`user_creation_timestamp`                       timestamp     COMMENT 'In 
user events: registration timestamp of the user.',
+  `user_creation_timestamp`                       string        COMMENT 'In 
user events: registration timestamp of the user.',
 
   `revision_id`                                   bigint        COMMENT 'In 
revision events: id of the revision',
   `revision_parent_id`                            bigint        COMMENT 'In 
revision events: id of the parent revision',
@@ -65,7 +74,8 @@
   `revision_content_model`                        string        COMMENT 'In 
revision events: content model of revision',
   `revision_content_format`                       string        COMMENT 'In 
revision events: content format of revision',
   `revision_is_deleted`                           boolean       COMMENT 'In 
revision events: whether this revision has been deleted (moved to archive 
table)',
-  `revision_deleted_timestamp`                    timestamp     COMMENT 'In 
revision events: the timestamp when the revision was deleted',
+  --`revision_deleted_timestamp`                    timestamp     COMMENT 'In 
revision events: the timestamp when the revision was deleted',
+  `revision_deleted_timestamp`                    string        COMMENT 'In 
revision events: the timestamp when the revision was deleted',
   `revision_is_identity_reverted`                 boolean       COMMENT 'In 
revision events: whether this revision was reverted by another future revision',
   `revision_first_identity_reverting_revision_id` bigint        COMMENT 'In 
revision events: id of the revision that reverted this revision',
   `revision_seconds_to_identity_revert`           bigint        COMMENT 'In 
revision events: seconds elapsed between revision posting and its revert (if 
there was one)',
diff --git a/oozie/mediawiki/history/druid/generate_json_mediawiki_history.hql 
b/oozie/mediawiki/history/druid/generate_json_mediawiki_history.hql
index fc32a1a..d28b3a3 100644
--- a/oozie/mediawiki/history/druid/generate_json_mediawiki_history.hql
+++ b/oozie/mediawiki/history/druid/generate_json_mediawiki_history.hql
@@ -116,7 +116,7 @@
     CASE WHEN event_user_is_bot_by_name THEN 1 ELSE 0 END AS 
event_user_is_bot_by_name,
     event_user_creation_timestamp,
     event_user_revision_count,
-    event_user_seconds_to_previous_revision,
+    event_user_seconds_since_previous_revision,
 
     page_id,
     page_title,
@@ -128,7 +128,7 @@
     CASE WHEN page_is_redirect_latest THEN 1 ELSE 0 END AS 
page_is_redirect_latest,
     page_creation_timestamp,
     page_revision_count,
-    page_seconds_to_previous_revision,
+    page_seconds_since_previous_revision,
 
     user_id,
     user_text,

-- 
To view, visit https://gerrit.wikimedia.org/r/382733
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I4db0a630b3a0e7dc13c4f18158ef1b0210b447a0
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Joal <j...@wikimedia.org>
Gerrit-Reviewer: Milimetric <dandree...@wikimedia.org>
Gerrit-Reviewer: Ottomata <ao...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to