Ejegg has uploaded a new change for review.
https://gerrit.wikimedia.org/r/259404
Change subject: Create and populate donatewiki_unique
......................................................................
Create and populate donatewiki_unique
Little bit repetitive, but should be safe.
Next: keep the counts table up to date.
Bug: T114010
Change-Id: I2122cd0010f1f3a519b9bbb8632a8048356acfe2
---
M fundraiser/analytics/management/commands/LoadLPImpressions.py
A sql/001_create_donatewiki_unique.sql
M sql/create.sql
3 files changed, 98 insertions(+), 10 deletions(-)
git pull
ssh://gerrit.wikimedia.org:29418/wikimedia/fundraising/tools/DjangoBannerStats
refs/changes/04/259404/1
diff --git a/fundraiser/analytics/management/commands/LoadLPImpressions.py
b/fundraiser/analytics/management/commands/LoadLPImpressions.py
index 0444561..1a18da2 100644
--- a/fundraiser/analytics/management/commands/LoadLPImpressions.py
+++ b/fundraiser/analytics/management/commands/LoadLPImpressions.py
@@ -54,8 +54,10 @@
help = 'Parses the specified squid log file and stores the impression in
the database.'
impression_sql = "INSERT INTO `landingpageimpression_raw%s` (timestamp,
squid_id, squid_sequence, utm_source, utm_campaign, utm_key, utm_medium,
landingpage, project_id, language_id, country_id) VALUES %s"
+ unique_sql = "INSERT INTO `donatewiki_unique` (timestamp, utm_source,
utm_campaign, contact_id, link_id) VALUES %s"
pending_impressions = []
+ pending_uniques = []
debug_info = []
debug_count = 0
@@ -244,6 +246,8 @@
utm_campaign = ""
utm_medium = ""
utm_key = ""
+ contact_id = ""
+ link_id = ""
if "utm_source" in qs:
utm_source = qs["utm_source"][0].replace("%", "%%")
@@ -253,6 +257,10 @@
utm_medium = qs["utm_medium"][0].replace("%", "%%")
if "utm_key" in qs:
utm_key = qs["utm_key"][0].replace("%", "%%")
+ if "contact_id" in qs:
+ contact_id = qs["contact_id"][0].replace("%", "%%")
+ if "link_id" in qs:
+ link_id = qs["link_id"][0].replace("%", "%%")
landingpage = ""
language = None
@@ -323,15 +331,18 @@
# truncate to db max lengths
utm_campaign = utm_campaign[:255]
utm_source = utm_source[:255]
+ contact_id = contact_id[:31]
+ link_id = link_id[:127]
squid = lookup_squidhost(hostname=m.group("squid"),
verbose=self.verbose)
seq = 0 # Can't do int(m.group("sequence")) because
its > 2^32 and I don't want to run an alter
timestamp = dateparse(m.group("timestamp"))
+ sql_time =
MySQLdb.escape_string(str(timestamp.strftime("%Y-%m-%d %H:%M:%S")))
# not using the models here saves a lot of wall time
try:
lp_tmp = "('%s',%s, %s, '%s', '%s', '%s', '%s',
'%s', %s, %s, %s)" % (
-
MySQLdb.escape_string(str(timestamp.strftime("%Y-%m-%d %H:%M:%S"))),
+ sql_time,
MySQLdb.escape_string(str(squid.id)),
MySQLdb.escape_string(str(seq)),
MySQLdb.escape_string(utm_source),
@@ -354,15 +365,44 @@
finally:
lp_tmp = ""
+ if url.hostname == 'donate.wikimedia.org' and
contact_id != '':
+ try:
+ uq_tmp = "('%s', '%s', '%s', '%s', '%s')" % (
+ sql_time,
+ MySQLdb.escape_string(utm_source),
+ MySQLdb.escape_string(utm_campaign),
+ MySQLdb.escape_string(contact_id),
+ MySQLdb.escape_string(link_id),
+ )
+ uq_tmp = uq_tmp.replace("%", "")
+ self.pending_uniques.append(uq_tmp)
+
+ except Exception:
+ results["impression"]["error"] += 1
+ self.logger.exception("** UNHANDLED EXCEPTION
WHILE PROCESSING DONATEWIKI HIT **")
+
self.logger.error("********************\n%s\n********************" % l)
+
+ finally:
+ uq_tmp = ""
+
# write the models in batch
if len(self.pending_impressions) % batch_size == 0:
try:
if not self.debug:
- self.write(self.pending_impressions)
+ self.write(self.impression_sql,
self.pending_impressions)
except Exception:
self.logger.exception("Error writing
impressions to the database")
finally:
self.pending_impressions = []
+
+ if len(self.pending_uniques) % batch_size == 0:
+ try:
+ if not self.debug:
+ self.write(self.unique_sql,
self.pending_uniques)
+ except Exception:
+ self.logger.exception("Error writing
donatewiki uniques to the database")
+ finally:
+ self.pending_uniques = []
except Exception as e:
results["impression"]["error"] += 1
@@ -372,8 +412,10 @@
try:
# write out any remaining records
if not self.debug:
- self.write(self.pending_impressions)
+ self.write(self.impression_sql, self.pending_impressions)
+ self.write(self.unique_sql, self.pending_uniques)
self.pending_impressions = []
+ self.pending_uniques = []
except Exception as e:
self.logger.exception("** UNHANDLED EXCEPTION WHILE PROCESSING
LANDING PAGE IMPRESSION **")
@@ -389,22 +431,22 @@
return results
@transaction.commit_manually
- def write(self, impressions):
+ def write(self, base_sql, impressions):
"""
Commits a batch of transactions. Attempts a single query per model by
splitting the
tuples of each banner impression and grouping by model. If that
fails, the function
falls back to a single transaction per banner impression
"""
- cursor = connections['default'].cursor()
-
i_len = len(impressions)
if not i_len:
return
+ cursor = connections['default'].cursor()
+
try:
# attempt to create all in batches
- cursor.execute(self.impression_sql % ', '.join(impressions))
+ cursor.execute(base_sql % ', '.join(impressions))
transaction.commit('default')
except IntegrityError as e:
@@ -415,7 +457,7 @@
return
for i in impressions:
- self.write([i])
+ self.write(base_sql, [i])
except Exception as e:
transaction.rollback()
@@ -432,4 +474,4 @@
return
for i in impressions:
- self.write([i])
+ self.write(base_sql, [i])
diff --git a/sql/001_create_donatewiki_unique.sql
b/sql/001_create_donatewiki_unique.sql
new file mode 100644
index 0000000..a11bd43
--- /dev/null
+++ b/sql/001_create_donatewiki_unique.sql
@@ -0,0 +1,23 @@
+CREATE TABLE IF NOT EXISTS `donatewiki_unique` (
+ id INT(11) UNSIGNED AUTO_INCREMENT,
+ timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
+ utm_source VARCHAR(255) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+ utm_campaign VARCHAR(255) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+ contact_id VARCHAR(32) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+ link_id VARCHAR(128) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+
+ PRIMARY KEY (id),
+ UNIQUE KEY (utm_source, utm_campaign, contact_id)
+);
+
+CREATE TABLE IF NOT EXISTS `donatewiki_counts` (
+ id INT(11) UNSIGNED AUTO_INCREMENT,
+ utm_source VARCHAR(255) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+ utm_campaign VARCHAR(255) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+ link_id VARCHAR(128) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+ count MEDIUMINT(11) UNSIGNED DEFAULT 0,
+
+ PRIMARY KEY (id),
+ UNIQUE KEY (utm_source, utm_campaign, link_id)
+);
+
diff --git a/sql/create.sql b/sql/create.sql
index b5506b3..feeb2bd 100644
--- a/sql/create.sql
+++ b/sql/create.sql
@@ -135,6 +135,29 @@
landingpage, project_id, language_id, country_id) -- WE SHOULD
OPTIMIZE THIS KEY, WHAT ORDER IS BEST?
) DEFAULT CHARACTER SET = utf8 ENGINE = InnoDB;
+CREATE TABLE IF NOT EXISTS `donatewiki_unique` (
+ id INT(11) UNSIGNED AUTO_INCREMENT,
+ timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
+ utm_source VARCHAR(255) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+ utm_campaign VARCHAR(255) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+ contact_id VARCHAR(32) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+ link_id VARCHAR(128) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+
+ PRIMARY KEY (id),
+ UNIQUE KEY (utm_source, utm_campaign, contact_id)
+) DEFAULT CHARACTER SET = utf8 ENGINE = InnoDB;
+
+CREATE TABLE IF NOT EXISTS `donatewiki_counts` (
+ id INT(11) UNSIGNED AUTO_INCREMENT,
+ utm_source VARCHAR(255) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+ utm_campaign VARCHAR(255) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+ link_id VARCHAR(128) CHARACTER SET utf8 DEFAULT '' NOT NULL,
+ count MEDIUMINT(11) UNSIGNED DEFAULT 0,
+
+ PRIMARY KEY (id),
+ UNIQUE KEY (utm_source, utm_campaign, link_id)
+) DEFAULT CHARACTER SET = utf8 ENGINE = InnoDB;
+
CREATE TABLE IF NOT EXISTS `globalcollect_orderids` (
orderid BIGINT(11) UNSIGNED NOT NULL,
@@ -147,4 +170,4 @@
PRIMARY KEY (orderid),
INDEX (allocated)
-) DEFAULT CHARACTER SET = utf8 ENGINE = InnoDB;
\ No newline at end of file
+) DEFAULT CHARACTER SET = utf8 ENGINE = InnoDB;
--
To view, visit https://gerrit.wikimedia.org/r/259404
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I2122cd0010f1f3a519b9bbb8632a8048356acfe2
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/fundraising/tools/DjangoBannerStats
Gerrit-Branch: master
Gerrit-Owner: Ejegg <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits