Ottomata has uploaded a new change for review.
https://gerrit.wikimedia.org/r/265591
Change subject: Revert "Use webrequest_source text for AppSessionMetrics,
mobile is merging with text"
......................................................................
Revert "Use webrequest_source text for AppSessionMetrics, mobile is merging
with text"
The mobile -> text cache merge is on hold.
This reverts commit 35eb651aac794727ec31c2654ae15ebe0e076b45.
Change-Id: I201146ffa497296f24ab89b2d5738743ac37e18b
---
M refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
M refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
2 files changed, 11 insertions(+), 13 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source refs/changes/91/265591/1
diff --git
a/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
b/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
index 9ef5b95..6d6cf00 100644
---
a/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
+++
b/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
@@ -186,11 +186,11 @@
/**
* Generate list of Parquet file paths over a range of dates
- * @param webrequestTextPath Base path to webrequest text parquet data
+ * @param webrequestMobilePath Base path to webrequest mobile parquet data
* @param datesInfo Hashmap with report date related info
* @return List of path strings like [".../day=1", ".../day=2"]
*/
- def dateRangeToPathList(webrequestTextPath: String, datesInfo: Map[String,
Int]): List[String] = {
+ def dateRangeToPathList(webrequestMobilePath: String, datesInfo: Map[String,
Int]): List[String] = {
//Custom iterator for stepping through LocalDate objects
def makeDateRange(from: LocalDate, to: LocalDate, step: Period):
Iterator[LocalDate] =
Iterator.iterate(from)(_.plus(step)).takeWhile(_.isBefore(to))
@@ -198,7 +198,7 @@
val dateStart = new LocalDate(datesInfo("year"), datesInfo("month"),
datesInfo("day"))
val dateEnd = dateStart.plusDays(datesInfo("periodDays"))
val dateRange = makeDateRange(dateStart, dateEnd, new Period().withDays(1))
- dateRange.toList.map(dt =>
"%s/year=%d/month=%d/day=%d".format(webrequestTextPath, dt.getYear,
dt.getMonthOfYear, dt.getDayOfMonth))
+ dateRange.toList.map(dt =>
"%s/year=%d/month=%d/day=%d".format(webrequestMobilePath, dt.getYear,
dt.getMonthOfYear, dt.getDayOfMonth))
}
/**
@@ -211,8 +211,7 @@
*/
def pathListToUuidDataframe(paths: List[String], sqlContext: SQLContext):
DataFrame = {
sqlContext.parquetFile(paths: _*)
- .filter("is_pageview and access_method = 'mobile app' " +
- "and x_analytics_map['wmfuuid'] is not null and
x_analytics_map['wmfuuid'] != ''")
+ .filter("is_pageview and x_analytics_map['wmfuuid'] is not null and
x_analytics_map['wmfuuid'] != ''")
.selectExpr("x_analytics_map['wmfuuid'] as wmfuuid", "CAST(ts AS int) as
ts")
}
@@ -350,13 +349,12 @@
sqlContext.setConf("spark.sql.parquet.compression.codec", "snappy")
// Generate a list of all parquet file paths to read given the
webrequest base path,
- // and all dates related information. NOTE: As of January 2016,
- // mobile web caches have been merged with text, so
webrequest_source=text.
- val webrequestTextPath = params.webrequestBasePath +
"/webrequest_source=text"
+ // and all dates related information
+ val webrequestMobilePath = params.webrequestBasePath +
"/webrequest_source=mobile"
// Helper hashmap with all date related information to avoid passing
around lots of params
val datesInfo = HashMap("year" -> params.year, "month" ->
params.month, "day" -> params.day, "periodDays" -> params.periodDays)
// List of path strings like [".../day=1", ".../day=2"]
- val webrequestPaths = dateRangeToPathList(webrequestTextPath,
datesInfo)
+ val webrequestPaths = dateRangeToPathList(webrequestMobilePath,
datesInfo)
// Get sessions data for all users, calculate stats for different
metrics,
// and get the stats in a printable string format to output
diff --git
a/refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
b/refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
index 254f317..27064ec 100644
---
a/refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
+++
b/refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
@@ -34,15 +34,15 @@
test("List of parquet paths is generated correctly based on the report run
date and period") {
val datesInfo = HashMap("year" -> 2015, "month" -> 5, "day" -> 10,
"periodDays" -> 10)
- val webrequestTextPath = ".../webrequest_source=text"
- val pathList = AppSessionMetrics.dateRangeToPathList(webrequestTextPath,
datesInfo)
+ val webrequestMobilePath = ".../webrequest_source=mobile"
+ val pathList = AppSessionMetrics.dateRangeToPathList(webrequestMobilePath,
datesInfo)
//Assert the length of the list equals report period in days
assert(pathList.length == datesInfo("periodDays"))
//Assert the paths are being generated correctly
- assert(pathList.head ==
".../webrequest_source=text/year=2015/month=5/day=10")
- assert(pathList.last ==
".../webrequest_source=text/year=2015/month=5/day=19")
+ assert(pathList.head ==
".../webrequest_source=mobile/year=2015/month=5/day=10")
+ assert(pathList.last ==
".../webrequest_source=mobile/year=2015/month=5/day=19")
}
}
\ No newline at end of file
--
To view, visit https://gerrit.wikimedia.org/r/265591
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I201146ffa497296f24ab89b2d5738743ac37e18b
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits