Joal has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/391193 )
Change subject: Correct clickstream job ...................................................................... Correct clickstream job The job no longer creates date folders in addition to per-wiki folders. This makes it easier to archive files. Change-Id: I88886c83a88dd67693db12a17fc1ea877a4a030c --- M refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/ClickstreamBuilder.scala 1 file changed, 3 insertions(+), 4 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source refs/changes/93/391193/1 diff --git a/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/ClickstreamBuilder.scala b/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/ClickstreamBuilder.scala index 5052636..d1894eb 100644 --- a/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/ClickstreamBuilder.scala +++ b/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/ClickstreamBuilder.scala @@ -356,7 +356,7 @@ * Config class for CLI argument parser using scopt */ case class Params( - outputBasePath: String = "/wmf/data/archive/clickstream", + outputBasePath: String = "/tmp/clickstream", projectNamespaceTable: String = "wmf_raw.mediawiki_project_namespace_map", pageTable: String = "wmf_raw.mediawiki_page", redirectTable: String = "wmf_raw.mediawiki_redirect", @@ -380,7 +380,7 @@ note( """ |This job computes a clickstream dataset from one or more wiki(s). - |It creates a date folder, and per-wiki folders to store the results. + |It creates per-wiki folders to store the results. 
""".stripMargin) help("help") text ("Prints this usage text") @@ -495,8 +495,7 @@ }) val projectList = domainList.map(_.stripSuffix(".org")) - val outputFolder = params.outputBasePath + - f"/${params.year}%04d-${params.month}%02d${params.day.map(d => f"-$d%02d").getOrElse("")}${params.hour.map(h => f"-$h%02d").getOrElse("")}" + val outputFolder = params.outputBasePath // Reused RDDs val pages = preparePages(sqlContext, params.pageTable, params.snapshot, params.wikiList).cache() -- To view, visit https://gerrit.wikimedia.org/r/391193 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I88886c83a88dd67693db12a17fc1ea877a4a030c Gerrit-PatchSet: 1 Gerrit-Project: analytics/refinery/source Gerrit-Branch: master Gerrit-Owner: Joal <j...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits