[
https://issues.apache.org/jira/browse/BAHIR-110?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16080456#comment-16080456
]
ASF GitHub Bot commented on BAHIR-110:
--------------------------------------
Github user emlaver commented on a diff in the pull request:
https://github.com/apache/bahir/pull/45#discussion_r126446901
--- Diff:
sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantConfig.scala ---
@@ -30,81 +28,83 @@ import org.apache.bahir.cloudant.common._
*/
class CloudantConfig(val protocol: String, val host: String,
- val dbName: String, val indexName: String = null, val viewName: String
= null)
+ val dbName: String, val indexName: String, val viewName: String)
(implicit val username: String, val password: String,
val partitions: Int, val maxInPartition: Int, val minInPartition: Int,
val requestTimeout: Long, val bulkSize: Int, val schemaSampleSize: Int,
- val createDBOnSave: Boolean, val selector: String, val useQuery:
Boolean = false,
- val queryLimit: Int)
- extends Serializable{
+ val createDBOnSave: Boolean, val apiReceiver: String,
+ val useQuery: Boolean = false, val queryLimit: Int)
+ extends Serializable {
- private lazy val dbUrl = {protocol + "://" + host + "/" + dbName}
+ lazy val dbUrl: String = {protocol + "://" + host + "/" + dbName}
val pkField = "_id"
- val defaultIndex = "_all_docs" // "_changes" does not work for partition
+ val defaultIndex: String = apiReceiver
val default_filter: String = "*:*"
- def getContinuousChangesUrl(): String = {
- var url = dbUrl +
"/_changes?include_docs=true&feed=continuous&heartbeat=3000"
- if (selector != null) {
- url = url + "&filter=_selector"
- }
- url
- }
-
- def getSelector() : String = {
- selector
- }
-
- def getDbUrl(): String = {
+ def getDbUrl: String = {
dbUrl
}
- def getSchemaSampleSize(): Int = {
+ def getSchemaSampleSize: Int = {
schemaSampleSize
}
- def getCreateDBonSave(): Boolean = {
+ def getCreateDBonSave: Boolean = {
createDBOnSave
}
- def getTotalUrl(url: String): String = {
- if (url.contains('?')) {
- url + "&limit=1"
- } else {
- url + "?limit=1"
- }
- }
-
- def getDbname(): String = {
- dbName
- }
-
- def queryEnabled(): Boolean = {useQuery && indexName==null &&
viewName==null}
-
- def allowPartition(queryUsed: Boolean): Boolean = {indexName==null &&
!queryUsed}
-
- def getAllDocsUrl(limit: Int, excludeDDoc: Boolean = false): String = {
+ def getLastNum(result: JsValue): JsValue = (result \ "last_seq").get
+ /* Url containing limit for docs in a Cloudant database.
+ * If a view is not defined, use the _all_docs endpoint.
+ * @return url with one doc limit for retrieving total doc count
+ */
+ def getUrl(limit: Int, excludeDDoc: Boolean = false): String = {
if (viewName == null) {
- val baseUrl = (
- if ( excludeDDoc) dbUrl +
"/_all_docs?startkey=%22_design0/%22&include_docs=true"
- else dbUrl + "/_all_docs?include_docs=true"
- )
- if (limit == JsonStoreConfigManager.ALL_DOCS_LIMIT) {
+ val baseUrl = {
+ if (excludeDDoc) {
+ dbUrl + "/_all_docs?startkey=%22_design0/%22&include_docs=true"
--- End diff --
I'll open a new JIRA issue for this.
> Replace use of _all_docs API with _changes API in all receivers
> ---------------------------------------------------------------
>
> Key: BAHIR-110
> URL: https://issues.apache.org/jira/browse/BAHIR-110
> Project: Bahir
> Issue Type: Improvement
> Reporter: Esteban Laver
> Original Estimate: 216h
> Remaining Estimate: 216h
>
> Today we use the _changes API for the Spark streaming receiver and the _all_docs API
> for the non-streaming receiver. The _all_docs API supports parallel reads (using
> offset and range), but the performance of the _changes API is still better in most
> cases (even with single-threaded support).
> With this ticket we want to:
> a) re-implement all receivers using the _changes API
> b) compare performance between the two implementations based on _changes and
> _all_docs
> Based on the results in b), we could decide to either
> - replace the _all_docs implementation with the _changes-based implementation, OR
> - allow customers to pick one (with solid documentation about the pros and
> cons)
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)