acelyc111 commented on code in PR #2285:
URL:
https://github.com/apache/incubator-pegasus/pull/2285#discussion_r2301635378
##########
go-client/session/replica_session.go:
##########
@@ -188,6 +188,8 @@ type ReplicaManager struct {
creator NodeSessionCreator
unresponsiveHandler UnresponsiveHandler
+
+ enablePerfCounter bool
Review Comment:
Maybe `Metrics` is more general than `PerfCounter`?
##########
go-client/session/meta_call.go:
##########
@@ -141,7 +141,7 @@ func (c *metaCall) issueSingleMeta(ctx context.Context,
curLeader int) bool {
c.lock.Lock()
c.metaIPAddrs = append(c.metaIPAddrs, addr)
c.metas = append(c.metas, &metaSession{
- NodeSession: newNodeSession(addr, NodeTypeMeta),
+ NodeSession: newNodeSession(addr, NodeTypeMeta,
false),
Review Comment:
A bare `true`/`false` argument is not self-explanatory at the call site. How
about defining named `const`s to express enabling or disabling metrics?
##########
go-client/pegasus/table_connector.go:
##########
@@ -715,8 +719,35 @@ func (p *pegasusTableConnector) Incr(ctx context.Context,
hashKey []byte, sortKe
}
func (p *pegasusTableConnector) runPartitionOp(ctx context.Context, hashKey
[]byte, req op.Request, optype OpType) (interface{}, error) {
+ start := time.Now()
+ var errResult error
+ if p.enablePerfCounter {
+ defer func() {
+ elapsed := time.Since(start).Nanoseconds()
+ pm := metrics.GetPrometheusMetrics()
+ status := "success"
+ if errResult != nil {
+ if errors.Is(ctx.Err(),
context.DeadlineExceeded) {
+ status = "timeout"
+ } else {
+ status = "fail"
+ }
+ }
+
+ // The metaIP is added to the metric name because some
users may use multiple client instances
+ // within a single process to access tables with the
same name in different availability zones.
+ // Including the metaIP helps to distinguish monitoring
metrics for tables with the same name.
+ // The reason for not putting metaIP into labels
(Prometheus) or tags (Falcon) is that labels/tags
+ // are designed to be unique and constant for a single
process.
+ firstMetaIP :=
strings.ReplaceAll(p.meta.GetMetaIPAddrs()[0], ".", "_")
+
pm.MarkMeter(fmt.Sprintf("pegasus_client_%s_%s_%s_total_%s", p.tableName,
optype.String(), status, firstMetaIP), 1)
+
pm.ObserveSummary(fmt.Sprintf("pegasus_client_%s_%s_%s_latency_%s",
p.tableName, optype.String(), status, firstMetaIP), float64(elapsed))
Review Comment:
What about shortening the metric name and adding more Prometheus labels to
the metric? Then you can run flexible PromQL queries on these metrics.
##########
go-client/pegasus/table_connector.go:
##########
@@ -726,6 +757,7 @@ func (p *pegasusTableConnector) runPartitionOp(ctx
context.Context, hashKey []by
confUpdated, retry, err = p.handleReplicaError(err, part)
return
})
+ errResult = err
Review Comment:
Where will `errResult` be used?
##########
go-client/metrics/falcon.go:
##########
@@ -0,0 +1,215 @@
+package metrics
Review Comment:
Is Falcon support necessary? Prometheus is a more popular monitoring
framework; is it possible to keep only Prometheus support? The server side
is already working on removing Falcon.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]