JophieQu commented on code in PR #232:
URL: https://github.com/apache/skywalking-go/pull/232#discussion_r2347128124


##########
plugins/core/reporter/pprof_manager.go:
##########
@@ -0,0 +1,280 @@
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package reporter
+
+import (
+       "context"
+       "io"
+       "strconv"
+       "time"
+
+       "github.com/apache/skywalking-go/plugins/core/operator"
+       commonv3 "github.com/apache/skywalking-go/protocols/collect/common/v3"
+       pprofv10 "github.com/apache/skywalking-go/protocols/collect/pprof/v10"
+)
+
+const (
+       // Pprof event types
+       EventsTypeCPU   = "cpu"
+       EventsTypeHeap  = "heap"
+       EventsTypeBlock = "block"
+       EventsTypeMutex = "mutex"
+       // max chunk size for pprof data
+       maxChunkSize = 1 * 1024 * 1024
+)
+
+type PprofTaskCommand interface {
+       GetEvent() string
+       GetCreateTime() int64
+       GetDuration() time.Duration
+       StartTask() (io.Writer, error)
+       StopTask(io.Writer)
+}
+type PprofReporter interface {
+       ReportPprof(taskID string, content []byte)
+}
+
+var NewPprofTaskCommand func(taskID, events string, duration time.Duration,
+       createTime int64, dumpPeriod int, pprofFilePath string,
+       logger operator.LogOperator, manager PprofReporter) PprofTaskCommand
+
+type PprofTaskManager struct {
+       logger         operator.LogOperator
+       serverAddr     string
+       pprofInterval  time.Duration
+       PprofClient    pprofv10.PprofTaskClient // for grpc
+       connManager    *ConnectionManager
+       entity         *Entity
+       pprofFilePath  string
+       LastUpdateTime int64
+       commands       PprofTaskCommand
+}
+
+func NewPprofTaskManager(logger operator.LogOperator, serverAddr string,
+       pprofInterval time.Duration, connManager *ConnectionManager,
+       pprofFilePath string) (*PprofTaskManager, error) {
+       PprofManager := &PprofTaskManager{
+               logger:        logger,
+               serverAddr:    serverAddr,
+               pprofInterval: pprofInterval,
+               connManager:   connManager,
+               pprofFilePath: pprofFilePath,
+       }
+       if pprofInterval > 0 {
+               conn, err := connManager.GetConnection(serverAddr)
+               if err != nil {
+                       return nil, err
+               }
+               PprofManager.PprofClient = pprofv10.NewPprofTaskClient(conn)
+               PprofManager.commands = nil
+       }
+       return PprofManager, nil
+}
+
+func (r *PprofTaskManager) InitPprofTask(entity *Entity) {
+       if r.PprofClient == nil {
+               return
+       }
+       r.entity = entity
+       go func() {
+               for {
+                       switch r.connManager.GetConnectionStatus(r.serverAddr) {
+                       case ConnectionStatusShutdown:
+                               return
+                       case ConnectionStatusDisconnect:
+                               time.Sleep(r.pprofInterval)
+                               continue
+                       }
+                       pprofCommand, err := 
r.PprofClient.GetPprofTaskCommands(context.Background(), 
&pprofv10.PprofTaskCommandQuery{
+                               Service:         r.entity.ServiceName,
+                               ServiceInstance: r.entity.ServiceInstanceName,
+                               LastCommandTime: r.LastUpdateTime,
+                       })
+                       if err != nil {
+                               r.logger.Errorf("fetch pprof task commands 
error %v", err)
+                               time.Sleep(r.pprofInterval)
+                               continue
+                       }
+
+                       if len(pprofCommand.GetCommands()) > 0 && 
pprofCommand.GetCommands()[0].Command == "PprofTaskQuery" {
+                               rawCommand := pprofCommand.GetCommands()[0]
+                               r.HandleCommand(rawCommand)
+                       }
+
+                       time.Sleep(r.pprofInterval)
+               }
+       }()
+}
+
+func (r *PprofTaskManager) HandleCommand(rawCommand *commonv3.Command) {
+       command := r.deserializePprofTaskCommand(rawCommand)
+       if command.GetCreateTime() > r.LastUpdateTime {
+               r.LastUpdateTime = command.GetCreateTime()
+       }
+
+       if command.GetEvent() == EventsTypeHeap {
+               // direct sampling of Heap
+               writer, err := command.StartTask()
+               if err != nil {
+                       r.logger.Errorf("start %s pprof error %v \n", 
command.GetEvent(), err)
+                       return
+               }
+               command.StopTask(writer)
+       } else {
+               // The CPU, Block, and Mutex sampling lasts for a duration and 
then stops
+               writer, err := command.StartTask()
+               if err != nil {
+                       r.logger.Errorf("start CPU pprof error %v \n", err)
+                       return
+               }
+               time.AfterFunc(command.GetDuration(), func() {
+                       command.StopTask(writer)
+               })
+       }
+}
+
+func (r *PprofTaskManager) deserializePprofTaskCommand(command 
*commonv3.Command) PprofTaskCommand {
+       args := command.Args
+       taskID := ""
+       events := ""
+       duration := 0
+       dumpPeriod := 0 // Use -1 to indicate no explicit value provided
+       var createTime int64 = 0
+       for _, pair := range args {
+               if pair.GetKey() == "TaskId" {
+                       taskID = pair.GetValue()
+               } else if pair.GetKey() == "Events" {
+                       events = pair.GetValue()
+               } else if pair.GetKey() == "Duration" {
+                       if val, err := strconv.Atoi(pair.GetValue()); err == 
nil && val > 0 {
+                               duration = val
+                       }
+               } else if pair.GetKey() == "DumpPeriod" {
+                       if val, err := strconv.Atoi(pair.GetValue()); err == 
nil && val >= 0 {
+                               dumpPeriod = val
+                       }
+               } else if pair.GetKey() == "CreateTime" {
+                       createTime, _ = strconv.ParseInt(pair.GetValue(), 10, 
64)
+               }
+       }
+
+       return NewPprofTaskCommand(
+               taskID,
+               events,
+               time.Duration(duration)*time.Minute,
+               createTime,
+               dumpPeriod,
+               r.pprofFilePath,
+               r.logger,
+               r,
+       )
+}
+
+func (r *PprofTaskManager) ReportPprof(taskID string, content []byte) {
+       metaData := &pprofv10.PprofMetaData{
+               Service:         r.entity.ServiceName,
+               ServiceInstance: r.entity.ServiceInstanceName,
+               TaskId:          taskID,
+               Type:            
pprofv10.PprofProfilingStatus_PPROF_PROFILING_SUCCESS,
+               ContentSize:     int32(len(content)),
+       }
+
+       go r.uploadPprofData(metaData, content, taskID)

Review Comment:
   pprof profiling is on-demand and task-driven, so it is possible that no pprof 
   task will be submitted for a long period of time. If we adopt the strategy of 
   maintaining only one goroutine and sending pprof data to it over a channel, 
   then that goroutine will just wait idly whenever there is no profiling task. 
   Would this waste resources?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to