Re: [PR] go-sdk: parallelize bundle discovery with bounded concurrency [airflow]

2026-02-08 Thread via GitHub


Nataneljpwd commented on PR #58827:
URL: https://github.com/apache/airflow/pull/58827#issuecomment-3866503921

   @prdai I think a rebase is needed to fix the failing tests


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] go-sdk: parallelize bundle discovery with bounded concurrency [airflow]

2026-02-05 Thread via GitHub


github-actions[bot] commented on PR #58827:
URL: https://github.com/apache/airflow/pull/58827#issuecomment-3857111984

   This pull request has been automatically marked as stale because it has not 
had recent activity. It will be closed in 5 days if no further activity occurs. 
Thank you for your contributions.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] go-sdk: parallelize bundle discovery with bounded concurrency [airflow]

2025-12-22 Thread via GitHub


Nataneljpwd commented on code in PR #58827:
URL: https://github.com/apache/airflow/pull/58827#discussion_r2639248425


##
go-sdk/pkg/bundles/shared/discovery.go:
##
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context) 
error {
if err != nil {
self = ""
}
+   maxProcesses := runtime.GOMAXPROCS(0)
+   g, ctx := errgroup.WithContext(ctx)
+   g.SetLimit(maxProcesses)
+   var mu sync.Mutex
 
for _, file := range files {
-   if ctx.Err() != nil {
-   // Check if we are done.
-   return ctx.Err()
-   }
-
-   // Check if file is executable
-   if !isExecutable(file) {
-   continue
-   }
+   g.Go(func() error {
+   if ctx.Err() != nil {
+   // Check if we are done.
+   return ctx.Err()
+   }
 
-   abs, err := filepath.Abs(file)
-   if err != nil {
-   d.logger.Warn("Unable to load resolve file path", 
"file", file, "err", err)
-   continue
-   }
-   if self != "" && self == abs {
-   d.logger.Warn("Not trying to load ourselves as a 
plugin", "file", file)
-   continue
-   }
+   // Check if file is executable
+   if !isExecutable(file) {
+   return nil
+   }
 
-   d.logger.Debug("Found potential bundle", slog.String("path", 
file))
+   abs, err := filepath.Abs(file)
+   if err != nil {
+   d.logger.Warn("Unable to load resolve file 
path", "file", file, "err", err)
+   return nil
+   }
+   if self != "" && self == abs {
+   d.logger.Warn("Not trying to load ourselves as 
a plugin", "file", file)
+   return nil
+   }
 
-   // TODO: Use a sync.WaitGroup to parallelize running multiple 
procs without blowing concurrency up and fork-bombing
-   // the host
-   bundle, err := d.getBundleVersionInfo(file)
-   if err != nil {
-   d.logger.Warn("Unable to load BundleMetadata", "file", 
file, "err", err)
-   continue
-   }
+   d.logger.Debug("Found potential bundle", 
slog.String("path", file))
 
-   versions, exists := d.bundles[bundle.Name]
-   if !exists {
-   versions = make(map[string]string)
-   d.bundles[bundle.Name] = versions
-   }
+   bundle, err := d.getBundleVersionInfo(file)
+   if err != nil {
+   d.logger.Warn("Unable to load BundleMetadata", 
"file", file, "err", err)
+   return nil
+   }
+   mu.Lock()
+   versions, exists := d.bundles[bundle.Name]
+   if !exists {
+   versions = make(map[string]string)
+   d.bundles[bundle.Name] = versions
+   }
 
-   var key string
-   logAs := bundle.Name
-   if bundle.Version != nil {
-   key = *bundle.Version
-   logAs = fmt.Sprintf("%s@%s", bundle.Name, key)
-   }
-   d.logger.Info(
-   "Discovered bundle",
-   "key",
-   logAs,
-   "file",
-   file,
-   )
-   versions[key] = file
+   var key string
+   logAs := bundle.Name
+   if bundle.Version != nil {
+   key = *bundle.Version
+   logAs = fmt.Sprintf("%s@%s", bundle.Name, key)
+   }
+   d.logger.Info(
+   "Discovered bundle",
+   "key",
+   logAs,
+   "file",
+   file,
+   )
+   versions[key] = file
+   mu.Unlock()
+   return nil
+   })
}
-   return nil
+   return g.Wait()

Review Comment:
   Understood, thank you



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, 

Re: [PR] go-sdk: parallelize bundle discovery with bounded concurrency [airflow]

2025-12-22 Thread via GitHub


Nataneljpwd commented on code in PR #58827:
URL: https://github.com/apache/airflow/pull/58827#discussion_r2639244907


##
go-sdk/pkg/bundles/shared/discovery.go:
##
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context) 
error {
if err != nil {
self = ""
}
+   maxProcesses := runtime.GOMAXPROCS(0)
+   g, ctx := errgroup.WithContext(ctx)

Review Comment:
   Ok, sounds great, resolved



##
go-sdk/pkg/bundles/shared/discovery.go:
##
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context) 
error {
if err != nil {
self = ""
}
+   maxProcesses := runtime.GOMAXPROCS(0)
+   g, ctx := errgroup.WithContext(ctx)

Review Comment:
   Ok, sounds great, resolved



##
go-sdk/pkg/bundles/shared/discovery.go:
##
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context) 
error {
if err != nil {
self = ""
}
+   maxProcesses := runtime.GOMAXPROCS(0)
+   g, ctx := errgroup.WithContext(ctx)

Review Comment:
   Ok, sounds great, resolved



##
go-sdk/pkg/bundles/shared/discovery.go:
##
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context) 
error {
if err != nil {
self = ""
}
+   maxProcesses := runtime.GOMAXPROCS(0)
+   g, ctx := errgroup.WithContext(ctx)
+   g.SetLimit(maxProcesses)

Review Comment:
   Ok, sounds good



##
go-sdk/pkg/bundles/shared/discovery.go:
##
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context) 
error {
if err != nil {
self = ""
}
+   maxProcesses := runtime.GOMAXPROCS(0)
+   g, ctx := errgroup.WithContext(ctx)
+   g.SetLimit(maxProcesses)

Review Comment:
   Ok, sounds good



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] go-sdk: parallelize bundle discovery with bounded concurrency [airflow]

2025-12-07 Thread via GitHub


prdai commented on code in PR #58827:
URL: https://github.com/apache/airflow/pull/58827#discussion_r2596202841


##
go-sdk/pkg/bundles/shared/discovery.go:
##
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context) 
error {
if err != nil {
self = ""
}
+   maxProcesses := runtime.GOMAXPROCS(0)
+   g, ctx := errgroup.WithContext(ctx)
+   g.SetLimit(maxProcesses)
+   var mu sync.Mutex
 
for _, file := range files {
-   if ctx.Err() != nil {
-   // Check if we are done.
-   return ctx.Err()
-   }
-
-   // Check if file is executable
-   if !isExecutable(file) {
-   continue
-   }
+   g.Go(func() error {
+   if ctx.Err() != nil {
+   // Check if we are done.
+   return ctx.Err()
+   }
 
-   abs, err := filepath.Abs(file)
-   if err != nil {
-   d.logger.Warn("Unable to load resolve file path", 
"file", file, "err", err)
-   continue
-   }
-   if self != "" && self == abs {
-   d.logger.Warn("Not trying to load ourselves as a 
plugin", "file", file)
-   continue
-   }
+   // Check if file is executable
+   if !isExecutable(file) {
+   return nil
+   }
 
-   d.logger.Debug("Found potential bundle", slog.String("path", 
file))
+   abs, err := filepath.Abs(file)
+   if err != nil {
+   d.logger.Warn("Unable to load resolve file 
path", "file", file, "err", err)
+   return nil
+   }
+   if self != "" && self == abs {
+   d.logger.Warn("Not trying to load ourselves as 
a plugin", "file", file)
+   return nil
+   }
 
-   // TODO: Use a sync.WaitGroup to parallelize running multiple 
procs without blowing concurrency up and fork-bombing
-   // the host
-   bundle, err := d.getBundleVersionInfo(file)
-   if err != nil {
-   d.logger.Warn("Unable to load BundleMetadata", "file", 
file, "err", err)
-   continue
-   }
+   d.logger.Debug("Found potential bundle", 
slog.String("path", file))
 
-   versions, exists := d.bundles[bundle.Name]
-   if !exists {
-   versions = make(map[string]string)
-   d.bundles[bundle.Name] = versions
-   }
+   bundle, err := d.getBundleVersionInfo(file)
+   if err != nil {
+   d.logger.Warn("Unable to load BundleMetadata", 
"file", file, "err", err)
+   return nil
+   }
+   mu.Lock()
+   versions, exists := d.bundles[bundle.Name]
+   if !exists {
+   versions = make(map[string]string)
+   d.bundles[bundle.Name] = versions
+   }
 
-   var key string
-   logAs := bundle.Name
-   if bundle.Version != nil {
-   key = *bundle.Version
-   logAs = fmt.Sprintf("%s@%s", bundle.Name, key)
-   }
-   d.logger.Info(
-   "Discovered bundle",
-   "key",
-   logAs,
-   "file",
-   file,
-   )
-   versions[key] = file
+   var key string
+   logAs := bundle.Name
+   if bundle.Version != nil {
+   key = *bundle.Version
+   logAs = fmt.Sprintf("%s@%s", bundle.Name, key)
+   }
+   d.logger.Info(
+   "Discovered bundle",
+   "key",
+   logAs,
+   "file",
+   file,
+   )
+   versions[key] = file
+   mu.Unlock()
+   return nil
+   })
}
-   return nil
+   return g.Wait()

Review Comment:
   The reasoning is that we would otherwise not be able to utilize the context.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to

Re: [PR] go-sdk: parallelize bundle discovery with bounded concurrency [airflow]

2025-12-07 Thread via GitHub


prdai commented on code in PR #58827:
URL: https://github.com/apache/airflow/pull/58827#discussion_r2596202646


##
go-sdk/pkg/bundles/shared/discovery.go:
##
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context) 
error {
if err != nil {
self = ""
}
+   maxProcesses := runtime.GOMAXPROCS(0)
+   g, ctx := errgroup.WithContext(ctx)
+   g.SetLimit(maxProcesses)

Review Comment:
   It's dynamically determined at runtime. It's not exactly meant to scale 
processes up or down; it's more so that we can utilize the maximum number of 
processes available on whatever device this code is executed on.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] go-sdk: parallelize bundle discovery with bounded concurrency [airflow]

2025-12-07 Thread via GitHub


prdai commented on code in PR #58827:
URL: https://github.com/apache/airflow/pull/58827#discussion_r2596201882


##
go-sdk/pkg/bundles/shared/discovery.go:
##
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context) 
error {
if err != nil {
self = ""
}
+   maxProcesses := runtime.GOMAXPROCS(0)
+   g, ctx := errgroup.WithContext(ctx)

Review Comment:
   It is because we don't have much control in that scenario. Since a context 
is already provided, we can utilize it in the scenario where the context is 
cancelled or done; otherwise, if we were to use plain goroutines, we wouldn't 
be able to cancel the routines we spun up.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] go-sdk: parallelize bundle discovery with bounded concurrency [airflow]

2025-11-30 Thread via GitHub


Nataneljpwd commented on code in PR #58827:
URL: https://github.com/apache/airflow/pull/58827#discussion_r2574928382


##
go-sdk/pkg/bundles/shared/discovery.go:
##
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context) 
error {
if err != nil {
self = ""
}
+   maxProcesses := runtime.GOMAXPROCS(0)
+   g, ctx := errgroup.WithContext(ctx)

Review Comment:
   Why not just use plain goroutines with a channel sending the results instead?



##
go-sdk/pkg/bundles/shared/discovery.go:
##
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context) 
error {
if err != nil {
self = ""
}
+   maxProcesses := runtime.GOMAXPROCS(0)
+   g, ctx := errgroup.WithContext(ctx)
+   g.SetLimit(maxProcesses)

Review Comment:
   Isn't it already set or dynamically determined during runtime, scaling 
processes up or down to handle the workload and I/O wait time?



##
go-sdk/pkg/bundles/shared/discovery.go:
##
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context) 
error {
if err != nil {
self = ""
}
+   maxProcesses := runtime.GOMAXPROCS(0)
+   g, ctx := errgroup.WithContext(ctx)
+   g.SetLimit(maxProcesses)
+   var mu sync.Mutex
 
for _, file := range files {
-   if ctx.Err() != nil {
-   // Check if we are done.
-   return ctx.Err()
-   }
-
-   // Check if file is executable
-   if !isExecutable(file) {
-   continue
-   }
+   g.Go(func() error {
+   if ctx.Err() != nil {
+   // Check if we are done.
+   return ctx.Err()
+   }
 
-   abs, err := filepath.Abs(file)
-   if err != nil {
-   d.logger.Warn("Unable to load resolve file path", 
"file", file, "err", err)
-   continue
-   }
-   if self != "" && self == abs {
-   d.logger.Warn("Not trying to load ourselves as a 
plugin", "file", file)
-   continue
-   }
+   // Check if file is executable
+   if !isExecutable(file) {
+   return nil
+   }
 
-   d.logger.Debug("Found potential bundle", slog.String("path", 
file))
+   abs, err := filepath.Abs(file)
+   if err != nil {
+   d.logger.Warn("Unable to load resolve file 
path", "file", file, "err", err)
+   return nil
+   }
+   if self != "" && self == abs {
+   d.logger.Warn("Not trying to load ourselves as 
a plugin", "file", file)
+   return nil
+   }
 
-   // TODO: Use a sync.WaitGroup to parallelize running multiple 
procs without blowing concurrency up and fork-bombing
-   // the host
-   bundle, err := d.getBundleVersionInfo(file)
-   if err != nil {
-   d.logger.Warn("Unable to load BundleMetadata", "file", 
file, "err", err)
-   continue
-   }
+   d.logger.Debug("Found potential bundle", 
slog.String("path", file))
 
-   versions, exists := d.bundles[bundle.Name]
-   if !exists {
-   versions = make(map[string]string)
-   d.bundles[bundle.Name] = versions
-   }
+   bundle, err := d.getBundleVersionInfo(file)
+   if err != nil {
+   d.logger.Warn("Unable to load BundleMetadata", 
"file", file, "err", err)
+   return nil
+   }
+   mu.Lock()
+   versions, exists := d.bundles[bundle.Name]
+   if !exists {
+   versions = make(map[string]string)
+   d.bundles[bundle.Name] = versions
+   }
 
-   var key string
-   logAs := bundle.Name
-   if bundle.Version != nil {
-   key = *bundle.Version
-   logAs = fmt.Sprintf("%s@%s", bundle.Name, key)
-   }
-   d.logger.Info(
-   "Discovered bundle",
-   "key",
-   logAs,
-   "file",
-   file,
-   )
-   versions[key] = file
+   var key string
+   logAs := bundle.Name
+