Copilot commented on code in PR #255:
URL: https://github.com/apache/skywalking-eyes/pull/255#discussion_r2636257265


##########
pkg/deps/ruby.go:
##########
@@ -116,12 +133,104 @@ func (r *GemfileLockResolver) Resolve(lockfile string, 
config *ConfigDeps, repor
        return nil
 }
 
+// GemspecResolver resolves dependencies from a .gemspec file.
+// It extracts runtime dependencies defined in the gemspec and recursively 
resolves
+// their transitive dependencies by looking up installed gems in the local 
environment.
+type GemspecResolver struct {
+       Resolver
+}
+
+// CanResolve checks if the given file is a .gemspec file.
+func (r *GemspecResolver) CanResolve(file string) bool {
+       return strings.HasSuffix(file, ".gemspec")
+}
+
+// Resolve parses the gemspec file, identifies runtime dependencies, and 
resolves
+// them along with their transitive dependencies. It reports the found 
dependencies
+// and their licenses.
+func (r *GemspecResolver) Resolve(file string, config *ConfigDeps, report 
*Report) error {
+       f, err := os.Open(file)
+       if err != nil {
+               return err
+       }
+       defer f.Close()
+
+       scanner := bufio.NewScanner(f)
+       deps := make(map[string]string) // name -> version constraint
+       for scanner.Scan() {
+               line := scanner.Text()
+               trimLeft := strings.TrimLeft(line, " \t")
+               if strings.HasPrefix(trimLeft, "#") {
+                       continue
+               }
+               if m := gemspecRuntimeRe.FindStringSubmatch(line); len(m) == 2 {
+                       deps[m[1]] = "" // We don't extract version constraint 
yet, or we could improve regex
+               }
+       }
+       if err := scanner.Err(); err != nil {
+               return err
+       }
+
+       // Recursive resolution
+       queue := make([]string, 0, len(deps))
+       visited := make(map[string]struct{}, len(deps))
+       for name := range deps {
+               queue = append(queue, name)
+               visited[name] = struct{}{}
+       }
+
+       for i := 0; i < len(queue); i++ {
+               name := queue[i]
+               // Find installed gemspec for 'name'
+               path, err := findInstalledGemspec(name, "")
+               if err == nil && path != "" {
+                       // Parse dependencies of this gemspec
+                       newDeps, err := parseGemspecDependencies(path)
+                       if err == nil {
+                               for _, dep := range newDeps {
+                                       if _, ok := visited[dep]; !ok {
+                                               visited[dep] = struct{}{}
+                                               queue = append(queue, dep)
+                                               if _, ok := deps[dep]; !ok {
+                                                       deps[dep] = ""
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }

Review Comment:
   The BFS implementation for transitive dependency resolution (lines 182-201) 
could potentially create an infinite loop if there's a circular dependency in 
the gem ecosystem. While the `visited` map prevents revisiting the same gem 
name, consider adding a depth limit or maximum queue size check to prevent 
excessive resource consumption in pathological cases. For example, if gem A 
depends on B, B depends on C, and C depends on A (circular), the current code 
handles it correctly, but adding a safety check would make this more robust: 
`if len(queue) > 10000 { return fmt.Errorf("dependency graph too large") }`



##########
pkg/deps/ruby_test.go:
##########
@@ -117,6 +117,81 @@ func TestRubyGemfileLockResolver(t *testing.T) {
                        t.Fatalf("expected 1 dependency for library, got %d", 
len(report.Resolved)+len(report.Skipped))
                }
        }
+
+       // Citrus case: library with gemspec, no runtime deps
+       {
+               tmp := t.TempDir()
+               if err := copyRuby("testdata/ruby/citrus", tmp); err != nil {
+                       t.Fatal(err)
+               }
+               lock := filepath.Join(tmp, "Gemfile.lock")
+               if !resolver.CanResolve(lock) {
+                       t.Fatalf("GemfileLockResolver cannot resolve %s", lock)
+               }
+               cfg := &ConfigDeps{Files: []string{lock}}
+               report := Report{}
+               if err := resolver.Resolve(lock, cfg, &report); err != nil {
+                       t.Fatal(err)
+               }
+               // Should have 0 dependencies because citrus has no runtime deps
+               if len(report.Resolved)+len(report.Skipped) != 0 {
+                       t.Fatalf("expected 0 dependencies, got %d", 
len(report.Resolved)+len(report.Skipped))
+               }
+       }
+
+       // Local dependency case: App depends on local gem (citrus)
+       {
+               tmp := t.TempDir()
+               if err := copyRuby("testdata/ruby/local_dep", tmp); err != nil {
+                       t.Fatal(err)
+               }
+               lock := filepath.Join(tmp, "Gemfile.lock")
+               if !resolver.CanResolve(lock) {
+                       t.Fatalf("GemfileLockResolver cannot resolve %s", lock)
+               }
+               cfg := &ConfigDeps{Files: []string{lock}}
+               report := Report{}
+               if err := resolver.Resolve(lock, cfg, &report); err != nil {
+                       t.Fatal(err)
+               }
+
+               // We expect citrus to be resolved with MIT license.
+               // This validates that local path dependencies are correctly 
resolved.
+               found := false
+               for _, r := range report.Resolved {
+                       if r.Dependency == "citrus" {
+                               found = true
+                               if r.LicenseSpdxID != "MIT" {
+                                       t.Errorf("expected MIT license for 
citrus, got %s", r.LicenseSpdxID)
+                               }
+                       }
+               }
+               // Also check Skipped if it failed
+               for _, r := range report.Skipped {
+                       if strings.HasPrefix(r.Dependency, "citrus") {
+                               // Historically citrus landed here; keep this 
check to ensure it no longer appears in Skipped with Unknown license.
+                               t.Logf("citrus found in Skipped with license 
%s", r.LicenseSpdxID)
+                               if r.LicenseSpdxID == "Unknown" {
+                                       t.Errorf("citrus license is Unknown")
+                               }
+                       }
+               }
+
+               if !found {
+                       // If it's in Skipped, found is false.
+                       // We want it to be in Resolved.
+                       // But for now, let's just assert it is present in 
either.
+                       inSkipped := false
+                       for _, r := range report.Skipped {
+                               if strings.HasPrefix(r.Dependency, "citrus") {
+                                       inSkipped = true
+                               }
+                       }
+                       if !inSkipped {
+                               t.Fatal("expected citrus to be in the report")
+                       }
+               }

Review Comment:
   The comment on line 172 contains a typo: "Historically citrus landed here; 
keep this check to ensure it no longer appears in Skipped with Unknown 
license." This comment suggests the test is checking for a regression, but the 
logic seems contradictory. If we expect citrus to be in Resolved (line 
160-167), why do we then check if it's in Skipped and log that as acceptable? 
The test logic should be clarified: either citrus should always be in Resolved 
(fail if in Skipped), or it's acceptable in either location. The current 
implementation accepts both but prefers Resolved, which may mask issues.



##########
pkg/deps/ruby.go:
##########
@@ -116,12 +133,104 @@ func (r *GemfileLockResolver) Resolve(lockfile string, 
config *ConfigDeps, repor
        return nil
 }
 
+// GemspecResolver resolves dependencies from a .gemspec file.
+// It extracts runtime dependencies defined in the gemspec and recursively 
resolves
+// their transitive dependencies by looking up installed gems in the local 
environment.
+type GemspecResolver struct {
+       Resolver
+}
+
+// CanResolve checks if the given file is a .gemspec file.
+func (r *GemspecResolver) CanResolve(file string) bool {
+       return strings.HasSuffix(file, ".gemspec")
+}
+
+// Resolve parses the gemspec file, identifies runtime dependencies, and 
resolves
+// them along with their transitive dependencies. It reports the found 
dependencies
+// and their licenses.
+func (r *GemspecResolver) Resolve(file string, config *ConfigDeps, report 
*Report) error {
+       f, err := os.Open(file)
+       if err != nil {
+               return err
+       }
+       defer f.Close()
+
+       scanner := bufio.NewScanner(f)
+       deps := make(map[string]string) // name -> version constraint
+       for scanner.Scan() {
+               line := scanner.Text()
+               trimLeft := strings.TrimLeft(line, " \t")
+               if strings.HasPrefix(trimLeft, "#") {
+                       continue
+               }
+               if m := gemspecRuntimeRe.FindStringSubmatch(line); len(m) == 2 {
+                       deps[m[1]] = "" // We don't extract version constraint 
yet, or we could improve regex

Review Comment:
   The comment states "We don't extract version constraint yet, or we could 
improve regex". This suggests incomplete functionality - the version constraint 
information is being discarded but might be needed for proper dependency 
resolution. If version constraints are important for matching installed gems or 
resolving conflicts, this limitation should either be addressed or documented 
more clearly as a known limitation with potential implications.



##########
pkg/deps/ruby.go:
##########
@@ -104,7 +108,20 @@ func (r *GemfileLockResolver) Resolve(lockfile string, 
config *ConfigDeps, repor
                        continue
                }
 
-               licenseID, err := fetchRubyGemsLicense(name, version)
+               if localPath != "" {
+                       fullPath := filepath.Join(dir, localPath)
+                       license, err := fetchLocalLicense(fullPath, name)
+                       if err == nil && license != "" {
+                               report.Resolve(&Result{Dependency: name, 
LicenseSpdxID: license, Version: version})
+                               continue

Review Comment:
   The localPath value comes from parsing the Gemfile.lock remote field (line 
318) and is used directly in filepath.Join without validation (line 112). While 
Gemfile.lock is typically a trusted file generated by Bundler, a malicious or 
corrupted lockfile could contain path traversal sequences like 
"../../../etc/passwd". Consider validating that the resolved path stays within 
expected boundaries or at least doesn't traverse upward beyond the project 
directory. Use filepath.Clean and check if the result starts with the expected 
base directory.
   ```suggestion
                        baseDir, err := filepath.Abs(dir)
                        if err != nil {
                                logrus.WithError(err).Warn("failed to resolve 
base directory for local gem path")
                        } else {
                                candidatePath := 
filepath.Clean(filepath.Join(baseDir, localPath))
                                if candidatePath == baseDir || 
strings.HasPrefix(candidatePath, baseDir+string(os.PathSeparator)) {
                                        fullPath := candidatePath
                                        license, err := 
fetchLocalLicense(fullPath, name)
                                        if err == nil && license != "" {
                                                
report.Resolve(&Result{Dependency: name, LicenseSpdxID: license, Version: 
version})
                                                continue
                                        }
                                } else {
                                        logrus.WithField("path", 
localPath).Warn("ignoring potentially unsafe local gem path outside project 
directory")
                                }
   ```



##########
pkg/deps/ruby.go:
##########
@@ -104,7 +108,20 @@ func (r *GemfileLockResolver) Resolve(lockfile string, 
config *ConfigDeps, repor
                        continue
                }
 
-               licenseID, err := fetchRubyGemsLicense(name, version)
+               if localPath != "" {
+                       fullPath := filepath.Join(dir, localPath)
+                       license, err := fetchLocalLicense(fullPath, name)
+                       if err == nil && license != "" {
+                               report.Resolve(&Result{Dependency: name, 
LicenseSpdxID: license, Version: version})
+                               continue

Review Comment:
   When fetchLocalLicense fails (line 113), the error is silently ignored and 
the code falls through to check installed gems and the RubyGems API. While this 
provides graceful degradation, it could mask issues like permission errors or 
path traversal attempts. Consider logging the error at debug level to aid 
troubleshooting, especially since this is a new feature for local path 
dependencies. For example: `if err != nil { logrus.Debugf("Failed to fetch 
local license for %s at %s: %v", name, fullPath, err) }`
   ```suggestion
                                continue
                        } else if err != nil {
                                logrus.Debugf("Failed to fetch local license 
for %s at %s: %v", name, fullPath, err)
   ```



##########
pkg/deps/ruby.go:
##########
@@ -116,12 +133,104 @@ func (r *GemfileLockResolver) Resolve(lockfile string, 
config *ConfigDeps, repor
        return nil
 }
 
+// GemspecResolver resolves dependencies from a .gemspec file.
+// It extracts runtime dependencies defined in the gemspec and recursively 
resolves
+// their transitive dependencies by looking up installed gems in the local 
environment.
+type GemspecResolver struct {
+       Resolver
+}
+
+// CanResolve checks if the given file is a .gemspec file.
+func (r *GemspecResolver) CanResolve(file string) bool {
+       return strings.HasSuffix(file, ".gemspec")
+}
+
+// Resolve parses the gemspec file, identifies runtime dependencies, and 
resolves
+// them along with their transitive dependencies. It reports the found 
dependencies
+// and their licenses.
+func (r *GemspecResolver) Resolve(file string, config *ConfigDeps, report 
*Report) error {
+       f, err := os.Open(file)
+       if err != nil {
+               return err
+       }
+       defer f.Close()
+
+       scanner := bufio.NewScanner(f)
+       deps := make(map[string]string) // name -> version constraint
+       for scanner.Scan() {
+               line := scanner.Text()
+               trimLeft := strings.TrimLeft(line, " \t")
+               if strings.HasPrefix(trimLeft, "#") {
+                       continue
+               }
+               if m := gemspecRuntimeRe.FindStringSubmatch(line); len(m) == 2 {
+                       deps[m[1]] = "" // We don't extract version constraint 
yet, or we could improve regex
+               }
+       }
+       if err := scanner.Err(); err != nil {
+               return err
+       }
+
+       // Recursive resolution
+       queue := make([]string, 0, len(deps))
+       visited := make(map[string]struct{}, len(deps))
+       for name := range deps {
+               queue = append(queue, name)
+               visited[name] = struct{}{}
+       }
+
+       for i := 0; i < len(queue); i++ {
+               name := queue[i]
+               // Find installed gemspec for 'name'
+               path, err := findInstalledGemspec(name, "")
+               if err == nil && path != "" {
+                       // Parse dependencies of this gemspec
+                       newDeps, err := parseGemspecDependencies(path)
+                       if err == nil {
+                               for _, dep := range newDeps {
+                                       if _, ok := visited[dep]; !ok {
+                                               visited[dep] = struct{}{}
+                                               queue = append(queue, dep)
+                                               if _, ok := deps[dep]; !ok {
+                                                       deps[dep] = ""
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       for name, version := range deps {
+               if exclude, _ := config.IsExcluded(name, version); exclude {
+                       continue
+               }
+               if l, ok := config.GetUserConfiguredLicense(name, version); ok {
+                       report.Resolve(&Result{Dependency: name, LicenseSpdxID: 
l, Version: version})
+                       continue
+               }
+
+               // Assume remote dependency
+               licenseID := fetchInstalledLicense(name, version)
+               var err error
+               if licenseID == "" {
+                       licenseID, err = fetchRubyGemsLicense(name, version)
+               }

Review Comment:
   The comment on line 212 states "Assume remote dependency" but this is placed 
after checking `fetchInstalledLicense`, which looks for locally installed gems. 
This comment is misleading - at this point we're checking both installed and 
remote licenses. The comment should be removed or clarified to say something 
like "Check installed gems first, then fallback to RubyGems API".



##########
pkg/deps/ruby.go:
##########
@@ -214,6 +370,10 @@ func hasGemspec(dir string) bool {
 }
 
 var gemspecRuntimeRe = 
regexp.MustCompile(`\badd_(?:runtime_)?dependency\s*\(?\s*["']([^"']+)["']`)
+var gemspecLicenseRe = regexp.MustCompile(`\.licenses?\s*=\s*([^#]*)`)

Review Comment:
   The regex pattern `\.licenses?\s*=\s*([^#]*)` used in gemspecLicenseRe will 
capture everything until a `#` character or end of line, including potential 
trailing whitespace, commas, and other characters. This could lead to capturing 
invalid content like `s.licenses = ['MIT'] # comment` capturing `['MIT'] ` 
(with trailing space). While the gemspecStringRe is used to extract quoted 
strings, the captured group `[^#]*` is overly permissive. Consider using a more 
specific pattern like `\.licenses?\s*=\s*(\[[^\]]*\]|['"][^'"]*['"])` to match 
arrays or single strings more precisely.
   ```suggestion
   var gemspecLicenseRe = 
regexp.MustCompile(`\.licenses?\s*=\s*(\[[^\]]*\]|['"][^'"]*['"])`)
   ```



##########
pkg/deps/ruby.go:
##########
@@ -136,68 +245,115 @@ func parseGemfileLock(s string) (graph gemGraph, roots 
[]string, err error) {
        scanner.Split(bufio.ScanLines)
        graph = make(gemGraph)
 
-       inSpecs := false
-       inDeps := false
-       var current *gemSpec
+       state := &lockParserState{
+               graph: graph,
+       }
 
        for scanner.Scan() {
-               line := scanner.Text()
-               if strings.HasPrefix(line, "GEM") {
-                       inSpecs = true
-                       inDeps = false
-                       current = nil
-                       continue
-               }
-               if strings.HasPrefix(line, "DEPENDENCIES") {
-                       inSpecs = false
-                       inDeps = true
-                       current = nil
-                       continue
-               }
-               if strings.TrimSpace(line) == "specs:" && inSpecs {
-                       // just a marker
-                       continue
-               }
+               state.processLine(scanner.Text())
+       }
+       if err := scanner.Err(); err != nil {
+               return nil, nil, err
+       }
+       return graph, state.roots, nil
+}
 
-               if inSpecs {
-                       if m := lockSpecHeader.FindStringSubmatch(line); len(m) 
== 3 {
-                               name := m[1]
-                               version := m[2]
-                               current = &gemSpec{Name: name, Version: version}
-                               graph[name] = current
-                               continue
-                       }
-                       if current != nil {
-                               if m := lockDepLine.FindStringSubmatch(line); 
len(m) == 2 {
-                                       depName := m[1]
-                                       current.Deps = append(current.Deps, 
depName)
-                               }
-                       }
-                       continue
+type lockParserState struct {
+       inSpecs           bool
+       inDeps            bool
+       inPath            bool
+       current           *gemSpec
+       currentRemotePath string

Review Comment:
   The variable name `currentRemotePath` is misleading. In the context of a 
PATH block in Gemfile.lock, the "remote:" field specifies a local filesystem 
path (like "." or "citrus"), not a remote URL. A more accurate name would be 
`currentLocalPath` or `currentPathSpec`. This would better reflect that it's 
capturing local filesystem paths for local dependencies, not remote repository 
URLs.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to