Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package rpm2docserv for openSUSE:Factory 
checked in at 2022-11-25 13:13:16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/rpm2docserv (Old)
 and      /work/SRC/openSUSE:Factory/.rpm2docserv.new.1597 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "rpm2docserv"

Fri Nov 25 13:13:16 2022 rev:8 rq:1038048 version:20221125.be8d83b

Changes:
--------
--- /work/SRC/openSUSE:Factory/rpm2docserv/rpm2docserv.changes  2022-11-21 
16:35:17.333052900 +0100
+++ /work/SRC/openSUSE:Factory/.rpm2docserv.new.1597/rpm2docserv.changes        
2022-11-25 13:23:07.847671962 +0100
@@ -1,0 +2,6 @@
+Fri Nov 25 08:52:48 UTC 2022 - ku...@suse.com
+
+- Update to version 20221125.be8d83b:
+  * Split sitemap.xml into chunks, use yaml config
+
+-------------------------------------------------------------------

Old:
----
  rpm2docserv-20221121.c1d43dd.tar.xz

New:
----
  rpm2docserv-20221125.be8d83b.tar.xz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ rpm2docserv.spec ++++++
--- /var/tmp/diff_new_pack.7fmLiN/_old  2022-11-25 13:23:08.471675329 +0100
+++ /var/tmp/diff_new_pack.7fmLiN/_new  2022-11-25 13:23:08.475675351 +0100
@@ -17,7 +17,7 @@
 
 
 Name:           rpm2docserv
-Version:        20221121.c1d43dd
+Version:        20221125.be8d83b
 Release:        0
 Summary:        Make manpages from RPMs accessible in a web browser
 License:        Apache-2.0

++++++ _servicedata ++++++
--- /var/tmp/diff_new_pack.7fmLiN/_old  2022-11-25 13:23:08.519675588 +0100
+++ /var/tmp/diff_new_pack.7fmLiN/_new  2022-11-25 13:23:08.523675610 +0100
@@ -1,7 +1,7 @@
 <servicedata>
   <service name="tar_scm">
     <param name="url">https://github.com/thkukuk/rpm2docserv.git</param>
-  <param 
name="changesrevision">c1d43dd5f511041930af449d08a1c7e62baf8a29</param></service>
+  <param 
name="changesrevision">be8d83b7cad07b59db5563cc3cc29e392dd45e2d</param></service>
 </servicedata>
 (No newline at EOF)
 

++++++ rpm2docserv-20221121.c1d43dd.tar.xz -> 
rpm2docserv-20221125.be8d83b.tar.xz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' 
old/rpm2docserv-20221121.c1d43dd/cmd/docserv-sitemap/sitemap.go 
new/rpm2docserv-20221125.be8d83b/cmd/docserv-sitemap/sitemap.go
--- old/rpm2docserv-20221121.c1d43dd/cmd/docserv-sitemap/sitemap.go     
2022-11-21 15:41:21.000000000 +0100
+++ new/rpm2docserv-20221125.be8d83b/cmd/docserv-sitemap/sitemap.go     
2022-11-25 09:47:26.000000000 +0100
@@ -10,8 +10,12 @@
        "net/url"
        "os"
        "path/filepath"
+       "strconv"
+       "strings"
        "time"
 
+       "gopkg.in/yaml.v3"
+
        "github.com/thkukuk/rpm2docserv/pkg/sitemap"
        "github.com/thkukuk/rpm2docserv/pkg/write"
 )
@@ -25,6 +29,10 @@
                 "/srv/docserv",
                 "Directory in which to place the manpages which should be 
served")
 
+       yamlConfig = flag.String("config",
+                "",
+                "Configuration file in yaml format")
+
         verbose = flag.Bool("verbose",
                 false,
                 "Print additional status messages")
@@ -37,6 +45,42 @@
 // use go build -ldflags "-X main.rpm2docservVersion=<version>" to set the 
version
 var rpm2docservVersion = "HEAD"
 
+type Suites struct {
+        Name     string   `yaml:"name"`
+        Cache    []string `yaml:"cache,omitempty"`
+        Packages []string `yaml:"packages,omitempty"`
+}
+
+type Config struct {
+        ProductName string `yaml:"productname,omitempty"`
+        ProductUrl  string `yaml:"producturl,omitempty"`
+        LogoUrl     string `yaml:"logourl,omitempty"`
+        AssetsDir   string `yaml:"assets,omitempty"`
+        ServingDir  string `yaml:"servingdir"`
+        IndexPath   string `yaml:"auxindex"`
+        Download    string `yaml:"download"`
+        IsOffline   bool   `yaml:"offline,omitempty"`
+        BaseUrl     string `yaml:"baseurl,omitempty"`
+        Products    []Suites `yaml:"products"`
+        SortOrder   []string `yaml:"sortorder"`
+}
+
+func read_yaml_config(conffile string) (Config, error) {
+
+        var config Config
+
+        file, err := ioutil.ReadFile(conffile)
+        if err != nil {
+                return config, fmt.Errorf("Cannot read %q: %v", conffile, err)
+        }
+        err = yaml.Unmarshal(file, &config)
+        if err != nil {
+                return config, fmt.Errorf("Unmarshal error: %v", err)
+        }
+
+        return config, nil
+}
+
 func main() {
        flag.Parse()
 
@@ -45,6 +89,19 @@
                return
         }
 
+        if len(*yamlConfig) > 0 {
+                config, err := read_yaml_config(*yamlConfig)
+                if err != nil {
+                        log.Fatal(err)
+                }
+               if len(config.ServingDir) > 0 {
+                        servingDir = &config.ServingDir
+                }
+               if len(config.BaseUrl) > 0 {
+                       baseURL = &config.BaseUrl
+                }
+       }
+
        if len(*baseURL) == 0 {
                log.Fatal("Usage: docserv-sitemap --base-url=<URL> 
[--serving-dir=<dir>]")
        }
@@ -57,6 +114,29 @@
        }
 }
 
+func collectFiles(basedir string, dir string, sitemapEntries 
map[string]time.Time) error {
+
+       fn := filepath.Join(basedir, dir)
+       entries, err := ioutil.ReadDir (fn)
+       if err != nil {
+               return fmt.Errorf("Cannot open %v: %v", fn, err)
+       }
+
+       for _, bfn := range entries {
+               if bfn.IsDir() ||
+                       bfn.Name() == "sitemap.xml.gz" {
+                       continue
+               }
+
+               n := strings.TrimSuffix(bfn.Name(), ".gz")
+
+               if filepath.Ext(n) == ".html" && !bfn.ModTime().IsZero() {
+                       sitemapEntries[dir + "/" + n] = bfn.ModTime()
+               }
+       }
+       return nil
+}
+
 func walkDirs(dir string, baseURL string) error {
        sitemaps := make(map[string]time.Time)
 
@@ -73,70 +153,88 @@
                        log.Printf("Searching in \"%v\"...", sfi.Name())
                }
 
+               // openSUSE Tumbleweed has ~11000 package entries, 120000 should
+               // be good enough as start
+               sitemapEntries := make(map[string]time.Time, 120000)
+
                fn := filepath.Join(*servingDir, sfi.Name())
-               bins, err := os.Open(fn)
+               entrydirs, err := ioutil.ReadDir (fn)
                if err != nil {
                        return fmt.Errorf("Cannot open %v: %v", fn, err)
                }
-               defer bins.Close()
-
-               // openSUSE Tumbleweed has ~11000 package entries, 20000 should
-               // be good enough as start
-               sitemapEntries := make(map[string]time.Time, 20000)
 
-               for {
-                       if *verbose {
-                               log.Print("Calling Readdirnames...")
+               for _, bfn := range entrydirs {
+                       if bfn.Name() == "sitemap.xml.gz" {
+                               continue
                        }
-                       names, err := bins.Readdirnames(0)
-                       if err != nil {
-                               if err == io.EOF {
-                                       break
+
+                       if !bfn.ModTime().IsZero() {
+                               if bfn.IsDir() {
+                                       collectFiles(fn, bfn.Name(), 
sitemapEntries)
                                } else {
-                                       return fmt.Errorf ("Readdirnames 
failed: %v", err)
+                                       sitemapEntries[bfn.Name()] = 
bfn.ModTime()
                                }
                        }
-                       if *verbose {
-                               log.Printf("Readdirnames found %d entries...", 
len(names))
+
+               }
+
+
+               escapedUrlPath := &url.URL{Path: sfi.Name()}
+               if *verbose {
+                       log.Printf("Writing %d entries to %s/%s", 
len(sitemapEntries), dir, escapedUrlPath)
+               }
+
+               // Split sitemapEntries in smaller chunks
+               // Google has a limit of 50.000 entries per file
+               count := 0
+               chunkSize := 45000
+               batchKeys := make([]string, 0, chunkSize)
+               saveChunks := func() error {
+                       chunk := make(map[string]time.Time, len(batchKeys))
+                       for _, v := range batchKeys {
+                               chunk[v] = sitemapEntries[v]
                        }
+                       batchKeys = batchKeys[:0]
 
-                       if len(names) == 0 {
-                               break
+                       sitemapPath := filepath.Join(dir, sfi.Name(), "sitemap" 
+ strconv.Itoa(count) + ".xml.gz")
+                       if *verbose {
+                               log.Printf("Writing %d entries to %s", 
len(chunk), sitemapPath)
+                       }
+                       if err := write.Atomically(sitemapPath, true, func(w 
io.Writer) error {
+                               return sitemap.WriteTo(w, baseURL+"/" + 
escapedUrlPath.String(), chunk)
+                       }); err != nil {
+                               return fmt.Errorf("Write sitemap for %v failed: 
%v", sfi.Name(), err)
+                       }
+                       st, err := os.Stat(sitemapPath)
+                       if err == nil {
+                               sitemaps[escapedUrlPath.String() + "/sitemap" + 
strconv.Itoa(count) + ".xml"] = st.ModTime()
                        }
+                       count++
 
-                       for _, bfn := range names {
-                               if bfn == "sourcesWithManpages.txt.gz" ||
-                                       bfn == "index.html.gz" ||
-                                       bfn == "sitemap.xml.gz" ||
-                                       bfn == ".nobackup" {
-                                       continue
-                               }
+                       return nil
+               }
 
-                               fn := filepath.Join(dir, sfi.Name(), bfn)
-                               fi, err := os.Stat(fn)
+               for k := range sitemapEntries {
+                       batchKeys = append(batchKeys, k)
+                       if len(batchKeys) == chunkSize {
+                               err = saveChunks()
                                if err != nil {
-                                       return fmt.Errorf("Stat(%v) failed: 
%v", fn, err)
-                               }
-
-                               if !fi.ModTime().IsZero() {
-                                       sitemapEntries[bfn] = fi.ModTime()
+                                       return err
                                }
                        }
                }
-               bins.Close()
-
-               sitemapPath := filepath.Join(dir, sfi.Name(), "sitemap.xml.gz")
-               escapedUrlPath := &url.URL{Path: sfi.Name()}
-               if err := write.Atomically(sitemapPath, true, func(w io.Writer) 
error {
-                       return sitemap.WriteTo(w, baseURL+"/" + 
escapedUrlPath.String(), sitemapEntries)
-               }); err != nil {
-                       return fmt.Errorf("Write sitemap for %v failed: %v", 
sfi.Name(), err)
-               }
-               st, err := os.Stat(sitemapPath)
-               if err == nil {
-                       sitemaps[escapedUrlPath.String()] = st.ModTime()
+               // Process last, potentially incomplete batch
+               if len(batchKeys) > 0 {
+                       err = saveChunks()
+                       if err != nil {
+                               return err
+                       }
                }
        }
+
+       if *verbose {
+               log.Printf("Writing %d entries to sitemapindex.xml", 
len(sitemaps))
+       }
        return write.Atomically(filepath.Join(dir, "sitemapindex.xml.gz"), 
true, func(w io.Writer) error {
                return sitemap.WriteIndexTo(w, baseURL, sitemaps)
        })
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/rpm2docserv-20221121.c1d43dd/cmd/rpm2docserv/main.go 
new/rpm2docserv-20221125.be8d83b/cmd/rpm2docserv/main.go
--- old/rpm2docserv-20221121.c1d43dd/cmd/rpm2docserv/main.go    2022-11-21 
15:41:21.000000000 +0100
+++ new/rpm2docserv-20221125.be8d83b/cmd/rpm2docserv/main.go    2022-11-25 
09:47:26.000000000 +0100
@@ -35,6 +35,7 @@
        IndexPath   string `yaml:"auxindex"`
        Download    string `yaml:"download"`
        IsOffline   bool   `yaml:"offline,omitempty"`
+       BaseUrl     string `yaml:"baseurl,omitempty"`
        Products    []Suites `yaml:"products"`
        SortOrder   []string `yaml:"sortorder"`
 }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/rpm2docserv-20221121.c1d43dd/pkg/sitemap/sitemap.go 
new/rpm2docserv-20221125.be8d83b/pkg/sitemap/sitemap.go
--- old/rpm2docserv-20221121.c1d43dd/pkg/sitemap/sitemap.go     2022-11-21 
15:41:21.000000000 +0100
+++ new/rpm2docserv-20221125.be8d83b/pkg/sitemap/sitemap.go     2022-11-25 
09:47:26.000000000 +0100
@@ -40,14 +40,14 @@
        if err := enc.EncodeToken(start); err != nil {
                return err
        }
-       pkgs := make([]string, 0, len(contents))
-       for binarypkg := range contents {
-               pkgs = append(pkgs, binarypkg)
+       files := make([]string, 0, len(contents))
+       for entry := range contents {
+               files = append(files, entry)
        }
-       sort.Strings(pkgs)
-       for _, binarypkg := range pkgs {
+       sort.Strings(files)
+       for _, binarypkg := range files {
                if err := enc.EncodeElement(&url{
-                       Loc:     fmt.Sprintf("%s/%s/index.html", baseUrl, 
binarypkg),
+                       Loc:     fmt.Sprintf("%s/%s", baseUrl, binarypkg),
                        Lastmod: contents[binarypkg].Format(sitemapDateFormat),
                }, xml.StartElement{Name: xml.Name{Local: "url"}}); err != nil {
                        return err
@@ -85,7 +85,7 @@
        sort.Strings(pkgs)
        for _, suite := range pkgs {
                if err := enc.EncodeElement(&sitemap{
-                       Loc:     fmt.Sprintf("%s/%s/sitemap.xml.gz", baseUrl, 
suite),
+                       Loc:     fmt.Sprintf("%s/%s", baseUrl, suite),
                        Lastmod: contents[suite].Format(sitemapDateFormat),
                }, xml.StartElement{Name: xml.Name{Local: "sitemap"}}); err != 
nil {
                        return err

Reply via email to