Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package rpm2docserv for openSUSE:Factory checked in at 2022-11-25 13:13:16 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/rpm2docserv (Old) and /work/SRC/openSUSE:Factory/.rpm2docserv.new.1597 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "rpm2docserv" Fri Nov 25 13:13:16 2022 rev:8 rq:1038048 version:20221125.be8d83b Changes: -------- --- /work/SRC/openSUSE:Factory/rpm2docserv/rpm2docserv.changes 2022-11-21 16:35:17.333052900 +0100 +++ /work/SRC/openSUSE:Factory/.rpm2docserv.new.1597/rpm2docserv.changes 2022-11-25 13:23:07.847671962 +0100 @@ -1,0 +2,6 @@ +Fri Nov 25 08:52:48 UTC 2022 - ku...@suse.com + +- Update to version 20221125.be8d83b: + * Split sitemap.xml into chunks, use yaml config + +------------------------------------------------------------------- Old: ---- rpm2docserv-20221121.c1d43dd.tar.xz New: ---- rpm2docserv-20221125.be8d83b.tar.xz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ rpm2docserv.spec ++++++ --- /var/tmp/diff_new_pack.7fmLiN/_old 2022-11-25 13:23:08.471675329 +0100 +++ /var/tmp/diff_new_pack.7fmLiN/_new 2022-11-25 13:23:08.475675351 +0100 @@ -17,7 +17,7 @@ Name: rpm2docserv -Version: 20221121.c1d43dd +Version: 20221125.be8d83b Release: 0 Summary: Make manpages from RPMs accessible in a web browser License: Apache-2.0 ++++++ _servicedata ++++++ --- /var/tmp/diff_new_pack.7fmLiN/_old 2022-11-25 13:23:08.519675588 +0100 +++ /var/tmp/diff_new_pack.7fmLiN/_new 2022-11-25 13:23:08.523675610 +0100 @@ -1,7 +1,7 @@ <servicedata> <service name="tar_scm"> <param name="url">https://github.com/thkukuk/rpm2docserv.git</param> - <param name="changesrevision">c1d43dd5f511041930af449d08a1c7e62baf8a29</param></service> + <param name="changesrevision">be8d83b7cad07b59db5563cc3cc29e392dd45e2d</param></service> </servicedata> (No newline at EOF) ++++++ rpm2docserv-20221121.c1d43dd.tar.xz -> rpm2docserv-20221125.be8d83b.tar.xz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rpm2docserv-20221121.c1d43dd/cmd/docserv-sitemap/sitemap.go new/rpm2docserv-20221125.be8d83b/cmd/docserv-sitemap/sitemap.go --- old/rpm2docserv-20221121.c1d43dd/cmd/docserv-sitemap/sitemap.go 2022-11-21 15:41:21.000000000 +0100 +++ new/rpm2docserv-20221125.be8d83b/cmd/docserv-sitemap/sitemap.go 2022-11-25 09:47:26.000000000 +0100 @@ -10,8 +10,12 @@ "net/url" "os" "path/filepath" + "strconv" + "strings" "time" + "gopkg.in/yaml.v3" + "github.com/thkukuk/rpm2docserv/pkg/sitemap" "github.com/thkukuk/rpm2docserv/pkg/write" ) @@ -25,6 +29,10 @@ "/srv/docserv", "Directory in which to place the manpages which should be served") + yamlConfig = flag.String("config", + "", + "Configuration file in yaml format") + verbose = flag.Bool("verbose", false, "Print additional status messages") @@ -37,6 +45,42 @@ // use go build -ldflags "-X main.rpm2docservVersion=<version>" to set the version var rpm2docservVersion = "HEAD" +type Suites struct { + Name string `yaml:"name"` + Cache []string `yaml:"cache,omitempty"` + Packages []string `yaml:"packages,omitempty"` +} + +type Config struct { + ProductName string `yaml:"productname,omitempty"` + ProductUrl string `yaml:"producturl,omitempty"` + LogoUrl string `yaml:"logourl,omitempty"` + AssetsDir string `yaml:"assets,omitempty"` + ServingDir string `yaml:"servingdir"` + IndexPath string `yaml:"auxindex"` + Download string `yaml:"download"` + IsOffline bool `yaml:"offline,omitempty"` + BaseUrl string `yaml:"baseurl,omitempty"` + Products []Suites `yaml:"products"` + SortOrder []string `yaml:"sortorder"` +} + +func read_yaml_config(conffile string) (Config, error) { + + var config Config + + file, err := ioutil.ReadFile(conffile) + if err != nil { + return config, fmt.Errorf("Cannot read %q: %v", conffile, err) + } + err = yaml.Unmarshal(file, &config) + if err != nil { + return config, fmt.Errorf("Unmarshal error: %v", err) + } + + return config, nil +} + func main() { flag.Parse() @@ -45,6 +89,19 @@ return } + if len(*yamlConfig) > 0 { + config, err := read_yaml_config(*yamlConfig) + if err != nil { + log.Fatal(err) + } + if len(config.ServingDir) > 0 { + servingDir = &config.ServingDir + } + if len(config.BaseUrl) > 0 { + baseURL = &config.BaseUrl + } + } + if len(*baseURL) == 0 { log.Fatal("Usage: docserv-sitemap --base-url=<URL> [--serving-dir=<dir>]") } @@ -57,6 +114,29 @@ } } +func collectFiles(basedir string, dir string, sitemapEntries map[string]time.Time) error { + + fn := filepath.Join(basedir, dir) + entries, err := ioutil.ReadDir (fn) + if err != nil { + return fmt.Errorf("Cannot open %v: %v", fn, err) + } + + for _, bfn := range entries { + if bfn.IsDir() || + bfn.Name() == "sitemap.xml.gz" { + continue + } + + n := strings.TrimSuffix(bfn.Name(), ".gz") + + if filepath.Ext(n) == ".html" && !bfn.ModTime().IsZero() { + sitemapEntries[dir + "/" + n] = bfn.ModTime() + } + } + return nil +} + func walkDirs(dir string, baseURL string) error { sitemaps := make(map[string]time.Time) @@ -73,70 +153,88 @@ log.Printf("Searching in \"%v\"...", sfi.Name()) } + // openSUSE Tumbleweed has ~11000 package entries, 120000 should + // be good enough as start + sitemapEntries := make(map[string]time.Time, 120000) + fn := filepath.Join(*servingDir, sfi.Name()) - bins, err := os.Open(fn) + entrydirs, err := ioutil.ReadDir (fn) if err != nil { return fmt.Errorf("Cannot open %v: %v", fn, err) } - defer bins.Close() - - // openSUSE Tumbleweed has ~11000 package entries, 20000 should - // be good enough as start - sitemapEntries := make(map[string]time.Time, 20000) - for { - if *verbose { - log.Print("Calling Readdirnames...") + for _, bfn := range entrydirs { + if bfn.Name() == "sitemap.xml.gz" { + continue } - names, err := bins.Readdirnames(0) - if err != nil { - if err == io.EOF { - break + + if !bfn.ModTime().IsZero() { + if bfn.IsDir() { + collectFiles(fn, bfn.Name(), sitemapEntries) } else { - return fmt.Errorf ("Readdirnames failed: %v", err) + sitemapEntries[bfn.Name()] = bfn.ModTime() } } - if *verbose { - log.Printf("Readdirnames found %d entries...", len(names)) + + } + + + escapedUrlPath := &url.URL{Path: sfi.Name()} + if *verbose { + log.Printf("Writing %d entries to %s/%s", len(sitemapEntries), dir, escapedUrlPath) + } + + // Split sitemapEntries in smaller chunks + // Google has a limit of 50.000 entries per file + count := 0 + chunkSize := 45000 + batchKeys := make([]string, 0, chunkSize) + saveChunks := func() error { + chunk := make(map[string]time.Time, len(batchKeys)) + for _, v := range batchKeys { + chunk[v] = sitemapEntries[v] } + batchKeys = batchKeys[:0] - if len(names) == 0 { - break + sitemapPath := filepath.Join(dir, sfi.Name(), "sitemap" + strconv.Itoa(count) + ".xml.gz") + if *verbose { + log.Printf("Writing %d entries to %s", len(chunk), sitemapPath) + } + if err := write.Atomically(sitemapPath, true, func(w io.Writer) error { + return sitemap.WriteTo(w, baseURL+"/" + escapedUrlPath.String(), chunk) + }); err != nil { + return fmt.Errorf("Write sitemap for %v failed: %v", sfi.Name(), err) + } + st, err := os.Stat(sitemapPath) + if err == nil { + sitemaps[escapedUrlPath.String() + "/sitemap" + strconv.Itoa(count) + ".xml"] = st.ModTime() } + count++ - for _, bfn := range names { - if bfn == "sourcesWithManpages.txt.gz" || - bfn == "index.html.gz" || - bfn == "sitemap.xml.gz" || - bfn == ".nobackup" { - continue - } + return nil + } - fn := filepath.Join(dir, sfi.Name(), bfn) - fi, err := os.Stat(fn) + for k := range sitemapEntries { + batchKeys = append(batchKeys, k) + if len(batchKeys) == chunkSize { + err = saveChunks() if err != nil { - return fmt.Errorf("Stat(%v) failed: %v", fn, err) - } - - if !fi.ModTime().IsZero() { - sitemapEntries[bfn] = fi.ModTime() + return err } } } - bins.Close() - - sitemapPath := filepath.Join(dir, sfi.Name(), "sitemap.xml.gz") - escapedUrlPath := &url.URL{Path: sfi.Name()} - if err := write.Atomically(sitemapPath, true, func(w io.Writer) error { - return sitemap.WriteTo(w, baseURL+"/" + escapedUrlPath.String(), sitemapEntries) - }); err != nil { - return fmt.Errorf("Write sitemap for %v failed: %v", sfi.Name(), err) - } - st, err := os.Stat(sitemapPath) - if err == nil { - sitemaps[escapedUrlPath.String()] = st.ModTime() + // Process last, potentially incomplete batch + if len(batchKeys) > 0 { + err = saveChunks() + if err != nil { + return err + } } } + + if *verbose { + log.Printf("Writing %d entries to sitemapindex.xml", len(sitemaps)) + } return write.Atomically(filepath.Join(dir, "sitemapindex.xml.gz"), true, func(w io.Writer) error { return sitemap.WriteIndexTo(w, baseURL, sitemaps) }) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rpm2docserv-20221121.c1d43dd/cmd/rpm2docserv/main.go new/rpm2docserv-20221125.be8d83b/cmd/rpm2docserv/main.go --- old/rpm2docserv-20221121.c1d43dd/cmd/rpm2docserv/main.go 2022-11-21 15:41:21.000000000 +0100 +++ new/rpm2docserv-20221125.be8d83b/cmd/rpm2docserv/main.go 2022-11-25 09:47:26.000000000 +0100 @@ -35,6 +35,7 @@ IndexPath string `yaml:"auxindex"` Download string `yaml:"download"` IsOffline bool `yaml:"offline,omitempty"` + BaseUrl string `yaml:"baseurl,omitempty"` Products []Suites `yaml:"products"` SortOrder []string `yaml:"sortorder"` } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rpm2docserv-20221121.c1d43dd/pkg/sitemap/sitemap.go new/rpm2docserv-20221125.be8d83b/pkg/sitemap/sitemap.go --- old/rpm2docserv-20221121.c1d43dd/pkg/sitemap/sitemap.go 2022-11-21 15:41:21.000000000 +0100 +++ new/rpm2docserv-20221125.be8d83b/pkg/sitemap/sitemap.go 2022-11-25 09:47:26.000000000 +0100 @@ -40,14 +40,14 @@ if err := enc.EncodeToken(start); err != nil { return err } - pkgs := make([]string, 0, len(contents)) - for binarypkg := range contents { - pkgs = append(pkgs, binarypkg) + files := make([]string, 0, len(contents)) + for entry := range contents { + files = append(files, entry) } - sort.Strings(pkgs) - for _, binarypkg := range pkgs { + sort.Strings(files) + for _, binarypkg := range files { if err := enc.EncodeElement(&url{ - Loc: fmt.Sprintf("%s/%s/index.html", baseUrl, binarypkg), + Loc: fmt.Sprintf("%s/%s", baseUrl, binarypkg), Lastmod: contents[binarypkg].Format(sitemapDateFormat), }, xml.StartElement{Name: xml.Name{Local: "url"}}); err != nil { return err @@ -85,7 +85,7 @@ sort.Strings(pkgs) for _, suite := range pkgs { if err := enc.EncodeElement(&sitemap{ - Loc: fmt.Sprintf("%s/%s/sitemap.xml.gz", baseUrl, suite), + Loc: fmt.Sprintf("%s/%s", baseUrl, suite), Lastmod: contents[suite].Format(sitemapDateFormat), }, xml.StartElement{Name: xml.Name{Local: "sitemap"}}); err != nil { return err