You can have multiple BaseX servers reading and writing to a single set of 
databases.

For example, in my web application I have five servers running: 1 to satisfy 
the web pages and 4 to perform queries. I had to implement my redirector to 
direct REST requests to the least-loaded server—maybe there’s a better way to 
do that that I’m not aware of?

I also depend heavily on indexes I build over my content to support specific 
queries (in my case, link where-used information) and make heavy use of 
attribute and token indexes.

Tamara Marnell recommends generating results on the server and then signalling 
the client (or responding to polls from the client) to return the result, 
avoiding having long-lived client-to-server HTTP connections—I haven’t had a 
chance to implement this approach, but I think it will help a lot once I can 
get to it.

Here’s my code (10.7) to get the least-loaded server port, which is used from 
my REST handler. If there’s a better way to do this with BaseX 10 I don’t know 
what it is—I put this together quickly to resolve an immediate performance 
issue. This is running on CentOS Linux on an 8-core machine, so not the 
beefiest but it’s what I can get quickly from our internal IT group.

The REST handler handles an incoming URL and redirects it to the appropriate 
server port:

(:~
 : REST API entry point for GET /now/rest/api.
 :
 : A request that arrives on the web server port is redirected to a worker
 : server via now:redirectRestGet(). A request that arrives on any other port
 : is answered directly with a document listing the names returned by
 : util:getFamilyDatabaseNames(), sorted in descending order.
 :
 : @return either the redirect response or a mirabel element
 :)
declare
  %rest:GET
  %rest:path('/now/rest/api')
  %output:method('xml')
function now:root(
) as item()* {
  let $incomingPort as xs:integer := request:port()
  (: The message is kept on a single line so that the dumped text does not
     contain an embedded line break. :)
  let $debug := prof:dump(``[REST API Handler: Starting. Port `{$incomingPort}`]``)
  return
    if ($incomingPort eq $now:webserverPort)
    then now:redirectRestGet()
    else
      <mirabel timestamp="{current-dateTime()}">
        <databases>{
          for $database in util:getFamilyDatabaseNames()
          order by $database descending
          return <database name="{$database}"/>
        }</databases>
      </mirabel>
};

The redirectRestGet() function (there’s a corresponding updating version for 
POST requests):

(:~
 : Redirect the request to the appropriate back-end worker based on reported
 : CPU load.
 :
 : The target URI comes from now:getRedirectionTargetURI(); it is logged and
 : then handed to web:redirect().
 :
 : @return the result of web:redirect() for the computed target URI
 :)
declare function now:redirectRestGet(
) as item()* {
  let $newURI as xs:string := now:getRedirectionTargetURI()
  (: Single-line message: no line break ends up inside the logged text. :)
  let $msg := util:logToLog('now:redirectRestGet', ``[Redirecting to "`{$newURI}`"]``)
  return web:redirect($newURI)
};

(:~
 : Build the URI that the current request should be redirected to.
 :
 : The scheme, host name, path and query string are taken from the incoming
 : request; only the port is replaced, by the port of the selected worker
 : plus $now:portOffset.
 :
 : @return the absolute redirection URI
 :)
declare function now:getRedirectionTargetURI() as xs:string {
  let $targetPort := now:getWorkerPort() + $now:portOffset
  let $queryString as xs:string? := request:query()
  (: Only append "?..." when the request actually carried a query string. :)
  let $querySuffix :=
    if (empty($queryString))
    then ()
    else '?' || $queryString
  return
    concat(
      request:scheme(), '://',
      request:hostname(), ':', $targetPort,
      request:path(),
      $querySuffix
    )
};

(:~
 : Get the port of the least-loaded worker server.
 :
 : The server infos come from status:getServerInfos(). The web server's own
 : entry (port $now:webserverPort - $now:portOffset) is removed BEFORE the
 : lowest CPU value is determined. Computing the minimum over all servers and
 : filtering afterwards yields an empty candidate list whenever the web
 : server itself is the single least-loaded process.
 :
 : If several workers share the lowest CPU value, one of them is picked at
 : random. If the server infos report an error, a hard-coded fallback port
 : is returned.
 :
 : @return the port of the selected worker
 :)
declare function now:getWorkerPort() as xs:integer {
  let $infos as map(*) := status:getServerInfos()
  let $portsToAvoid as xs:integer* := ($now:webserverPort - $now:portOffset)
  return
    if (exists($infos?error))
    then
      let $msg := util:logToLog('now:redirectRestGet', ``[Error getting server infos: `{$infos?error}`]``)
      (: This is a guess. Need to more reliably configure the base worker port. :)
      return 9984
    else
      (: Only real workers take part in the load comparison. :)
      let $workers as map(*)* := $infos?data?*[not(xs:integer(?port) = $portsToAvoid)]
      let $lowestCPU as xs:decimal? := min($workers ! xs:decimal(?pcpu))
      let $candPorts as xs:integer* :=
        $workers[xs:decimal(?pcpu) eq $lowestCPU] ! xs:integer(?port)
      return
        if (empty($candPorts))
        then (
          (: With the web server excluded up front, an empty list now means
             that no worker process was found at all. Wait 2 seconds and try
             again in case the workers are still starting up. :)
          prof:dump(``[[WARN] now:getWorkerPort(): Got empty $candPorts list. Recursing...]``),
          prof:sleep(2000),
          now:getWorkerPort()
        )
        else
          (: Random pick among equally loaded workers; with a single
             candidate the permutation is just (1). :)
          let $pick as xs:integer :=
            head(random-number-generator()?permute(1 to count($candPorts)))
          return $candPorts[$pick]
};

And here’s my server-status module that implements getServerInfos():

(:~
 : Gets server status information.
 :
 : The namespace URI must be a quoted string literal; without the quotes the
 : module declaration is not valid XQuery.
 :)
module namespace status = "http://servicenow.com/xquery/module/server-status";

(:~
 : Report whether the server is running on macOS.
 :
 : Runs the "uname" command and compares its whitespace-normalized output
 : with "Darwin".
 :
 : @return true() if uname reports Darwin, false() otherwise
 :)
declare function status:isMacos() as xs:boolean {
  let $unameOutput := proc:execute('uname')/output/text()
  return normalize-space($unameOutput) eq 'Darwin'
};

(:~
 : Get the CPU usage percentage for the specified process IDs.
 :
 : The values are read from a single batch-mode iteration of "top".
 :
 : @param $pids The process IDs to get the percentages for
 : @return A map with either an 'error' entry or a 'data' entry:
 :
 :   map{
 :     'error' : $errorMessage,
 :     'data'  : map{
 :       $pid : map{
 :         'pid'  : $pid,
 :         'pcpu' : $percent
 :       }
 :     }
 :   }
 :)
declare function status:getCpuPercentForPIDs($pids as xs:string*) as map(*) {
  (: Check the input first so that no process is spawned when there is
     nothing to look up. :)
  if (empty($pids))
  then
    map{
      'error' : 'No PIDs provided'
    }
  else
    let $isMacos as xs:boolean := status:isMacos()
    let $cmd as xs:string := 'top'
    (: Get the data for each of the PIDs, using batch mode and 1 iteration: :)
    let $parameters as xs:string* :=
      if ($isMacos)
      then ('-l', '1', '-s', '0', '-stats', 'pid,cpu')
      else (for $pid in $pids return ('-p', $pid), '-b', '-n', '1')
    let $cmdResult as element() := proc:execute($cmd, $parameters)
    return
      if (exists($cmdResult/error))
      then
        map{
          'error' : $cmdResult/error/text()
        }
      else
        (: The linux version of top doesn't have the equivalent of the macOS
           -stats parameter, so we get everything, e.g.:
           25176 eliot.k+  20   0 8076608   2.1g  21352 S   0.0 13.7 132:26.69 java :)
        let $matchPattern as xs:string := ``[^(`{string-join($pids, '|')}`)\s.+]``
        (: Keep only the lines that start with one of the requested PIDs.
           Header lines and blank lines cannot match the pattern, so no
           separate trimming of a trailing blank line is needed. :)
        let $lines as xs:string* :=
          (tokenize($cmdResult/output/text(), '&#x0a;') ! normalize-space(.))
            [matches(., $matchPattern)]
        return
          map{
            'data' :
              map:merge(
                for $line in $lines
                let $tokens as xs:string* := tokenize($line, '\s+')
                let $pid as xs:string? := $tokens[1]
                (: Column 2 in the macOS "pid,cpu" layout, column 9 (%CPU)
                   in the Linux layout shown above. :)
                let $percent as xs:string :=
                  if ($isMacos) then $tokens[2] else $tokens[9]
                return
                  map{
                    $pid :
                      map{
                        'pid' : $pid,
                        'pcpu' : $percent
                      }
                  }
              )
          }
};

(:~
 : Get a map of server port to PID maps.
 :
 : The process list is obtained with "pgrep -lf BaseX" on macOS and with
 : "ps -e -o pid,command" elsewhere. Only lines containing "BaseXHTTP" are
 : used. The port is the value of the -p option on the command line, written
 : either as "-p9984" or as "-p 9984". Lines without an integer -p value are
 : skipped.
 :
 : @return A map with either an 'error' entry or a 'data' entry:
 :
 :   map{
 :     'error' : "error message",
 :     'data'  : map{
 :       $port : map{
 :         'port' : $port,
 :         'pid'  : $pid,
 :         'pcpu' : '-1.0'
 :       }
 :     }
 :   }
 :)
declare function status:getServerPIDInfos() as map(*) {
  let $isMacos as xs:boolean := status:isMacos()
  let $cmd as xs:string :=
    if ($isMacos)
    then 'pgrep'
    else 'ps'
  let $parameters as xs:string+ :=
    if ($isMacos)
    then ('-lf', 'BaseX')
    else ('-e', '-o', 'pid,command')
  let $cmdResult as element() := proc:execute($cmd, $parameters)
  return
    if (exists($cmdResult/error))
    then
      map{
        'error' : $cmdResult/error/text()
      }
    else
      (: Each remaining line starts with the PID, followed by the command line. :)
      let $lines as xs:string* :=
        tokenize($cmdResult/output/text(), '&#x0a;')[contains(., 'BaseXHTTP')]
      return
        map{
          'data' :
            map:merge(
              for $line in $lines
              let $pid as xs:string? := tokenize(normalize-space($line), ' ')[1]
              (: Normalizing before tokenizing drops a blank between "-p" and
                 the number, so both "-p9984" and "-p 9984" give "9984". :)
              let $port as xs:string? :=
                tokenize(normalize-space(substring-after($line, ' -p')), ' ')[1]
              (: Skip entries without a usable port; callers cast the port
                 to xs:integer. :)
              where $port castable as xs:integer
              return
                map{
                  $port :
                    map{
                      'port' : $port,
                      'pid'  : $pid,
                      (: Placeholder for when no CPU value is known. :)
                      'pcpu' : '-1.0'
                    }
                }
            )
        }
};

(:~
 : Get a map of server information maps with CPU percentages, one for each
 : running BaseX server.
 :
 : The port and PID entries come from status:getServerPIDInfos(); the CPU
 : percentages come from status:getCpuPercentForPIDs(). A server for which no
 : CPU value is available keeps the placeholder value '-1.0'. An error from
 : status:getServerPIDInfos() is returned unchanged.
 :
 : @return A map with either an 'error' entry or a 'data' entry:
 :
 :   map{
 :     'error' : "error message",
 :     'data'  : map{
 :       $port : map{
 :         'port' : $port,
 :         'pid'  : $pid,
 :         'pcpu' : $cpuPercent
 :       }
 :     }
 :   }
 :)
declare function status:getServerInfos() as map(*) {
  let $serverInfo as map(*) := status:getServerPIDInfos()
  return
    if (exists($serverInfo?error))
    then $serverInfo
    else
      let $data as map(*) := $serverInfo?data
      let $pids as xs:string* := $data?*?pid
      let $cpuData as map(*) := status:getCpuPercentForPIDs($pids)
      return
        map{
          'data' :
            map:merge(
              for $serverMap as map(*) in $data?*
              let $pid := $serverMap?pid
              (: The lookup yields an empty sequence when there is no 'data'
                 entry or no entry for this PID; the placeholder then applies. :)
              let $cpuPercent as xs:string := ($cpuData?data?($pid)?pcpu, '-1.0')[1]
              return
                map{
                  $serverMap?port :
                    map:put($serverMap, 'pcpu', $cpuPercent)
                }
            )
        }
};

Cheers,

Eliot
_____________________________________________
Eliot Kimber
Sr Staff Content Engineer
Digital Content & Design
O: 512 554 9368
M: 512 554 9368
servicenow.com<https://www.servicenow.com>
LinkedIn<https://www.linkedin.com/company/servicenow> | 
Twitter<https://twitter.com/servicenow> | 
YouTube<https://www.youtube.com/user/servicenowinc> | 
Facebook<https://www.facebook.com/servicenow>

From: BaseX-Talk <basex-talk-boun...@mailman.uni-konstanz.de> on behalf of 
Csaba Fekete <feketecs...@gmail.com>
Date: Monday, July 29, 2024 at 9:26 AM
To: BaseX <basex-talk@mailman.uni-konstanz.de>
Subject: [basex-talk] Performance & scaling
[External Email]

________________________________
Hi. I have read the Advanced User Guide section in the documentation and I got a 
lot of useful information but I'm still a little bit stuck. I have large (1 
GB+) mostly single document databases and complex queries that often take too 
long to execute. Apart from optimizing the …

!
This message could be suspicious
• Similar name as someone you've contacted.
• This is a personal email address.
Provided by ServiceNow DT (Employee Portal KB0077950) - This banner is visible 
only to ServiceNow employees.<https://mimecast.com>
Hi

I have read the Advanced User Guide section in the documentation and I got a 
lot of useful information but I'm still a little bit stuck. I have large (1 
GB+) mostly single document databases and complex queries that often take too 
long to execute. Apart from optimizing the queries, what are my options to 
speed things up? Any ideas are welcome.

1) Vertical scaling - how does a single BaseX instance respond to adding more 
CPUs or RAM? Which one could improve performance? Apart from indexing, are 
there any fine-tuning options that could help improve performance?

2) Partitioning: would it make sense to break my documents into several smaller 
ones, run the queries against them in parallel and then merge the results? Has 
anyone done such optimization? Did it work?

3) Horizontal scaling: the docs read: "The BaseX client-server architecture 
offers ACID-safe transactions, with multiple readers and writers." - Where can 
I get a guide on how to set up a single writer - multiple readers architecture?

Thank you
Csaba

Reply via email to