there are a couple of issues with your test code; to name just a few:
- you're obviously measuring the time to execute the main method;
this includes parsing and repeatedly iterating over a large xml file.
i don't know xom but i assume that it is similar to jdom; now
building an entire dom tree in memory might be convenient for
the programmer but it is certainly not the most efficient way of
handling large xml data...
- the code that you provided has lots of room for improvement ;)
just an example:
return meta.getAttributeValue("value").trim().replaceAll(":",
"!").replaceAll("'", "!").replaceAll("\\/", "!")
.replaceAll("\"", "!").replaceAll("\\*", "!");
- you're calling session.save() every time you've added 10 nodes;
save() is an expensive operation; calling save() e.g. every 1000 nodes
is much more efficient
- you're doing *lots* of extensive string operations and dom tree traversals...
i'd guess that 99.9% of your claimed 20 minutes is spent in xml parsing,
string operations, etc., *not* in repository write operations.
the following quick test run on my local machine (2.8 ghz pentium)
took about 60s for adding 10k nodes saved in batches of 1000:
// Micro-benchmark: create 10000 child nodes below a fresh "foo" node,
// persist them in batches of 1000 and print the wall-clock time of each batch.
parent = root.addNode("foo", "nt:unstructured");
long batchStart = System.currentTimeMillis();
int added = 0;
while (added < 10000) {
    added++;
    parent.addNode("foo" + added);
    if (added % 1000 != 0) {
        // batch not complete yet, keep adding
        continue;
    }
    // a full batch of 1000 transient nodes is pending: persist and report
    root.save();
    long batchEnd = System.currentTimeMillis();
    System.out.println("adding 1000 nodes took " + (batchEnd - batchStart) + "ms");
    // restart the clock for the next batch
    batchStart = System.currentTimeMillis();
}
On 5/10/06, Eugeny N Dzhurinsky <[EMAIL PROTECTED]> wrote:
On Wed, May 10, 2006 at 02:12:19PM +0200, Stefan Guggisberg wrote:
> >I have almost same results with DerbyPersistenceManager for now. It takes
> >about 15-20 minutes to flush... Any ideas?
>
> some guesses:
> - did you start with an empty repository? note that modifying the
> <Workspace>
> element in repository.xml does not affect existing workspace.xml files.
I am removing the entire repository directory contents
> - is your jvm heap size appropriate?
-Xms128m -Xmx512m
> - how do you import those nodes? can you provide a test case?
Well, it's not a true test case, but it should give you some idea. We are
parsing some large XML file (~ 20 megabytes) and adding nodes to repository.
package tests;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Stack;
import javax.jcr.Node;
import javax.jcr.Repository;
import javax.jcr.Session;
import javax.jcr.SimpleCredentials;
import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.Element;
import nu.xom.Elements;
import org.apache.jackrabbit.core.NodeImpl;
import org.apache.jackrabbit.uuid.UUID;
import org.apache.log4j.Logger;
import cms.auth.FileJRGroupBuilder;
import cms.auth.JRGroup;
import cms.helper.ConfigurationHelper;
import cms.helper.ResourceCreationHelper;
import cms.security.NodeACL;
import cms.security.NodeACLFactory;
import cms.security.TransientRepositoryFactory;
public class UserFileParsing {
private List groups;
private Stack uuids;
private int last_lenght;
private Node root_node;
private static List unuser_fields;
private int record_count;
Logger log = Logger.getLogger(UserFileParsing.class.getName());
static {
unuser_fields = new LinkedList();
unuser_fields.add("objectname");
}
class IdStructure {
private String UUID;
private String code;
public IdStructure(String UUID, String code) {
this.UUID = UUID;
this.code = code;
}
public String getCode() {
return code;
}
public String getUUID() {
return UUID;
}
public String toString() {
return "[" + UUID + ":" + code + "]";
}
};
public UserFileParsing() throws Exception {
groups = new LinkedList();
uuids = new Stack();
last_lenght = 0;
record_count = 0;
}
private void insertGroups() {
FileJRGroupBuilder builder = new FileJRGroupBuilder();
Iterator it = groups.iterator();
while (it.hasNext()) {
String gr_name = (String) it.next();
if (gr_name.length() > 0)
try {
builder.addGroup(gr_name);
// Thread.sleep(1);
} catch (Exception e) {
log.error(e, e);
}
}
builder.commit();
}
/**
* Parses XML file and stores datab into repository
* @param filename
* name of file to parse
*/
private void fileParsing(String filename) throws Exception {
Builder bld = new Builder();
Document document = bld.build(filename);
Element rootElement = document.getRootElement(); // treedump
Elements tree = rootElement.getFirstChildElement("ds")
.getFirstChildElement("tree").getChildElements();
for (int i = 0; i < tree.size(); i++) {
Element node = tree.get(i);
extractGroups(node);
record_count++;
}
if (log.isDebugEnabled())
log.debug("NUMBER OF NODES: " + record_count);
insertGroups();
// list of nodes ---
System.setProperty("java.security.cms.auth.login.config",
"conf/jaas.config");
ConfigurationHelper ch = new ConfigurationHelper(ResourceCreationHelper
.getResourcePath(UserFileParsing.class, "/security.properties",
true));
String CONFIG_FILE = ch.getRepositoryCfgFile();
String DIRECTORY = ch.getRepositoryDir();
// Set up a Jackrabbit repository with the specified
// configuration file and repository directory
Repository repository = TransientRepositoryFactory.getInstance(
CONFIG_FILE, DIRECTORY);
String username = "username";
String password = "password";
// Login to the default workspace as a dummy user
Session session = repository.login(new SimpleCredentials(username,
password.toCharArray()));
root_node = session.getRootNode();
root_node.addMixin("mix:referenceable");
int curr_record = 0;
int flush_step = 10;
int curr_flush = 10;
NodeACLFactory xmlf = NodeACLFactory.getInstance();
xmlf.setSystem(true);
for (int i = 0; i < tree.size(); i++) {
Element node = tree.get(i);
putNode(node, xmlf);
curr_record++;
if (log.isDebugEnabled())
log.debug(curr_record * 100 / record_count + "%");
if (curr_record * 100 / record_count > curr_flush) {
if (log.isDebugEnabled())
log.debug("flushing ....");
curr_flush += flush_step;
session.save();
}
}
session.save();
session.logout();
}
/**
* Retreives node name
* @param el
* XML node object
* @return name of the node
*/
private String getName(Element el) {
Elements els = el.getChildElements();
for (int i = 0; i < els.size(); i++) {
Element meta = els.get(i);
if (meta.getLocalName().toLowerCase().equals("meta")
&& meta.getAttributeValue("name").toLowerCase().equals(
"objectname"))
return meta.getAttributeValue("value").trim().replaceAll(":",
"!").replaceAll("'", "!").replaceAll("\\/", "!")
.replaceAll("\"", "!").replaceAll("\\*", "!");
}
return "NULL";
}
/**
* @return location of given node
* @param el
* node XML object
*/
private String getLocation(Element el) {
Elements els = el.getChildElements();
for (int i = 0; i < els.size(); i++) {
Element meta = els.get(i);
if (meta.getLocalName().toLowerCase().equals("meta")
&& meta.getAttributeValue("name").toLowerCase().equals(
"nodelocation"))
return meta.getAttributeValue("value");
}
return "NULL";
}
/**
* Adds attributes to a node
* @param node
* @param el
* XML element we need extract parameters from
*/
private void putAttributes(Node node, Element el) throws Exception {
Elements els = el.getChildElements();
for (int i = 0; i < els.size(); i++) {
Element meta = els.get(i);
if (meta.getLocalName().toLowerCase().equals("meta")
&& !unuser_fields.contains(meta.getAttributeValue("name")
.toLowerCase())) {
if (log.isDebugEnabled())
log.debug("Added property "
+ meta.getAttributeValue("name"));
node.setProperty(meta.getAttributeValue("name"), meta
.getAttributeValue("value"));
}
}
}
/**
* Parses privileges from given string
* @param priv
* privileges scring
* @return list of privileges
*/
private List parsePriv(String priv) {
List ret = new LinkedList();
String[] privil = priv.split(" ");
for (int i = 0; i < privil.length; i++)
if (privil[i].length() > 0)
ret.add(privil[i]);
return ret;
}
/**
* Stores ACLs for given node
* @param node
* XML node object
* @param node_uuid
* UUID of the node
* @param parent_uuid
* UUID of parent node
* @param factory
* node ACL factory to be used
*/
private void putACLs(Element node, UUID node_uuid, UUID parent_uuid,
NodeACLFactory factory) throws Exception {
JRGroup jr_group = new FileJRGroupBuilder().getInstance();
Element el = node.getFirstChildElement("rights");
Elements els = el.getChildElements();
HashMap acls = new HashMap();
for (int i = 0; i < els.size(); i++) {
Element right = els.get(i);
if (right.getLocalName().equals("right")
&& right.getAttributeValue("groupname").length() > 0) {
int scan = -1;
int read = -1;
int write = -1;
int add = -1;
int delete = -1;
List privileges = parsePriv(right.getAttributeValue("rights"));
if (privileges.contains("scan"))
scan = 1;
if (privileges.contains("read"))
read = 1;
if (privileges.contains("write"))
write = 1;
if (privileges.contains("add"))
add = 1;
if (privileges.contains("delete"))
delete = 1;
acls.put(jr_group.getGroupByName(
right.getAttributeValue("groupname")).getId(),
new NodeACL(scan, read, write, delete, add, 1, 1));
}
}
// add all privileges for default group
acls.put(jr_group.getGroupByName("default_group").getId(), new NodeACL(
1, 1, 1, 1, 1, 1, 1));
factory.addACL(parent_uuid, node_uuid, acls);
}
/**
* Stores node into repository
* @param node
* XML node object
* @param factory
* ACL factory to be used
*/
private void putNode(Element node, NodeACLFactory factory) throws Exception
{
Elements els = node.getChildElements();
Element last_version = null;
for (int i = 0; i < els.size(); i++) {
Element version_right = els.get(i);
if (version_right.getLocalName().equals("version"))
last_version = els.get(i);
}
Node parent_node = root_node;
String location = getLocation(last_version);
if (!uuids.isEmpty()) {
int times = (last_lenght - location.length()) / 4 + 1;
for (int i = 0; i < times; i++)
uuids.pop();
last_lenght = location.length();
IdStructure struct = (IdStructure) uuids.peek();
parent_node = root_node.getNode(struct.getUUID().substring(1,
struct.getUUID().length()));
}
Node new_node = parent_node.addNode(getName(last_version));
new_node.addMixin("mix:referenceable");
// Node new_node = parent_node.addNode(getName(last_version));
// long start_time = System.currentTimeMillis();
if (log.isDebugEnabled())
log.debug("Saving ACLs for node");
putACLs(node, ((NodeImpl) new_node).internalGetUUID(),
((NodeImpl) parent_node).internalGetUUID(), factory);
// log.debug("TIME: " + (System.currentTimeMillis() - start_time));
if (log.isDebugEnabled())
log.debug("Saving attributes for node");
putAttributes(new_node, last_version);
uuids.push(new IdStructure(new_node.getPath(), location));
}
/**
* Extracts groups from node and adds it to global list of groups
* @param node
* XML node object
*/
private void extractGroups(Element node) {
Elements rights = node.getFirstChildElement("rights")
.getChildElements();
for (int i = 0; i < rights.size(); i++) {
Element rig = rights.get(i);
String group_name = rig.getAttributeValue("groupname");
if (!groups.contains(group_name))
groups.add(group_name);
}
}
public static void main(String[] args) throws Exception {
UserFileParsing parsing = new UserFileParsing();
parsing.fileParsing("conf/sampledata.xml");
}
}
--
Eugene N Dzhurinsky