[ 
https://issues.apache.org/jira/browse/TINKERPOP-1298?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15307733#comment-15307733
 ] 

ASF GitHub Bot commented on TINKERPOP-1298:
-------------------------------------------

Github user okram commented on a diff in the pull request:

    https://github.com/apache/incubator-tinkerpop/pull/323#discussion_r65182601
  
    --- Diff: 
gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/bulkdumping/BulkExportVertexProgram.java
 ---
    @@ -0,0 +1,189 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +package org.apache.tinkerpop.gremlin.process.computer.bulkdumping;
    +
    +import org.apache.commons.configuration.BaseConfiguration;
    +import org.apache.commons.configuration.Configuration;
    +import org.apache.commons.configuration.ConfigurationUtils;
    +import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
    +import org.apache.tinkerpop.gremlin.process.computer.Memory;
    +import org.apache.tinkerpop.gremlin.process.computer.MessageScope;
    +import org.apache.tinkerpop.gremlin.process.computer.Messenger;
    +import org.apache.tinkerpop.gremlin.process.computer.VertexComputeKey;
    +import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
    +import 
org.apache.tinkerpop.gremlin.process.computer.traversal.TraversalVertexProgram;
    +import 
org.apache.tinkerpop.gremlin.process.computer.util.AbstractVertexProgramBuilder;
    +import org.apache.tinkerpop.gremlin.process.traversal.Path;
    +import org.apache.tinkerpop.gremlin.process.traversal.Traverser;
    +import 
org.apache.tinkerpop.gremlin.process.traversal.traverser.util.TraverserSet;
    +import org.apache.tinkerpop.gremlin.structure.Graph;
    +import org.apache.tinkerpop.gremlin.structure.Vertex;
    +import org.apache.tinkerpop.gremlin.structure.VertexProperty;
    +import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
    +import org.javatuples.Tuple;
    +
    +import java.util.ArrayList;
    +import java.util.Collection;
    +import java.util.Collections;
    +import java.util.HashMap;
    +import java.util.List;
    +import java.util.Map;
    +import java.util.Set;
    +import java.util.stream.Collectors;
    +
    +/**
    + * @author Daniel Kuppitz (http://gremlin.guru)
    + */
    +public class BulkExportVertexProgram implements VertexProgram<Tuple> {
    +
    +    public static final String BULK_EXPORT_VERTEX_PROGRAM_CFG_PREFIX = 
"gremlin.bulkExportVertexProgram";
    +    public static final String BULK_EXPORT_PROPERTIES = String.join(".", 
BULK_EXPORT_VERTEX_PROGRAM_CFG_PREFIX, "properties");
    +
    +    private Configuration configuration;
    +    private Map<String, String> properties;
    +    private List<String> sortedProperties;
    +    private Set<VertexComputeKey> vertexComputeKeys;
    +
    +    private BulkExportVertexProgram() {
    +    }
    +
    +    @Override
    +    public void loadState(final Graph graph, final Configuration config) {
    +        configuration = new BaseConfiguration();
    +        if (config != null) {
    +            ConfigurationUtils.copy(config, configuration);
    +        }
    +        properties = new HashMap<>();
    +        sortedProperties = new ArrayList<>();
    +        for (final String tuple : 
configuration.getString(BULK_EXPORT_PROPERTIES, "").split("\1")) {
    +            final String[] parts = tuple.split("\2", -1);
    +            properties.put(parts[0], parts[1]);
    +            sortedProperties.add(parts[0]);
    +        }
    +        vertexComputeKeys = 
Collections.singleton(VertexComputeKey.of(BULK_EXPORT_PROPERTIES, false));
    +    }
    +
    +    @Override
    +    public void storeState(final Configuration config) {
    +        VertexProgram.super.storeState(config);
    +        if (configuration != null) {
    +            ConfigurationUtils.copy(configuration, config);
    +        }
    +    }
    +
    +    @Override
    +    public void setup(final Memory memory) {
    +    }
    +
    +    @Override
    +    public void execute(final Vertex sourceVertex, final Messenger<Tuple> 
messenger, final Memory memory) {
    +        final VertexProperty<TraverserSet> haltedTraversers = 
sourceVertex.property(TraversalVertexProgram.HALTED_TRAVERSERS);
    +        haltedTraversers.ifPresent(traverserSet -> {
    +            final List<List<String>> rows = new ArrayList<>();
    +            for (final Traverser t : (Iterable<Traverser>) traverserSet) {
    +                final List<String> columns = new ArrayList<>();
    +                final Path path = t.path();
    +                final Iterable<String> keys = properties.isEmpty()
    +                        ? 
t.path().labels().stream().flatMap(Collection::stream).sorted().collect(Collectors.toSet())
    +                        : sortedProperties;
    +                for (final String key : keys) {
    +                    final String format = properties.getOrDefault(key, "");
    +                    final Object value = path.get(key);
    +                    columns.add("".equals(format) ? value.toString() : 
String.format(format, value));
    +                }
    +                rows.add(columns);
    +            }
    +            sourceVertex.property(BULK_EXPORT_PROPERTIES, rows);
    +        });
    +    }
    +
    +    @Override
    +    public boolean terminate(final Memory memory) {
    +        return properties == null || properties.isEmpty() || 
!memory.isInitialIteration();
    +    }
    +
    +    @Override
    +    public Set<MessageScope> getMessageScopes(final Memory memory) {
    +        return Collections.emptySet();
    +    }
    +
    +    @SuppressWarnings({"CloneDoesntDeclareCloneNotSupportedException", 
"CloneDoesntCallSuperClone"})
    +    @Override
    +    public VertexProgram<Tuple> clone() {
    --- End diff --
    
    Extend `StaticVertexProgram<Tuple>` instead of implementing 
`VertexProgram<Tuple>` and you can get rid of this `clone()` method (and others 
I believe?).


> Save OLAP results to file
> -------------------------
>
>                 Key: TINKERPOP-1298
>                 URL: https://issues.apache.org/jira/browse/TINKERPOP-1298
>             Project: TinkerPop
>          Issue Type: Improvement
>          Components: io, process
>            Reporter: Daniel Kuppitz
>            Assignee: Daniel Kuppitz
>
> Provide a way to save (tabular) results to text files, just like Spark's 
> {{saveAsTextFile}}.
> I'm not sure about the best way to do it. 3 options come to my mind:
> # a new step.
> # a {{VertexProgram}}
> # a configuration option
> Things to consider / open questions:
> * Is it sufficient to simply {{toString()}} all values or should we allow 
> formatters / format strings?
> * [~jlewandowski] pointed out that it would be nice to have support for the 
> [parquet file format|https://parquet.apache.org/]. I guess now we're already 
> talking about support for different {{FileOutputFormats}} and not just 
> formatters.
> * Is that only relevant for OLAP?
> * Can we support arbitrary file systems?



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to