This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-ray.git
The following commit(s) were added to refs/heads/main by this push:
new 2523e9f Rename from raysql to datafusion_ray in multiple places (#13)
2523e9f is described below
commit 2523e9f765c02122ae8d79fd3db0421b2e738896
Author: Tim Saucer <[email protected]>
AuthorDate: Thu Oct 3 08:26:37 2024 -0400
Rename from raysql to datafusion_ray in multiple places (#13)
---
README.md | 4 ++--
build.rs | 8 ++++----
datafusion_ray/__init__.py | 2 +-
datafusion_ray/context.py | 3 ++-
datafusion_ray/main.py | 10 +++++-----
examples/tips.py | 4 ++--
src/context.rs | 2 +-
src/lib.rs | 2 +-
src/planner.rs | 2 +-
src/proto/{raysql.proto => datafusion_ray.proto} | 4 ++--
10 files changed, 21 insertions(+), 20 deletions(-)
diff --git a/README.md b/README.md
index 5aa86e0..ef0281d 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ import os
import pandas as pd
import ray
-from datafusion_ray import RaySqlContext
+from datafusion_ray import DatafusionRayContext
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
@@ -54,7 +54,7 @@ SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
ray.init(resources={"worker": 1})
# Create a context and register a table
-ctx = RaySqlContext(2, use_ray_shuffle=True)
+ctx = DatafusionRayContext(2, use_ray_shuffle=True)
# Register either a CSV or Parquet file
# ctx.register_csv("tips", f"{SCRIPT_DIR}/tips.csv", True)
ctx.register_parquet("tips", f"{SCRIPT_DIR}/tips.parquet")
diff --git a/build.rs b/build.rs
index 51b30d0..a6b34dc 100644
--- a/build.rs
+++ b/build.rs
@@ -32,14 +32,14 @@ fn main() -> Result<(), String> {
// We don't include the proto files in releases so that downstreams
// do not need to have PROTOC included
- if Path::new("src/proto/raysql.proto").exists() {
+ if Path::new("src/proto/datafusion_ray.proto").exists() {
println!("cargo:rerun-if-changed=src/proto/datafusion.proto");
- println!("cargo:rerun-if-changed=src/proto/raysql.proto");
+ println!("cargo:rerun-if-changed=src/proto/datafusion_ray.proto");
tonic_build::configure()
.extern_path(".datafusion", "::datafusion_proto::protobuf")
- .compile(&["src/proto/raysql.proto"], &["src/proto"])
+ .compile(&["src/proto/datafusion_ray.proto"], &["src/proto"])
.map_err(|e| format!("protobuf compilation failed: {e}"))?;
- let generated_source_path = out.join("raysql.protobuf.rs");
+ let generated_source_path = out.join("datafusion_ray.protobuf.rs");
let code = std::fs::read_to_string(generated_source_path).unwrap();
let mut file = std::fs::OpenOptions::new()
.write(true)
diff --git a/datafusion_ray/__init__.py b/datafusion_ray/__init__.py
index c8bae17..d29bb29 100644
--- a/datafusion_ray/__init__.py
+++ b/datafusion_ray/__init__.py
@@ -28,6 +28,6 @@ from ._datafusion_ray_internal import (
serialize_execution_plan,
deserialize_execution_plan,
)
-from .context import RaySqlContext
+from .context import DatafusionRayContext
__version__ = importlib_metadata.version(__name__)
diff --git a/datafusion_ray/context.py b/datafusion_ray/context.py
index bd3aee3..e67d074 100644
--- a/datafusion_ray/context.py
+++ b/datafusion_ray/context.py
@@ -27,6 +27,7 @@ import datafusion_ray
from datafusion_ray import Context, ExecutionGraph, QueryStage
from typing import List
+
def schedule_execution(
graph: ExecutionGraph,
stage_id: int,
@@ -208,7 +209,7 @@ def execute_query_partition(
return ret[0] if len(ret) == 1 else ret
-class RaySqlContext:
+class DatafusionRayContext:
def __init__(self, num_workers: int = 1, use_ray_shuffle: bool = False):
self.ctx = Context(num_workers, use_ray_shuffle)
self.num_workers = num_workers
diff --git a/datafusion_ray/main.py b/datafusion_ray/main.py
index 9e9b97a..cfc08ec 100644
--- a/datafusion_ray/main.py
+++ b/datafusion_ray/main.py
@@ -20,7 +20,7 @@ import os
from pyarrow import csv as pacsv
import ray
-from datafusion_ray import RaySqlContext
+from datafusion_ray import DatafusionRayContext
NUM_CPUS_PER_WORKER = 8
@@ -31,9 +31,9 @@ QUERIES_DIR = os.path.join(SCRIPT_DIR, f"../sqlbench-h/queries/sf={SF}")
RESULTS_DIR = f"results-sf{SF}"
-def setup_context(use_ray_shuffle: bool, num_workers: int = 2) -> RaySqlContext:
+def setup_context(use_ray_shuffle: bool, num_workers: int = 2) -> DatafusionRayContext:
print(f"Using {num_workers} workers")
- ctx = RaySqlContext(num_workers, use_ray_shuffle)
+ ctx = DatafusionRayContext(num_workers, use_ray_shuffle)
for table in [
"customer",
"lineitem",
@@ -53,14 +53,14 @@ def load_query(n: int) -> str:
return fin.read()
-def tpch_query(ctx: RaySqlContext, q: int = 1):
+def tpch_query(ctx: DatafusionRayContext, q: int = 1):
sql = load_query(q)
result_set = ctx.sql(sql)
return result_set
def tpch_timing(
- ctx: RaySqlContext,
+ ctx: DatafusionRayContext,
q: int = 1,
print_result: bool = False,
write_result: bool = False,
diff --git a/examples/tips.py b/examples/tips.py
index fff8834..30117c6 100644
--- a/examples/tips.py
+++ b/examples/tips.py
@@ -19,7 +19,7 @@ import os
import pandas as pd
import ray
-from raysql import RaySqlContext
+from datafusion_ray import DatafusionRayContext
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
@@ -27,7 +27,7 @@ SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
ray.init(resources={"worker": 1})
# Create a context and register a table
-ctx = RaySqlContext(2, use_ray_shuffle=True)
+ctx = DatafusionRayContext(2, use_ray_shuffle=True)
# Register either a CSV or Parquet file
# ctx.register_csv("tips", f"{SCRIPT_DIR}/tips.csv", True)
ctx.register_parquet("tips", f"{SCRIPT_DIR}/tips.parquet")
diff --git a/src/context.rs b/src/context.rs
index ec6f0b4..6821ba6 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -44,7 +44,7 @@ use tokio::task::JoinHandle;
type PyResultSet = Vec<PyObject>;
-#[pyclass(name = "Context", module = "raysql", subclass)]
+#[pyclass(name = "Context", module = "datafusion_ray", subclass)]
pub struct PyContext {
pub(crate) ctx: SessionContext,
use_ray_shuffle: bool,
diff --git a/src/lib.rs b/src/lib.rs
index 4436ac4..93d3f8d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -31,7 +31,7 @@ pub mod utils;
/// A Python module implemented in Rust.
#[pymodule]
-fn _datafusion_ray_internal(_py: Python, m: &PyModule) -> PyResult<()> {
+fn _datafusion_ray_internal(m: &Bound<'_, PyModule>) -> PyResult<()> {
// register classes that can be created directly from Python code
m.add_class::<context::PyContext>()?;
m.add_class::<planner::PyExecutionGraph>()?;
diff --git a/src/planner.rs b/src/planner.rs
index b940c9d..e045d1c 100644
--- a/src/planner.rs
+++ b/src/planner.rs
@@ -32,7 +32,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use uuid::Uuid;
-#[pyclass(name = "ExecutionGraph", module = "raysql", subclass)]
+#[pyclass(name = "ExecutionGraph", module = "datafusion_ray", subclass)]
pub struct PyExecutionGraph {
pub graph: ExecutionGraph,
}
diff --git a/src/proto/raysql.proto b/src/proto/datafusion_ray.proto
similarity index 95%
rename from src/proto/raysql.proto
rename to src/proto/datafusion_ray.proto
index 281e1ae..f64dcaf 100644
--- a/src/proto/raysql.proto
+++ b/src/proto/datafusion_ray.proto
@@ -1,9 +1,9 @@
syntax = "proto3";
-package raysql.protobuf;
+package datafusion_ray.protobuf;
option java_multiple_files = true;
-option java_package = "raysql.protobuf";
+option java_package = "datafusion_ray.protobuf";
option java_outer_classname = "RaySqlProto";
import "datafusion.proto";
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]