kosiew commented on code in PR #1036:
URL:
https://github.com/apache/datafusion-python/pull/1036#discussion_r2002243419
##########
python/tests/test_dataframe.py:
##########
@@ -1191,13 +1192,17 @@ def add_with_parameter(df_internal, value: Any) ->
DataFrame:
def test_dataframe_repr_html(df) -> None:
output = df._repr_html_()
Review Comment:
Would it be a good idea to test for other df fixtures too?
not specific to this PR, maybe add an empty_df fixture for tests too
eg
```
@pytest.fixture
def empty_df():
ctx = SessionContext()
# Create an empty RecordBatch with the same schema as df
batch = pa.RecordBatch.from_arrays(
[
pa.array([], type=pa.int64()),
pa.array([], type=pa.int64()),
pa.array([], type=pa.int64()),
],
names=["a", "b", "c"],
)
return ctx.from_arrow(batch)
@pytest.mark.parametrize(
"dataframe_fixture",
["empty_df", "df", "nested_df", "struct_df", "partitioned_df",
"aggregate_df"],
)
def test_dataframe_repr_html(request, dataframe_fixture) -> None:
df = request.getfixturevalue(dataframe_fixture)
output = df._repr_html_()
```
##########
src/dataframe.rs:
##########
@@ -111,56 +116,151 @@ impl PyDataFrame {
}
fn __repr__(&self, py: Python) -> PyDataFusionResult<String> {
- let df = self.df.as_ref().clone().limit(0, Some(10))?;
- let batches = wait_for_future(py, df.collect())?;
- let batches_as_string = pretty::pretty_format_batches(&batches);
- match batches_as_string {
- Ok(batch) => Ok(format!("DataFrame()\n{batch}")),
- Err(err) => Ok(format!("Error: {:?}", err.to_string())),
+ let (batches, has_more) = wait_for_future(
+ py,
+ collect_record_batches_to_display(self.df.as_ref().clone(), 10,
10),
+ )?;
+ if batches.is_empty() {
+ // This should not be reached, but do it for safety since we index
into the vector below
+ return Ok("No data to display".to_string());
}
- }
- fn _repr_html_(&self, py: Python) -> PyDataFusionResult<String> {
- let mut html_str = "<table border='1'>\n".to_string();
+ let batches_as_displ =
+
pretty::pretty_format_batches(&batches).map_err(py_datafusion_err)?;
+
+ let additional_str = match has_more {
+ true => "\nData truncated.",
+ false => "",
+ };
- let df = self.df.as_ref().clone().limit(0, Some(10))?;
- let batches = wait_for_future(py, df.collect())?;
+ Ok(format!("DataFrame()\n{batches_as_displ}{additional_str}"))
+ }
+ fn _repr_html_(&self, py: Python) -> PyDataFusionResult<String> {
+ let (batches, has_more) = wait_for_future(
+ py,
+ collect_record_batches_to_display(
+ self.df.as_ref().clone(),
+ MIN_TABLE_ROWS_TO_DISPLAY,
+ usize::MAX,
+ ),
+ )?;
Review Comment:
Extracting some variables into helper functions could make this more
readable and easier to maintain eg:
```
fn _repr_html_(&self, py: Python) -> PyDataFusionResult<String> {
let (batches, has_more) = wait_for_future(
py,
collect_record_batches_to_display(
self.df.as_ref().clone(),
MIN_TABLE_ROWS_TO_DISPLAY,
usize::MAX,
),
)?;
if batches.is_empty() {
// This should not be reached, but do it for safety since we
index into the vector below
return Ok("No data to display".to_string());
}
let table_uuid = uuid::Uuid::new_v4().to_string();
let schema = batches[0].schema();
// Get table formatters for displaying cell values
let batch_formatters = get_batch_formatters(&batches)?;
let rows_per_batch = batches.iter().map(|batch| batch.num_rows());
// Generate HTML components
let mut html_str = generate_html_table_header(&schema);
html_str.push_str(&generate_table_rows(
&batch_formatters,
rows_per_batch,
&table_uuid
)?);
html_str.push_str("</tbody></table></div>\n");
html_str.push_str(&generate_javascript());
if has_more {
html_str.push_str("Data truncated due to size.");
}
Ok(html_str)
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]