ava6969 commented on issue #33834:
URL: https://github.com/apache/arrow/issues/33834#issuecomment-1400834195
So I just found this function. I am adapting it to do what I want:
/// Concatenates two DataFrames by going through Arrow tables.
///
/// Despite the name, this does not perform a key-based join: the two tables
/// are passed to arrow::ConcatenateTables with schema unification enabled.
///
/// @param left_table   First frame; its table is listed first in the concatenation.
/// @param right_table  Second frame; its table is listed second.
/// @param ignore_index When true the result is built from the data columns
///                     only; when false the concatenated index column is
///                     passed along with the data to build the result.
/// @return The concatenated DataFrame.
/// @note Failures reported by Arrow are routed through
///       pd::ReturnOrThrowOnFailure.
pd::DataFrame joinOnIndex(pd::DataFrame const& left_table,
                          pd::DataFrame const& right_table,
                          bool ignore_index)
{
    // Temporary column name under which the index travels through Arrow.
    // It is derived from the current clock tick count, presumably so that it
    // does not clash with a user column name.
    auto unique_index =
        std::to_string(std::chrono::high_resolution_clock::now().time_since_epoch().count())
        + "index";

    // Convert DataFrames to tables.
    // NOTE(review): toTable(name) is assumed to emit the index as a column
    // called `name` -- confirm against pd::DataFrame.
    auto left_table_as_table = left_table.toTable(unique_index);
    auto right_table_as_table = right_table.toTable(unique_index);

    // Field merge options used while unifying the two schemas.
    // promote_nullability controls null-type / nullability promotion when
    // fields are merged (see Arrow's Field::MergeOptions); it does not rename
    // columns.
    arrow::Field::MergeOptions merge_options;
    merge_options.promote_nullability = true;

    // Concatenate tables.
    arrow::ConcatenateTablesOptions options;
    options.unify_schemas = true;                // Merge the schemas of the tables
    options.field_merge_options = merge_options; // Use the merge options defined above
    auto table = pd::ReturnOrThrowOnFailure(arrow::ConcatenateTables(
        { left_table_as_table, right_table_as_table },
        options));

    // Collapse the chunks into a single record batch.
    auto merged_rb =
        pd::ReturnOrThrowOnFailure(table->CombineChunksToBatch());

    // Pull the temporary index column back out of the data columns.
    // NOTE(review): GetFieldIndex's result is used unchecked; this relies on
    // toTable() having added the column -- confirm.
    auto index_column_i = merged_rb->schema()->GetFieldIndex(unique_index);
    auto index = merged_rb->column(index_column_i);
    merged_rb = pd::ReturnOrThrowOnFailure(
        merged_rb->RemoveColumn(index_column_i));

    if (ignore_index)
    {
        // Build the frame from the data columns alone.
        return merged_rb;
    }
    else
    {
        // Build the frame from the data columns plus the concatenated index.
        return { merged_rb, index };
    }
}
using namespace pd;
using namespace string_literals;
using std::pair;
using std::vector;
// Demo driver: builds three small frames and prints the result of
// joinOnIndex for each combination exercised below.
int main()
{
    // Two frames sharing the same columns ("letter", "number").
    const auto first_frame = pd::DataFrame(NULL_INDEX,
                                           pair{"letter"s, vector{"a"s, "b"s}},
                                           pair{"number"s, vector{1, 2}});
    const auto second_frame = pd::DataFrame(NULL_INDEX,
                                            pair{"letter"s, vector{"c"s, "d"s}},
                                            pair{"number"s, vector{3, 4}});

    // A third frame that carries an additional "animal" column.
    const auto wider_frame = pd::DataFrame(NULL_INDEX,
                                           pair{"letter"s, vector{"c"s, "d"s}},
                                           pair{"number"s, vector{3, 4}},
                                           pair{"animal"s, vector{"cat"s, "dog"s}});

    // Same columns, ignore_index = false.
    std::cout << joinOnIndex(first_frame, second_frame, false) << "\n";

    // Same columns, ignore_index = true.
    std::cout << joinOnIndex(first_frame, second_frame, true) << "\n";

    // Differing column sets, ignore_index = false.
    std::cout << joinOnIndex(first_frame, wider_frame, false) << "\n";

    return 0;
}
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
users@infra.apache.org