ava6969 commented on issue #33834:
URL: https://github.com/apache/arrow/issues/33834#issuecomment-1400834195

   So I just found this function. I am adapting it to do what I want:
/// Combines two DataFrames into one by concatenating their Arrow tables.
///
/// Despite the name, the visible code does not perform a key-based join: it
/// converts both frames to tables, concatenates them with
/// arrow::ConcatenateTables (schemas unified), collapses the result into a
/// single record batch, and splits the index column back out.
///
/// @param left_table   First frame; its rows come first in the concatenation.
/// @param right_table  Second frame; its rows follow those of left_table.
/// @param ignore_index If true, the result is built from the data columns
///                     only; otherwise it is built from the data columns plus
///                     the concatenated index column.
/// @return The combined DataFrame.
/// @note Failures reported by Arrow are routed through
///       pd::ReturnOrThrowOnFailure (presumably throws - confirm against pd).
pd::DataFrame joinOnIndex(pd::DataFrame const& left_table,
                             pd::DataFrame const& right_table,
                             bool ignore_index)
   {
       // Name handed to toTable() on both sides and looked up again after the
       // concatenation. It is derived from the current clock tick so that it
       // is unlikely to clash with a real column name.
       auto const unique_index =
           std::to_string(
               std::chrono::high_resolution_clock::now().time_since_epoch().count()) +
           "index";

       // Convert DataFrames to tables (both use the same index column name so
       // the two index columns line up when the schemas are unified).
       auto left_table_as_table = left_table.toTable(unique_index);
       auto right_table_as_table = right_table.toTable(unique_index);

       // Field-merge behaviour used while unifying the two schemas.
       // promote_nullability loosens nullability when same-named fields are
       // merged; it does NOT rename conflicting columns.
       arrow::Field::MergeOptions merge_options;
       merge_options.promote_nullability = true;

       // Concatenate tables, letting Arrow merge the two schemas.
       arrow::ConcatenateTablesOptions options;
       options.unify_schemas = true;
       options.field_merge_options = merge_options;

       auto table = pd::ReturnOrThrowOnFailure(arrow::ConcatenateTables(
           { left_table_as_table, right_table_as_table },
           options));

       // Collapse all chunks so every column is a single contiguous array.
       auto merged_rb = pd::ReturnOrThrowOnFailure(table->CombineChunksToBatch());

       // Pull the index column out before dropping it from the data batch.
       // NOTE(review): GetFieldIndex returns -1 when the name is missing or
       // ambiguous; that case is not handled here - confirm toTable() always
       // emits exactly one column with this name.
       auto const index_column_i = merged_rb->schema()->GetFieldIndex(unique_index);
       auto index = merged_rb->column(index_column_i);

       merged_rb = pd::ReturnOrThrowOnFailure(merged_rb->RemoveColumn(index_column_i));

       if (ignore_index)
       {
           // Caller does not want the concatenated index: data columns only.
           return merged_rb;
       }

       return { merged_rb, index };
   }
   
   // Convenience declarations for the demo in main() below.
   using namespace pd;
   // The ""s literal operator lives in std::string_literals; qualify it so the
   // directive does not depend on some other header injecting namespace std.
   using namespace std::string_literals;
   using std::pair;
   using std::vector;
   
   int main()
   {
   
       auto df1 = pd::DataFrame(NULL_INDEX,
                                pair{"letter"s, vector{"a"s, "b"s}},
                                pair{"number"s, vector{1, 2}});
   
       auto df2 = pd::DataFrame(NULL_INDEX,
                                pair{"letter"s, vector{"c"s, "d"s}},
                                pair{"number"s, vector{3, 4}});
   
       auto df3 = pd::DataFrame(NULL_INDEX,
                               pair{"letter"s, vector{"c"s, "d"s}},
                               pair{"number"s, vector{3, 4}},
                               pair{"animal"s, vector{"cat"s, "dog"s}});
   
       std::cout << joinOnIndex(df1, df2, false) << "\n";
   
       std::cout << joinOnIndex(df1, df2, true) << "\n";
   
       std::cout << joinOnIndex(df1, df3, false) << "\n";
       return 0;
   }


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to