Thanks for the reply,

I think a Pandas- (or NumPy-) based approach would speed up the execution of
the program.
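
To give an idea of the direction I have in mind, here is a minimal sketch
(not my actual code; the function name contour_value_frames is just a
placeholder): DataFrame.shift can produce the value of each contour cell for
every position at once, so the contour values of all target cells could be
read without Python-level loops, and out-of-range neighbours simply come out
as NaN.

import pandas as pd


def contour_value_frames(df):
    """Shifted copies of df, one per contour direction.

    contour_value_frames(df)["above"].iat[y, x] holds df.iat[y - 1, x],
    and similarly for the other directions, so the contour value of any
    (x, y) target cell can be looked up without an explicit loop.
    """
    return {
        "above": df.shift(1, axis=0),   # value of (x, y - 1)
        "below": df.shift(-1, axis=0),  # value of (x, y + 1)
        "left": df.shift(1, axis=1),    # value of (x - 1, y)
        "right": df.shift(-1, axis=1),  # value of (x + 1, y)
    }

Something along those lines, combined with indexing on the target
coordinates, is roughly what I would like to end up with.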

The target cells could amount to up to 10% of the dataset; a good example to
start with would have from 10 to 100 of them.

Let me know your thoughts. Here's a reproducible example, which I have reformatted:



from itertools import product

import pandas as pd
from numpy import random


def select_target_values(dataframe, number_of_target_values):
    # Each target cell is an (x, y) pair: x is the column index, y the row index.
    # Note: numpy's randint excludes the upper bound, so the last row/column is
    # never picked, which keeps the (x+1, y) / (x, y+1) contours in range.
    target_cells = []
    for _ in range(number_of_target_values):
        col_x = random.randint(0, len(dataframe.columns) - 1)
        row_y = random.randint(0, len(dataframe) - 1)
        target_cells.append((col_x, row_y))
    return target_cells


def select_contours(target_cells):
    # Offsets of the four contour (surrounding) cells of a target cell (x, y).
    contour_coordinates = [(0, 1), (1, 0), (0, -1), (-1, 0)]
    contour_cells = []
    for target_cell in target_cells:
        # Random number of contour cells per target cell (1 to 4; numpy's
        # randint excludes the upper bound).
        contour_cells_count = random.randint(1, 5)
        contour_cells.append(
            [
                (target_cell[0] + dx, target_cell[1] + dy)
                for dx, dy in contour_coordinates[:contour_cells_count]
            ]
        )
    return contour_cells


def create_zipf_distribution():
    # 50 rows x 5 columns of Zipf-distributed integers (a=2).
    zipf_dist = random.zipf(2, size=(50, 5))
    return pd.DataFrame(zipf_dist)


def apply_contours(target_cells, contour_cells):
    # Group each target cell together with its own contour cells in one list.
    target_cells_with_contour = []
    for idx, target_cell in enumerate(target_cells):
        target_cell_with_contour = [target_cell]
        target_cell_with_contour.extend(contour_cells[idx])
        target_cells_with_contour.append(target_cell_with_contour)
    return target_cells_with_contour


def create_possible_datasets(dataframe, target_cells_with_contour):
    all_datasets_final = []
    dataframe_original = dataframe.copy()

    # Every combination that picks one cell (the target itself or one of its
    # contour cells) per target cell; keep only non-empty combinations.
    list_tuples_idx_cells_all_datasets = list(
        filter(
            lambda x: x,
            [list(tuples) for tuples in product(*target_cells_with_contour)],
        )
    )
    # The first element of each group is the original target cell (x, y).
    target_original_cells_coordinates = [
        target_and_contour_cell[0]
        for target_and_contour_cell in target_cells_with_contour
    ]
    for dataset_index_values in list_tuples_idx_cells_all_datasets:
        all_datasets = []
        for idx_cell in range(len(dataset_index_values)):
            # Copy the dataframe and overwrite the target cell with the value
            # found at the chosen coordinates (iat/iloc take row, column).
            dataframe_cpy = dataframe.copy()
            dataframe_cpy.iat[
                target_original_cells_coordinates[idx_cell][1],
                target_original_cells_coordinates[idx_cell][0],
            ] = dataframe_original.iloc[
                dataset_index_values[idx_cell][1],
                dataset_index_values[idx_cell][0],
            ]
            all_datasets.append(dataframe_cpy)
        all_datasets_final.append(all_datasets)
    return all_datasets_final


def main():
    zipf_dataset = create_zipf_distribution()

    target_cells = select_target_values(zipf_dataset, 5)
    print(target_cells)
    contour_cells = select_contours(target_cells)
    print(contour_cells)
    target_cells_with_contour = apply_contours(target_cells, contour_cells)
    datasets = create_possible_datasets(zipf_dataset, target_cells_with_contour)
    print(datasets)


if __name__ == "__main__":
    main()

On Sun, Jan 21, 2024 at 4:33 PM Thomas Passin via Python-list <
python-list@python.org> wrote:

> On 1/21/2024 7:37 AM, marc nicole via Python-list wrote:
> > Hello,
> >
> > I have an initial dataframe with a random list of target cells (each cell
> > being identified with a couple (x,y)).
> > I want to yield four different dataframes each containing the value of
> one
> > of the contour (surrounding) cells of each specified target cell.
> >
> > the surrounding cells to consider for a specific target cell are :
> (x-1,y),
> > (x,y-1),(x+1,y);(x,y+1), specifically I randomly choose 1 to 4 cells from
> > these and consider for replacement to the target cell.
> >
> > I want to do that through a pandas-specific approach without having to
> > define the contour cells separately and then apply the changes on the
> > dataframe
>
> 1. Why do you want a Pandas-specific approach?  Many people would rather
> keep code independent of special libraries if possible;
>
> 2. How big can these collections of target cells be, roughly speaking?
> The size could make a big difference in picking a design;
>
> 3. You really should work on formatting code for this list.  Your code
> below is very complex and would take a lot of work to reformat to the
> point where it is readable, especially with the nearly impenetrable
> arguments in some places.  Probably all that is needed is to replace all
> tabs by (say) three spaces, and to make sure you intentionally break
> lines well before they might get word-wrapped.  Here is one example I
> have reformatted (I hope I got this right):
>
> list_tuples_idx_cells_all_datasets = list(filter(
>     lambda x: utils_tuple_list_not_contain_nan(x),
>     [list(tuples) for tuples in list(
>           itertools.product(*target_cells_with_contour))
>     ]))
>
> 4. As an aside, it doesn't look like you need to convert all those
> sequences and iterators to lists all over the place;
>
>
> > (but rather using an all in one approach):
> > for now I have written this example which I think is not Pandas specific:
> [snip]
>
> --
> https://mail.python.org/mailman/listinfo/python-list
>