Hello,

First of all thanks for all the effort that has been put into GLPK, it
seems like a great project.

For a University project, we try to solve the following problem. Given a
RxC matrix M, with all values in [0,1], find a set of D rows that minimizes:

sum_{i=1 .. C} min(M[j][i]) with j in D.

It can be seen that an reasonable upper bound for this problem is (R choose
D) * C), and that the problem is in fact linear in both R and C. However,
even for small problem instances (D=1, R=25, D=256) the MIP solver already
takes 14 seconds; setting the parameters higher results in incredibly high
runtimes.

Is there something that should be taken into consideration? Could this be a
problem with the GLPK solver?

Attached is a MWE in Python (using the pulp interface).

Best,
Jan
import argparse
import cProfile
import numpy as np
import pulp


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_rows', type=int, default=256)
    parser.add_argument('--num_cols', type=int, default=25)
    parser.add_argument('--select_rows', type=int, default=1)
    parser.add_argument('--solver', type=str, default='GLPK_CMD')
    return parser.parse_args()


def sum_of_mins(matrix, rows):
    return sum(np.amin(matrix[rows], axis=0))


def run():
    args = parse_args()
    big_m = args.num_cols + 1

    matrix = np.random.rand(args.num_rows, args.num_cols)

    pulp_optimizer = pulp.LpProblem('Minimize sum of columns', pulp.LpMinimize)

    # creates a variable for each configuration (var = 1 iff it was selected to the default set)
    row_identifier_vars = list()
    for row_idx in range(args.num_rows):
        row_identifier = pulp.LpVariable('row_%s' % row_idx, cat=pulp.LpBinary)
        row_identifier_vars.append(row_identifier)
    # ensures that only the required number of rows is chosen
    pulp_optimizer += pulp.lpSum(row_identifier_vars) == args.select_rows

    # creates a variable for each column. This stores the minimal achievable score given the selected rows
    col_min_value_vars = list()
    for col_idx in range(args.num_cols):
        current_col_min_value = pulp.LpVariable('col_%s' % col_idx, 0, 1.0, pulp.LpContinuous)
        col_min_value_vars.append(current_col_min_value)

        # in order to store the minimum of a group of values (over which we minimize), we need a set of auxiliary
        # variables and big M conventions according to Greg Glockners answer on Stackoverlow
        # https://stackoverflow.com/questions/10792139/using-min-max-within-an-integer-linear-program
        auxilary_variables = list()
        for row_idx in range(args.num_rows):
            current_auxilary = pulp.LpVariable('aux_col_%d_row_%d' % (col_idx, row_idx), cat=pulp.LpBinary)
            pulp_optimizer += current_col_min_value >= (matrix[row_idx][col_idx] * row_identifier_vars[row_idx]) - (big_m * current_auxilary)
            # due to the nature of our problem, we need to ensure that the configuration variable OR the auxiliary
            # variable is set
            pulp_optimizer += 0 <= 2 - row_identifier_vars[row_idx] - current_auxilary <= 1
            auxilary_variables.append(current_auxilary)
        pulp_optimizer += pulp.lpSum(auxilary_variables) == args.num_rows - 1

    # objective function: minimize the sum of score variables
    pulp_optimizer += pulp.lpSum(col_min_value_vars)

    pulp_optimizer.solve(solver=getattr(pulp, args.solver)())

    selected_rows = []
    if pulp.LpStatus[pulp_optimizer.status] == 'Optimal':
        for variable in pulp_optimizer.variables():
            if variable.name.startswith('row_'):
                if variable.varValue == 1:
                    selected_rows.append(int(variable.name.split('row_')[1]))

    difference = abs(pulp.value(pulp_optimizer.objective) - sum_of_mins(matrix, selected_rows))
    assert difference < 0.00001


if __name__ == '__main__':
    cProfile.run('run()')
_______________________________________________
Help-glpk mailing list
Help-glpk@gnu.org
https://lists.gnu.org/mailman/listinfo/help-glpk

Reply via email to