Hello,
First of all thanks for all the effort that has been put into GLPK, it
seems like a great project.
For a University project, we try to solve the following problem. Given a
RxC matrix M, with all values in [0,1], find a set of D rows that minimizes:
sum_{i=1 .. C} min(M[j][i]) with j in D.
It can be seen that an reasonable upper bound for this problem is (R choose
D) * C), and that the problem is in fact linear in both R and C. However,
even for small problem instances (D=1, R=25, D=256) the MIP solver already
takes 14 seconds; setting the parameters higher results in incredibly high
runtimes.
Is there something that should be taken into consideration? Could this be a
problem with the GLPK solver?
Attached is a MWE in Python (using the pulp interface).
Best,
Jan
import argparse
import cProfile
import numpy as np
import pulp
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--num_rows', type=int, default=256)
parser.add_argument('--num_cols', type=int, default=25)
parser.add_argument('--select_rows', type=int, default=1)
parser.add_argument('--solver', type=str, default='GLPK_CMD')
return parser.parse_args()
def sum_of_mins(matrix, rows):
return sum(np.amin(matrix[rows], axis=0))
def run():
args = parse_args()
big_m = args.num_cols + 1
matrix = np.random.rand(args.num_rows, args.num_cols)
pulp_optimizer = pulp.LpProblem('Minimize sum of columns', pulp.LpMinimize)
# creates a variable for each configuration (var = 1 iff it was selected to the default set)
row_identifier_vars = list()
for row_idx in range(args.num_rows):
row_identifier = pulp.LpVariable('row_%s' % row_idx, cat=pulp.LpBinary)
row_identifier_vars.append(row_identifier)
# ensures that only the required number of rows is chosen
pulp_optimizer += pulp.lpSum(row_identifier_vars) == args.select_rows
# creates a variable for each column. This stores the minimal achievable score given the selected rows
col_min_value_vars = list()
for col_idx in range(args.num_cols):
current_col_min_value = pulp.LpVariable('col_%s' % col_idx, 0, 1.0, pulp.LpContinuous)
col_min_value_vars.append(current_col_min_value)
# in order to store the minimum of a group of values (over which we minimize), we need a set of auxiliary
# variables and big M conventions according to Greg Glockners answer on Stackoverlow
# https://stackoverflow.com/questions/10792139/using-min-max-within-an-integer-linear-program
auxilary_variables = list()
for row_idx in range(args.num_rows):
current_auxilary = pulp.LpVariable('aux_col_%d_row_%d' % (col_idx, row_idx), cat=pulp.LpBinary)
pulp_optimizer += current_col_min_value >= (matrix[row_idx][col_idx] * row_identifier_vars[row_idx]) - (big_m * current_auxilary)
# due to the nature of our problem, we need to ensure that the configuration variable OR the auxiliary
# variable is set
pulp_optimizer += 0 <= 2 - row_identifier_vars[row_idx] - current_auxilary <= 1
auxilary_variables.append(current_auxilary)
pulp_optimizer += pulp.lpSum(auxilary_variables) == args.num_rows - 1
# objective function: minimize the sum of score variables
pulp_optimizer += pulp.lpSum(col_min_value_vars)
pulp_optimizer.solve(solver=getattr(pulp, args.solver)())
selected_rows = []
if pulp.LpStatus[pulp_optimizer.status] == 'Optimal':
for variable in pulp_optimizer.variables():
if variable.name.startswith('row_'):
if variable.varValue == 1:
selected_rows.append(int(variable.name.split('row_')[1]))
difference = abs(pulp.value(pulp_optimizer.objective) - sum_of_mins(matrix, selected_rows))
assert difference < 0.00001
if __name__ == '__main__':
cProfile.run('run()')
_______________________________________________
Help-glpk mailing list
Help-glpk@gnu.org
https://lists.gnu.org/mailman/listinfo/help-glpk