On 10/6/20 3:18 PM, Alaindevos wrote:
I have a large table consisting of two columns.One with words.Another with frequencies. I want to sort them efficiently according to the names or frequency. For this I need an efficient sort function where I can plugin my proper test of order, and proper swap. Currently I do it using an own written bubble sort that doesn't scale well.


I had fun writing the following program. Note how makeIndex allows visiting elements in sorted order without actually sorting them.

import std.random;
import std.range;
import std.algorithm;
import std.conv;
import std.stdio;

struct S {
  string word;
  size_t frequency;
}

bool byWord(S a, S b) {
  return a.word < b.word;
}

bool byFrequency(S a, S b) {
  return a.frequency < b.frequency;
}

auto dump(R)(string title, R range) {
  writefln!"\n%s:\n%(%s\n%)"(title, range);
}

// A test function that makes an S
S makeS() {
  string makeWord() {
    static letters = iota('a', 'z' + 1).map!(to!dchar).array;
    return letters.randomSample(4).to!string; // Four-letter words! :p
  }
  size_t makeFrequency() {
    return uniform(0, 100);
  }
  return S(makeWord(), makeFrequency());
}

// A test function that makes some S'es
S[] makeSs() {
  return 10.iota.map!(i => makeS()).array;
}

void main() {
  auto ss = makeSs();
  dump("Unsorted", ss);

  auto byWordIndexes = new size_t[ss.length];
  ss.makeIndex!byWord(byWordIndexes);
  dump("Still unsorted but visited by word order",
       byWordIndexes.map!(i => ss[i]));

  auto byFrequencyIndexes = new size_t[ss.length];
  ss.makeIndex!byFrequency(byFrequencyIndexes);
  dump("Still unsorted but visited by frequency order",
       byFrequencyIndexes.map!(i => ss[i]));

  ss.sort!byWord();
  dump("Actually sorted by words", ss);

  ss.sort!byFrequency();
  dump("Actually sorted by frequencies", ss);
}

Sample output:

Unsorted:
S("bfmp", 78)
S("imsx", 17)
S("kmwy", 60)
S("klpw", 92)
S("hnrt", 24)
S("aivz", 29)
S("prst", 24)
S("cdlm", 86)
S("alvz", 13)
S("mnxz", 52)

Still unsorted but visited by word order:
S("aivz", 29)
S("alvz", 13)
S("bfmp", 78)
S("cdlm", 86)
S("hnrt", 24)
S("imsx", 17)
S("klpw", 92)
S("kmwy", 60)
S("mnxz", 52)
S("prst", 24)

Still unsorted but visited by frequency order:
S("alvz", 13)
S("imsx", 17)
S("hnrt", 24)
S("prst", 24)
S("aivz", 29)
S("mnxz", 52)
S("kmwy", 60)
S("bfmp", 78)
S("cdlm", 86)
S("klpw", 92)

Actually sorted by words:
S("aivz", 29)
S("alvz", 13)
S("bfmp", 78)
S("cdlm", 86)
S("hnrt", 24)
S("imsx", 17)
S("klpw", 92)
S("kmwy", 60)
S("mnxz", 52)
S("prst", 24)

Actually sorted by frequencies:
S("alvz", 13)
S("imsx", 17)
S("hnrt", 24)
S("prst", 24)
S("aivz", 29)
S("mnxz", 52)
S("kmwy", 60)
S("bfmp", 78)
S("cdlm", 86)
S("klpw", 92)

Ali

Reply via email to