On Friday, 21 January 2022 at 09:10:56 UTC, forkit wrote:


ok... in the interest of correcting the code I posted previously...

... here is a version that actually works in secs (for a million records), as opposed to hours!


// ---------------

/+
=====================================================================
   This program creates a sample dataset consisting of 'random' records,
   and then outputs that dataset to a file.

   Arguments can be passed on the command line,
   or otherwise default values are used instead.

   Example of that output can be seen at the end of this code.
=====================================================================
+/

module test;
@safe:
import std.stdio : write, writef, writeln, writefln;
import std.range : iota, takeExactly;
import std.array : array, byPair, Appender, appender;
import std.random : Random, unpredictableSeed, dice, choice, uniform;
import std.algorithm : map, uniq, canFind, among;
import std.conv : to;
import std.format;
import std.stdio : File;
import std.file : exists;
import std.exception : enforce;

debug { import std; }

/// Random number generator used by the functions of this module.
Random rnd;

/// Module constructor: seeds `rnd` from `unpredictableSeed`.
static this()
{
    rnd = Random(unpredictableSeed); // thanks Ali
}

/++
    Program entry point: builds a dataset of random records and writes it
    to a file.

    Command line: `<program> recordsNeeded valuesPerRecord fname`.
    When fewer than three arguments are supplied, built-in defaults are used
    (10 records, 8 values per record, "./rnd_records.txt").

    Throws:
        `Exception` (via `enforce`) when `recordsNeeded` or `valuesPerRecord`
        is out of range, or, in non-debug builds, when the output file
        already exists. `to!int` throws when an argument is not an integer.
+/
void main(string[] args)
{
    // ids need to be 9 digits and need to start with 999, so they are
    // taken from the half-open interval [firstId, idLimit).
    enum int firstId = 999 * 10^^6;
    enum int idLimit = 10^^9;
    enum int maxRecords = idLimit - firstId;

    int recordsNeeded, valuesPerRecord;
    string fname;

    if(args.length < 4)
    {
        //recordsNeeded = 1_000_000;
        //recordsNeeded = 100_000;
        recordsNeeded = 10;

        valuesPerRecord = 8;

        //fname = "D:/rnd_records.txt";
        fname = "./rnd_records.txt";
    }
    else
    {
        // to!int throws on anything that is not a valid integer
        recordsNeeded = to!int(args[1]);
        valuesPerRecord = to!int(args[2]);
        fname = args[3];
    }

    // takeExactly must never be asked for more ids than the id interval
    // holds (or for a negative count): that would fail an assertion, or in
    // -release builds run past the end of the interval.
    enforce(recordsNeeded >= 0 && recordsNeeded <= maxRecords,
        format("recordsNeeded must be between 0 and %s, but was %s",
               maxRecords, recordsNeeded));
    enforce(valuesPerRecord >= 0,
        format("valuesPerRecord must not be negative, but was %s",
               valuesPerRecord));

    // debug builds report the settings and overwrite an existing file;
    // other builds refuse to overwrite.
    debug
    { writefln("%s records, %s values for record, will be written to file: %s", recordsNeeded, valuesPerRecord, fname); }
    else
    { enforce(!exists(fname), "Oop! That file already exists!"); }

    int[] idArray = takeExactly(iota(firstId, idLimit), recordsNeeded).array;
    debug { writefln("idArray.length = %s", idArray.length); }

    int[][] valuesArray;
    createValuesArray(valuesArray, recordsNeeded, valuesPerRecord);

    int[][int][] records = CreateDataSet(idArray, valuesArray, recordsNeeded);

    ProcessRecords(records, fname);

    writefln("All done. Check if records written to %s", fname);
}

/++
    Fills `valuesArray` with `recordsNeeded` rows, each holding
    `valuesPerRecord` random values.

    Every value is the index returned by `rnd.dice(0.6, 1.4)`, cast to `int`.

    Params:
        valuesArray     = receives the generated rows; its previous content
                          is replaced
        recordsNeeded   = number of rows to generate
        valuesPerRecord = number of values in each row
+/
void createValuesArray
(ref int[][] valuesArray, const(int) recordsNeeded, const(int) valuesPerRecord)
{
    // Builds a single row of valuesPerRecord random values.
    int[] makeRow()
    {
        auto draws = iota(valuesPerRecord)
            .map!(_ => cast(int) rnd.dice(0.6, 1.4));
        return draws.array;
    }

    // The row index produced by iota is not needed, only the row count.
    auto rows = iota(recordsNeeded).map!(_ => makeRow());
    valuesArray = rows.array; // NOTE: does register with -profile=gc

    debug { writefln("valuesArray.length = %s", valuesArray.length); }
}

/++
    Pairs every id with its row of values, producing one single-entry
    associative array per record.

    Params:
        idArray     = record ids; one record is created per id
        valuesArray = value rows; row `i` belongs to `idArray[i]`
        numRecords  = expected number of records; kept for interface
                      compatibility, the actual count is `idArray.length`

    Returns:
        An array of `idArray.length` elements where element `i` is
        `[ idArray[i] : valuesArray[i] ]`.

    Throws:
        `Exception` when `valuesArray` has fewer rows than there are ids.
+/
int[][int][] CreateDataSet
(const(int)[] idArray, int[][] valuesArray, const(int) numRecords)
{
    enforce(valuesArray.length >= idArray.length,
        "CreateDataSet: fewer value rows than ids");

    int[][int][] records;
    // Reserve from the real element count; numRecords could be negative
    // or disagree with idArray.length.
    records.reserve(idArray.length);
    debug { writefln("records.capacity is %s", records.capacity); }

    foreach(i, const id; idArray)
    {
        // NOTE: below does register with -profile=gc
        records ~= [ id : valuesArray[i] ];
    }

    debug { writefln("records.length = %s", records.length); }

    // The array was built here and is not shared with anyone, so it is
    // returned as is; a .dup would only copy every element once more.
    return records;
}

/++
    Writes all records to the file `fname`, one text line per id:
    the id, a comma, then the values separated by commas, ended by "\n".

    The file is opened in "w" mode. The whole output is first assembled in
    memory and then written with a single `write` call.

    Params:
        recArray = records to write; every key/value pair of every
                   associative array becomes one line
        fname    = path of the output file
+/
void ProcessRecords
(in int[][int][] recArray, const(string) fname)
{
    auto file = File(fname, "w");
    scope(exit) file.close;

    Appender!string bigString = appender!string;
    // Lower-bound hint only: each record produces more than one character.
    bigString.reserve(recArray.length);
    debug { writefln("bigString.capacity is %s", bigString.capacity); }

    foreach(ref const record; recArray)
    {
        // byPair yields tuples by value: [0] is the id, [1] the values
        foreach (pair; record.byPair)
        {
            const(int)[] values = pair[1];
            // Format straight into the appender instead of building
            // temporary strings for the id, the value list and the line.
            bigString.formattedWrite!"%s,%(%s,%)\n"(pair[0], values);
        }
    }

    file.write(bigString[]);
}

/+
sample file output:

999251173,0,1,0,1,0,1,0,1
999536973,1,1,1,1,1,1,1,1
999313603,1,1,0,0,0,1,0,0
999897905,1,1,1,1,1,0,1,1
999843809,0,1,1,0,1,1,0,0
999513275,0,0,0,1,0,1,1,1
999712363,0,0,1,1,1,0,1,1
999835159,0,1,0,0,1,1,1,1
999145412,1,1,1,1,1,1,0,1
999767352,0,1,1,1,1,1,1,1

+/

// ---------------

Reply via email to