/*** HELP START ***//*
 
## >>> `quickSortHashSDDV()` subroutine: <<< <a name="quicksorthashsddv-subroutine"></a> #######################  

The **quickSortHashSDDV()** subroutine is an alternative to the 
`CALL SORTN()` subroutine for 1-based big arrays (i.e. `> 10'000'000` elements) 
when memory used by `call sortn()` may be an issue. 
For smaller arrays the memory footprint is not significant.

The subroutine is based on an iterative quick sort algorithm 
implemented in the `qsortInCbyProcProto()` *C* prototype function.

The number of "sparse distinct data values" (argument `SDDV`) may 
be adjusted to use the hash sort instead of the quick sort.
  E.g. when the number of unique values to sort is less than 
  some *N*, an ordered hash table is used to store the values 
  together with their counts and to sort them.

*Caution!* Array parameters to subroutine calls *must* be 1-based.

*Note!* Due to improper memory reporting/releasing for hash 
  tables in the FCMP procedure, the memory usage reported after 
  running the subroutine may not be in line with the RAM actually 
  required for processing.
  
### SYNTAX: ###################################################################

The basic syntax is the following, the `<...>` means optional parameters:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~sas
call quickSortHashSDDV(A, SDDV)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

**Arguments description**:

1. `A`    - Argument is a 1-based array of numeric values.

2. `SDDV` - A number of distinct data values, e.g. 100'000.

*//*** HELP END ***/

/* Sorts the numeric array _A_ in place, in ascending order.
   Missing values end up at the beginning of the array, grouped by the
   index returned by fromMissingToNumberBS(); non-missing values follow.

   Non-missing values are sorted in one of two ways:
     - if no more than SDDV distinct values were seen, they are rebuilt
       from an ordered hash table of (value, count) pairs;
     - otherwise qsortInCbyProcProto() is called on the non-missing part.
 */
subroutine quickSortHashSDDV(_A_[*], /* Argument is a 1-based array       */
                             SDDV    /* A number of distinct data values, e.g. 100'000 */
); 
  outargs _A_;

  /* per-kind counters of missing values, all starting from zero */
  array NULLS[28] / NOSYMBOLS ;
  call zeromatrix(NULLS);

  /* Ascending-ordered hash table: Key is a distinct value of _A_,
     N is the number of its occurrences. It is filled only while
     the number of stored keys does not exceed SDDV.
   */
  declare hash H(hashexp:20, ordered:"a"); 
    rc = H.defineKey("Key");
    rc = H.defineData("Key","N");
    rc = H.defineDone();
    declare hiter IT("H");

  /* Single pass from the last element to the first one:
     missing values are counted, non-missing values are moved
     to the right-hand side of the array.
     _edge_ is the right-most position not yet occupied
     by a relocated non-missing value.
   */
  _size_ = dim(_A_);
  _edge_ = _size_;
  do _pos_ = _size_ to 1 by -1;
    if _A_[_pos_] <= .z then
      do;
        /* missing value: only count it, its slot is overwritten later */
        NULLS[fromMissingToNumberBS(_A_[_pos_])] + 1;
      end;
    else
      do;
        /* keep counting distinct values until the SDDV limit is exceeded */
        if H.NUM_ITEMS() <= SDDV then
          do;
            Key = _A_[_pos_];
            if H.find() = 0 then N = N + 1;
                            else N = 1;
            rc = H.replace();
          end;

        /* exchange with the element at _edge_ (nothing to do when both coincide) */
        if _edge_ > _pos_ then
          do;
            _swap_ = _A_[_edge_];
            _A_[_edge_] = _A_[_pos_];
            _A_[_pos_] = _swap_;
          end;
        _edge_ = _edge_ - 1;
      end;
  end;
  /* here _edge_ equals the number of missing values found,
     non-missing values occupy positions _edge_+1 to _size_
   */

  /* non-missing values */
  if H.NUM_ITEMS() > SDDV then
    do;
      /* too many distinct values: drop the hash table and use Quick Sort */
      rc = H.clear();
      /* the leading part of the array is filled with the lowest number */
      _lowest_ = -CONSTANT('BIG');
      do _pos_ = 1 to _edge_;
        _A_[_pos_] = _lowest_;
      end;
      /* NOTE(review): the bounds passed look like 0-based indexes of the
         first and the last non-missing element - confirm in the prototype */
      call qsortInCbyProcProto(_A_, _edge_, _size_ - 1);
    end;
  else
    do;
      /* "sparse distinct data values": write every key N times, in key order */
      _out_ = _edge_;
      do while(IT.next() = 0);
        do _rep_ = 1 to N;
          _out_ + 1;
          _A_[_out_] = Key;
        end;
      end;
      rc = H.clear(); /* the hash table is not needed any more */
    end;

  /* missing values: rebuild the leading part of the array from the counters */
  if _edge_ > 0 then
    do;
      _out_ = 0;
      do _kind_ = 1 to 28;
        do _rep_ = 1 to NULLS[_kind_];
          _out_ + 1;
          _A_[_out_] = fromNumberToMissing(_kind_);
        end;
      end;
    end;
endsub;


/*** HELP START ***//*
 
### EXAMPLES AND USECASES: ####################################################

**EXAMPLE 1.** For session with 8GB of RAM
   Array of size 250'000'000 with values in range 
   from 0 to 99'999'999 and around 10% of various 
   missing values.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~sas
  %let size = 250000000;
  options fullstimer;

  data _null_;
    array test[&size.] _temporary_ ;
    
    array m[0:27] _temporary_ 
      (._ .  .A .B .C .D .E .F .G .H .I .J .K .L 
       .M .N .O .P .Q .R .S .T .U .V .W .X .Y .Z);

    t = time();
    call streaminit(123);
    do _N_ = &size. to 1 by -1;
      _I_ + 1;
      if rand("uniform") > 0.1 then test[_I_] = int(100000000*rand("uniform"));
                               else test[_I_] = m[mod(_N_,28)];
    end;
    t = time() - t;
    put "Array population time: "  t;

    put "First 20 elements before sorting:";
    do _N_ = 1 to 20;
      put test[_N_] = @;
    end;  

    t = time();
    call quickSortHashSDDV (test, 2e4);
    t = time()-t;
    put "Sorting time: " / t=;

    put; put "First 20 elements after sorting:";
    do _N_ = 1 to 20;
      put test[_N_] = @;
    end;
  run;
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


**Example 2.** For session with 8GB of RAM
   Array of size 250'000'000 with values in range 
   from 0 to 9'999 and around 10% of various 
   missing values.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~sas
  %let size = 250000000;
  options fullstimer;

  data _null_;
    array test[&size.] _temporary_ ;
    
    array m[0:27] _temporary_ 
      (._ .  .A .B .C .D .E .F .G .H .I .J .K .L 
       .M .N .O .P .Q .R .S .T .U .V .W .X .Y .Z);

    t = time();
    call streaminit(123);
    do _N_ = &size. to 1 by -1;
      _I_ + 1;
      if rand("uniform") > 0.1 then test[_I_] = int(10000*rand("uniform"));
                               else test[_I_] = m[mod(_N_,28)];
    end;
    t = time() - t;
    put "Array population time: "  t;

    put "First 20 elements before sorting:";
    do _N_ = 1 to 20;
      put test[_N_] = @;
    end;  

    t = time();
    call quickSortHashSDDV (test, 2e4);
    t = time()-t;
    put "Sorting time: " / t=;

    put; put "First 20 elements after sorting:";
    do _N_ = 1 to 20;
      put test[_N_] = @;
    end;
  run;
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

---

*//*** HELP END ***/
