1717
1818package com .tdunning .math .stats ;
1919
20- import com .carrotsearch .randomizedtesting .annotations .Seed ;
20+ import java .io .FileWriter ;
21+ import java .io .IOException ;
22+ import java .nio .ByteBuffer ;
23+ import java .util .ArrayList ;
24+ import java .util .Arrays ;
25+ import java .util .HashMap ;
26+ import java .util .List ;
27+ import java .util .Map ;
28+ import java .util .Random ;
29+
2130import org .apache .commons .math3 .util .Pair ;
2231import org .apache .mahout .common .RandomUtils ;
2332import org .apache .mahout .math .jet .random .AbstractContinousDistribution ;
33+ import org .apache .mahout .math .jet .random .Exponential ;
2434import org .apache .mahout .math .jet .random .Uniform ;
2535import org .junit .Before ;
2636import org .junit .BeforeClass ;
2737import org .junit .Test ;
2838
29- import java .io .FileWriter ;
30- import java .io .IOException ;
31- import java .nio .ByteBuffer ;
32- import java .util .*;
39+ import com .carrotsearch .randomizedtesting .annotations .Seed ;
3340
3441//to freeze the tests with a particular seed, put the seed on the next line
3542//@Seed("84527677CF03B566:A6FF596BDDB2D59D")
@@ -59,10 +66,25 @@ protected TDigest fromBytes(ByteBuffer bytes) {
5966 return MergingDigest .fromBytes (bytes );
6067 }
6168
62-
69+ @ Test
70+ public void writeUniformAsymmetricScaleFunctionResults () {
71+ try {
72+ writeAsymmetricScaleFunctionResults (Distribution .UNIFORM );
73+ } catch (Exception e ) {
74+ e .printStackTrace ();
75+ }
76+ }
6377
6478 @ Test
65- public void writeAsymmetricScaleFunctionResults () {
79+ public void writeExponentialAsymmetricScaleFunctionResults () {
80+ try {
81+ writeAsymmetricScaleFunctionResults (Distribution .EXPONENTIAL );
82+ } catch (Exception e ) {
83+ e .printStackTrace ();
84+ }
85+ }
86+
87+ private void writeAsymmetricScaleFunctionResults (Distribution distribution ) throws Exception {
6688
6789 List <ScaleFunction > scaleFcns = Arrays .asList (ScaleFunction .K_0 , ScaleFunction .K_1 ,
6890 ScaleFunction .K_2 , ScaleFunction .K_3 , ScaleFunction .K_1_GLUED ,
@@ -79,12 +101,12 @@ public void writeAsymmetricScaleFunctionResults() {
79101 digestParams .put (fcn .toString () + "_USUAL" , new Pair <>(fcn , false ));
80102 }
81103 }
82- writeSeveralDigestUniformResults (digestParams , numTrials , "../docs/asymmetric/data/merging/" );
83-
104+ writeSeveralDigestUniformResults (digestParams , numTrials , distribution ,
105+ "../docs/asymmetric/data/merging/" + distribution . name () + "/" );
84106 }
85107
86- public void writeSeveralDigestUniformResults (Map <String , Pair <ScaleFunction , Boolean >> digestParams , int numTrials ,
87- String writeLocation ) {
108+ private void writeSeveralDigestUniformResults (Map <String , Pair <ScaleFunction , Boolean >> digestParams ,
109+ int numTrials , Distribution distribution , String writeLocation ) throws Exception {
88110
89111 int trialSize = 1_000_000 ;
90112 double compression = 100 ;
@@ -93,8 +115,12 @@ public void writeSeveralDigestUniformResults(Map<String, Pair<ScaleFunction, Boo
93115
94116 Map <String , List <Integer >> centroidCounts = new HashMap <>();
95117
118+ Map <String , List <List <Integer >>> centroidSequences = new HashMap <>();
119+
120+
96121 for (Map .Entry <String , Pair <ScaleFunction , Boolean >> entry : digestParams .entrySet ()) {
97122 centroidCounts .put (entry .getKey (), new ArrayList <Integer >());
123+ centroidSequences .put (entry .getKey (), new ArrayList <List <Integer >>());
98124 try {
99125 Map <Double , List <String >> records = new HashMap <>();
100126 for (double q : quants ) {
@@ -105,7 +131,12 @@ public void writeSeveralDigestUniformResults(Map<String, Pair<ScaleFunction, Boo
105131 digest .setScaleFunction (entry .getValue ().getFirst ());
106132 digest .setUseAlternatingSort (entry .getValue ().getSecond ());
107133 Random rand = new Random ();
108- AbstractContinousDistribution gen = new Uniform (50 , 51 , rand );
134+ AbstractContinousDistribution gen ;
135+ if (distribution .equals (Distribution .UNIFORM )) {
136+ gen = new Uniform (50 , 51 , rand );
137+ } else if (distribution .equals (Distribution .EXPONENTIAL )) {
138+ gen = new Exponential (5 , rand );
139+ } else throw new Exception ("distribution not specified" );
109140 double [] data = new double [trialSize ];
110141 for (int i = 0 ; i < trialSize ; i ++) {
111142 data [i ] = gen .nextDouble ();
@@ -121,6 +152,12 @@ public void writeSeveralDigestUniformResults(Map<String, Pair<ScaleFunction, Boo
121152 String .valueOf (Math .abs (q1 - q2 ) / Math .min (q , 1 - q )) + "\n " );
122153 }
123154 centroidCounts .get (entry .getKey ()).add (digest .centroids ().size ());
155+
156+ List <Integer > seq = new ArrayList <>();
157+ for (Centroid c : digest .centroids ()) {
158+ seq .add (c .count ());
159+ }
160+ centroidSequences .get (entry .getKey ()).add (seq );
124161 }
125162 for (double q : quants ) {
126163 FileWriter csvWriter = new FileWriter (writeLocation + entry .getKey () + "_" + String .valueOf (q ) + ".csv" );
@@ -140,6 +177,17 @@ public void writeSeveralDigestUniformResults(Map<String, Pair<ScaleFunction, Boo
140177 csvWriter .flush ();
141178 csvWriter .close ();
142179
180+
181+ FileWriter csvWriter2 = new FileWriter (writeLocation + entry .getKey () + "_centroid_sizes.csv" );
182+ for (List <Integer > ct : centroidSequences .get (entry .getKey ())) {
183+ for (Integer c : ct ) {
184+ csvWriter2 .append (c .toString ()).append ("," );
185+ }
186+ csvWriter2 .append ("\n " );
187+ }
188+ csvWriter2 .flush ();
189+ csvWriter2 .close ();
190+
143191 } catch (IOException e ) {
144192 System .out .println (e .toString ());
145193 return ;
0 commit comments