public class HyperLogLog extends Object implements NumDistinctValueEstimator
This is an implementation of the following variants of the HyperLogLog (HLL)
algorithm:
Original - Original HLL algorithm by Flajolet et al., from
http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
HLLNoBias - Google's implementation of bias correction based on lookup table
http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
HLL++ - Google's implementation of HLL++ algorithm that uses SPARSE registers
http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
The following constructor parameters determine which algorithm is
used:
numRegisterIndexBits - number of LSB hashcode bits to be used as register index.
Default is 14. min = 4 and max = 16
numHashBits - number of bits for hashcode. Default is 64. min = 32 and max = 128
encoding - Type of encoding to use (SPARSE or DENSE). The algorithm automatically
switches to DENSE beyond a threshold. Default: SPARSE
enableBitPacking - Whether to enable bit packing. Bit packing improves compression
at the cost of more CPU cycles. Default: true
noBias - Use Google's bias table lookup for short range bias correction.
Enabling this will greatly improve the estimation accuracy for short
range values. Default: true
| Modifier and Type | Class and Description |
|---|---|
static class |
HyperLogLog.EncodingType |
static class |
HyperLogLog.HyperLogLogBuilder |
LOG

| Modifier and Type | Method and Description |
|---|---|
void |
add(long hashcode) |
void |
addBoolean(boolean val) |
void |
addByte(byte val) |
void |
addBytes(byte[] val) |
void |
addChar(char val) |
void |
addDouble(double val) |
void |
addFloat(float val) |
void |
addInt(int val) |
void |
addLong(long val) |
void |
addShort(short val) |
void |
addString(String val)
Java's default charset will be used for strings.
|
void |
addString(String val,
Charset charset) |
void |
addToEstimator(double d) |
void |
addToEstimator(org.apache.hadoop.hive.common.type.HiveDecimal decimal) |
void |
addToEstimator(long v) |
void |
addToEstimator(String s) |
static HyperLogLog.HyperLogLogBuilder |
builder() |
boolean |
canMerge(NumDistinctValueEstimator o) |
long |
count() |
NumDistinctValueEstimator |
deserialize(byte[] buf) |
boolean |
equals(Object obj) |
long |
estimateNumDistinctValues() |
HyperLogLog.EncodingType |
getEncoding() |
HLLDenseRegister |
getHLLDenseRegister() |
HLLSparseRegister |
getHLLSparseRegister() |
int |
getNumRegisterIndexBits() |
double |
getStandardError() |
int |
hashCode() |
int |
lengthFor(org.apache.hadoop.hive.ql.util.JavaDataModel model) |
void |
merge(HyperLogLog hll)
Merge the specified hyperloglog to the current one.
|
void |
mergeEstimators(NumDistinctValueEstimator o) |
void |
reset() |
byte[] |
serialize() |
void |
setCount(long count) |
void |
setEncoding(HyperLogLog.EncodingType encoding) |
void |
setHLLDenseRegister(byte[] reg)
Reconstruct dense registers from byte array
|
void |
setHLLSparseRegister(int[] reg)
Reconstruct sparse map from serialized integer list
|
HyperLogLog |
squash(int p0)
Reduces the accuracy of the HLL provided to a smaller size
|
String |
toString() |
String |
toStringExtended() |
public static HyperLogLog.HyperLogLogBuilder builder()
public void addBoolean(boolean val)
public void addByte(byte val)
public void addBytes(byte[] val)
public void addShort(short val)
public void addInt(int val)
public void addLong(long val)
public void addFloat(float val)
public void addDouble(double val)
public void addChar(char val)
public void addString(String val)
Parameters: val - input string
public void add(long hashcode)
public long estimateNumDistinctValues()
Specified by: estimateNumDistinctValues in interface NumDistinctValueEstimator
public long count()
public void setCount(long count)
public double getStandardError()
public HLLDenseRegister getHLLDenseRegister()
public HLLSparseRegister getHLLSparseRegister()
public void setHLLSparseRegister(int[] reg)
Parameters: reg - uncompressed and delta decoded integer list
public void setHLLDenseRegister(byte[] reg)
Parameters: reg - unpacked byte array
public void merge(HyperLogLog hll)
Parameters: hll - hyperloglog to be merged
Throws: IllegalArgumentException
public HyperLogLog squash(int p0)
Parameters: p0 - new p size for the new HyperLogLog (smaller or no change)
public String toStringExtended()
public int getNumRegisterIndexBits()
public HyperLogLog.EncodingType getEncoding()
public void setEncoding(HyperLogLog.EncodingType encoding)
public void reset()
Specified by: reset in interface NumDistinctValueEstimator
public byte[] serialize()
Specified by: serialize in interface NumDistinctValueEstimator
public NumDistinctValueEstimator deserialize(byte[] buf)
Specified by: deserialize in interface NumDistinctValueEstimator
public void addToEstimator(long v)
Specified by: addToEstimator in interface NumDistinctValueEstimator
public void addToEstimator(String s)
Specified by: addToEstimator in interface NumDistinctValueEstimator
public void addToEstimator(double d)
Specified by: addToEstimator in interface NumDistinctValueEstimator
public void addToEstimator(org.apache.hadoop.hive.common.type.HiveDecimal decimal)
Specified by: addToEstimator in interface NumDistinctValueEstimator
public void mergeEstimators(NumDistinctValueEstimator o)
Specified by: mergeEstimators in interface NumDistinctValueEstimator
public int lengthFor(org.apache.hadoop.hive.ql.util.JavaDataModel model)
Specified by: lengthFor in interface NumDistinctValueEstimator
public boolean canMerge(NumDistinctValueEstimator o)
Specified by: canMerge in interface NumDistinctValueEstimator

Copyright © 2019 The Apache Software Foundation. All Rights Reserved.