| Constructor and Description |
|---|
Reader(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path file,
org.apache.hadoop.conf.Configuration conf)
Create a new RCFile reader.
|
Reader(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path file,
int bufferSize,
org.apache.hadoop.conf.Configuration conf,
long start,
long length)
Create a new RCFile reader.
|
| Modifier and Type | Method and Description |
|---|---|
void |
close()
Close the reader.
|
protected void |
currentValueBuffer() |
BytesRefArrayWritable |
getColumn(int columnID,
BytesRefArrayWritable rest)
Fetch all data in the buffer for a given column.
|
org.apache.hadoop.io.compress.CompressionCodec |
getCompressionCodec() |
int |
getCurrentBlockLength() |
int |
getCurrentCompressedKeyLen() |
RCFile.KeyBuffer |
getCurrentKeyBufferObj()
Returns the KeyBuffer object used in the reader.
|
int |
getCurrentKeyLength() |
void |
getCurrentRow(BytesRefArrayWritable ret)
Get the current row; make sure
next(LongWritable)
has been called first. |
RCFile.ValueBuffer |
getCurrentValueBufferObj()
Returns the ValueBuffer object used in the reader.
|
org.apache.hadoop.io.SequenceFile.Metadata |
getMetadata()
Return the metadata (Text to Text map) that was written into the
file.
|
org.apache.hadoop.io.Text |
getMetadataValueOf(org.apache.hadoop.io.Text key)
Return the metadata value associated with the given key.
|
long |
getPosition()
Return the current byte position in the input file.
|
boolean |
hasRecordsInBuffer() |
boolean |
isCompressedRCFile() |
long |
lastSeenSyncPos()
Returns the last seen sync position.
|
boolean |
next(org.apache.hadoop.io.LongWritable readRows)
Returns how many rows we fetched with next().
|
boolean |
nextBlock() |
boolean |
nextColumnsBatch()
Deprecated.
|
protected int |
nextKeyBuffer() |
protected org.apache.hadoop.fs.FSDataInputStream |
openFile(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path file,
int bufferSize,
long length)
Override this method to specialize the type of
FSDataInputStream returned. |
void |
resetBuffer()
Resets the values which determine if there are more rows in the buffer.
This can be used after one calls seek or sync, if one called next before that.
|
void |
seek(long position)
Set the current byte position in the input file.
|
void |
sync(long position)
Seek to the next sync mark past a given position.
|
boolean |
syncSeen()
Returns true iff the previous call to next passed a sync mark.
|
String |
toString()
Returns the name of the file.
|
public Reader(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path file,
org.apache.hadoop.conf.Configuration conf)
throws IOException
Throws: IOException
public Reader(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path file,
int bufferSize,
org.apache.hadoop.conf.Configuration conf,
long start,
long length)
throws IOException
Throws: IOException
public org.apache.hadoop.io.SequenceFile.Metadata getMetadata()
public org.apache.hadoop.io.Text getMetadataValueOf(org.apache.hadoop.io.Text key)
Parameters: key - the metadata key to retrieve
protected org.apache.hadoop.fs.FSDataInputStream openFile(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path file,
int bufferSize,
long length)
throws IOException
Override this method to specialize the type of FSDataInputStream returned.
Throws: IOException
public long getPosition()
throws IOException
Throws: IOException
public void seek(long position)
throws IOException
The position passed must be a position returned by
RCFile.Writer.getLength() when writing this file. To seek to an
arbitrary position, use sync(long). In other
words, the current seek can only seek to the end of the file. For other
positions, use sync(long).
Throws: IOException
public void resetBuffer()
public void sync(long position)
throws IOException
Throws: IOException
protected int nextKeyBuffer()
throws IOException
Throws: IOException
protected void currentValueBuffer()
throws IOException
Throws: IOException
public boolean nextBlock()
throws IOException
Throws: IOException
public BytesRefArrayWritable getColumn(int columnID, BytesRefArrayWritable rest) throws IOException
nextColumnsBatch().
Calling getColumn() will not change the result of
next(LongWritable) and
getCurrentRow(BytesRefArrayWritable).
Parameters: columnID - the number of the column to get, 0 to N-1
Throws: IOException
@Deprecated public boolean nextColumnsBatch() throws IOException
next(LongWritable) and
getCurrentRow(BytesRefArrayWritable)
Throws: IOException
public boolean next(org.apache.hadoop.io.LongWritable readRows)
throws IOException
seek(long),
nextColumnsBatch() can change the underlying key buffer and
value buffer.
Throws: IOException
public boolean hasRecordsInBuffer()
public void getCurrentRow(BytesRefArrayWritable ret) throws IOException
next(LongWritable)
first.
Throws: IOException
public boolean syncSeen()
public long lastSeenSyncPos()
public boolean isCompressedRCFile()
public void close()
public RCFile.KeyBuffer getCurrentKeyBufferObj()
public RCFile.ValueBuffer getCurrentValueBufferObj()
public int getCurrentBlockLength()
public int getCurrentKeyLength()
public int getCurrentCompressedKeyLen()
public org.apache.hadoop.io.compress.CompressionCodec getCompressionCodec()
Copyright © 2019 The Apache Software Foundation. All Rights Reserved.