2121import java .util .Arrays ;
2222import java .util .TimeZone ;
2323
24+ import org .apache .parquet .bytes .ByteBufferInputStream ;
25+ import org .apache .parquet .bytes .BytesInput ;
2426import org .apache .parquet .bytes .BytesUtils ;
2527import org .apache .parquet .column .ColumnDescriptor ;
2628import org .apache .parquet .column .Dictionary ;
@@ -388,15 +390,16 @@ private void decodeDictionaryIds(
388390 * is guaranteed that num is smaller than the number of values left in the current page.
389391 */
390392
// Boolean batch read: checks the target column's type, then forwards the request to
// defColumn.readBooleans.
// The "-" and "+" lines below are the old and new forms of the same signature; the only
// difference between them is the added `throws IOException` clause.
391- private void readBooleanBatch (int rowId , int num , WritableColumnVector column ) {
393+ private void readBooleanBatch (int rowId , int num , WritableColumnVector column )
394+ throws IOException {
// Only a BooleanType column is accepted; any other type is rejected with the exception
// built by constructConvertNotSupportedException(descriptor, column).
392395 if (column .dataType () != DataTypes .BooleanType ) {
393396 throw constructConvertNotSupportedException (descriptor , column );
394397 }
// Passes num, column, rowId and maxDefLevel through unchanged, together with dataColumn
// cast to VectorizedValuesReader.
395398 defColumn .readBooleans (
396399 num , column , rowId , maxDefLevel , (VectorizedValuesReader ) dataColumn );
397400 }
398401
399- private void readIntBatch (int rowId , int num , WritableColumnVector column ) {
402+ private void readIntBatch (int rowId , int num , WritableColumnVector column ) throws IOException {
400403 // This is where we implement support for the valid type conversions.
401404 // TODO: implement remaining type conversions
402405 if (column .dataType () == DataTypes .IntegerType || column .dataType () == DataTypes .DateType ||
@@ -414,7 +417,7 @@ private void readIntBatch(int rowId, int num, WritableColumnVector column) {
414417 }
415418 }
416419
417- private void readLongBatch (int rowId , int num , WritableColumnVector column ) {
420+ private void readLongBatch (int rowId , int num , WritableColumnVector column ) throws IOException {
418421 // This is where we implement support for the valid type conversions.
419422 if (column .dataType () == DataTypes .LongType ||
420423 DecimalType .is64BitDecimalType (column .dataType ()) ||
@@ -434,7 +437,7 @@ private void readLongBatch(int rowId, int num, WritableColumnVector column) {
434437 }
435438 }
436439
437- private void readFloatBatch (int rowId , int num , WritableColumnVector column ) {
440+ private void readFloatBatch (int rowId , int num , WritableColumnVector column ) throws IOException {
438441 // This is where we implement support for the valid type conversions.
439442 // TODO: support implicit cast to double?
440443 if (column .dataType () == DataTypes .FloatType ) {
@@ -445,7 +448,7 @@ private void readFloatBatch(int rowId, int num, WritableColumnVector column) {
445448 }
446449 }
447450
448- private void readDoubleBatch (int rowId , int num , WritableColumnVector column ) {
451+ private void readDoubleBatch (int rowId , int num , WritableColumnVector column ) throws IOException {
449452 // This is where we implement support for the valid type conversions.
450453 // TODO: implement remaining type conversions
451454 if (column .dataType () == DataTypes .DoubleType ) {
@@ -456,7 +459,7 @@ private void readDoubleBatch(int rowId, int num, WritableColumnVector column) {
456459 }
457460 }
458461
459- private void readBinaryBatch (int rowId , int num , WritableColumnVector column ) {
462+ private void readBinaryBatch (int rowId , int num , WritableColumnVector column ) throws IOException {
460463 // This is where we implement support for the valid type conversions.
461464 // TODO: implement remaining type conversions
462465 VectorizedValuesReader data = (VectorizedValuesReader ) dataColumn ;
@@ -556,7 +559,7 @@ public Void visit(DataPageV2 dataPageV2) {
556559 });
557560 }
558561
559- private void initDataReader (Encoding dataEncoding , byte [] bytes , int offset ) throws IOException {
562+ private void initDataReader (Encoding dataEncoding , ByteBufferInputStream in ) throws IOException {
560563 this .endOfPageValueCount = valuesRead + pageValueCount ;
561564 if (dataEncoding .usesDictionary ()) {
562565 this .dataColumn = null ;
@@ -581,7 +584,7 @@ private void initDataReader(Encoding dataEncoding, byte[] bytes, int offset) thr
581584 }
582585
583586 try {
584- dataColumn .initFromPage (pageValueCount , bytes , offset );
587+ dataColumn .initFromPage (pageValueCount , in );
585588 } catch (IOException e ) {
586589 throw new IOException ("could not read page in col " + descriptor , e );
587590 }
@@ -602,12 +605,11 @@ private void readPageV1(DataPageV1 page) throws IOException {
602605 this .repetitionLevelColumn = new ValuesReaderIntIterator (rlReader );
603606 this .definitionLevelColumn = new ValuesReaderIntIterator (dlReader );
604607 try {
605- byte [] bytes = page .getBytes ().toByteArray ();
606- rlReader .initFromPage (pageValueCount , bytes , 0 );
607- int next = rlReader .getNextOffset ();
608- dlReader .initFromPage (pageValueCount , bytes , next );
609- next = dlReader .getNextOffset ();
610- initDataReader (page .getValueEncoding (), bytes , next );
608+ BytesInput bytes = page .getBytes ();
609+ ByteBufferInputStream in = bytes .toInputStream ();
610+ rlReader .initFromPage (pageValueCount , in );
611+ dlReader .initFromPage (pageValueCount , in );
612+ initDataReader (page .getValueEncoding (), in );
611613 } catch (IOException e ) {
612614 throw new IOException ("could not read page " + page + " in col " + descriptor , e );
613615 }
@@ -619,12 +621,13 @@ private void readPageV2(DataPageV2 page) throws IOException {
619621 page .getRepetitionLevels (), descriptor );
620622
621623 int bitWidth = BytesUtils .getWidthFromMaxInt (descriptor .getMaxDefinitionLevel ());
622- this .defColumn = new VectorizedRleValuesReader (bitWidth );
624+ // do not read the length from the stream. v2 pages handle dividing the page bytes.
625+ this .defColumn = new VectorizedRleValuesReader (bitWidth , false );
623626 this .definitionLevelColumn = new ValuesReaderIntIterator (this .defColumn );
624- this .defColumn .initFromBuffer (
625- this .pageValueCount , page .getDefinitionLevels ().toByteArray ());
627+ this .defColumn .initFromPage (
628+ this .pageValueCount , page .getDefinitionLevels ().toInputStream ());
626629 try {
627- initDataReader (page .getDataEncoding (), page .getData ().toByteArray (), 0 );
630+ initDataReader (page .getDataEncoding (), page .getData ().toInputStream () );
628631 } catch (IOException e ) {
629632 throw new IOException ("could not read page " + page + " in col " + descriptor , e );
630633 }
0 commit comments