Skip to content

Commit f284238

Browse files
Daniel Weeks authored and julienledem committed
PARQUET-22: Backport of HIVE-6938 adding rename support for parquet
This patch was included in Hive after moving the SerDe to Hive (included in Hive 0.14+). The backport is required for use with previous versions. Author: Daniel Weeks <[email protected]> Closes apache#13 from dcw-netflix/backport-hive-6938-rename and squashes the following commits: 453367b [Daniel Weeks] Backport of HIVE-6938 adding rename support for parquet
1 parent fb01048 commit f284238

File tree

1 file changed

+28
-3
lines changed

1 file changed

+28
-3
lines changed

parquet-hive/parquet-hive-storage-handler/src/main/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ public class DataWritableReadSupport extends ReadSupport<ArrayWritable> {
4545

4646
private static final String TABLE_SCHEMA = "table_schema";
4747
public static final String HIVE_SCHEMA_KEY = "HIVE_TABLE_SCHEMA";
48+
public static final String PARQUET_COLUMN_INDEX_ACCESS = "parquet.column.index.access";
4849

4950
/**
5051
* From a string which columns names (including hive column), return a list
@@ -93,7 +94,8 @@ public parquet.hadoop.api.ReadSupport.ReadContext init(final Configuration confi
9394
for (final Integer idx : indexColumnsWanted) {
9495
typeListWanted.add(tableSchema.getType(listColumns.get(idx)));
9596
}
96-
requestedSchemaByUser = new MessageType(fileSchema.getName(), typeListWanted);
97+
requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(),
98+
typeListWanted), fileSchema, configuration);
9799

98100
return new ReadContext(requestedSchemaByUser, contextMetadata);
99101
} else {
@@ -121,8 +123,31 @@ public RecordMaterializer<ArrayWritable> prepareForRead(final Configuration conf
121123
throw new IllegalStateException("ReadContext not initialized properly. " +
122124
"Don't know the Hive Schema.");
123125
}
124-
final MessageType tableSchema = MessageTypeParser.
125-
parseMessageType(metadata.get(HIVE_SCHEMA_KEY));
126+
final MessageType tableSchema = resolveSchemaAccess(MessageTypeParser.
127+
parseMessageType(metadata.get(HIVE_SCHEMA_KEY)), fileSchema, configuration);
128+
126129
return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
127130
}
131+
132+
/**
133+
* Determine the file column names based on the position within the requested columns and
134+
* use that as the requested schema.
135+
*/
136+
private MessageType resolveSchemaAccess(MessageType requestedSchema, MessageType fileSchema,
137+
Configuration configuration) {
138+
if(configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false)) {
139+
final List<String> listColumns = getColumns(configuration.get(IOConstants.COLUMNS));
140+
141+
List<Type> requestedTypes = new ArrayList<Type>();
142+
143+
for(Type t : requestedSchema.getFields()) {
144+
int index = listColumns.indexOf(t.getName());
145+
requestedTypes.add(fileSchema.getType(index));
146+
}
147+
148+
requestedSchema = new MessageType(requestedSchema.getName(), requestedTypes);
149+
}
150+
151+
return requestedSchema;
152+
}
128153
}

0 commit comments

Comments (0)