@@ -24,7 +24,7 @@ import java.util.UUID
 import scala.collection.JavaConverters._
 
 import org.apache.hadoop.fs.FileSystem
-import org.apache.iceberg.{FileFormat, FileScanTask}
+import org.apache.iceberg.{FileFormat, FileScanTask, MetadataColumns}
 import org.apache.spark.Partition
 import org.apache.spark.TaskContext
 import org.apache.spark.broadcast.Broadcast
@@ -33,13 +33,14 @@ import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.auron.{EmptyNativeRDD, NativeConverters, NativeHelper, NativeRDD, NativeSupports, Shims}
 import org.apache.spark.sql.auron.iceberg.IcebergScanPlan
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.Literal
 import org.apache.spark.sql.execution.LeafExecNode
 import org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile}
 import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
 import org.apache.spark.sql.execution.metric.SQLMetric
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.types.{StringType, StructType}
 import org.apache.spark.util.SerializableConfiguration
 
 import org.apache.auron.{protobuf => pb}
@@ -57,31 +58,36 @@ case class NativeIcebergTableScanExec(basedScan: BatchScanExec, plan: IcebergSca
   override val output = basedScan.output
   override val outputPartitioning = basedScan.outputPartitioning
 
-  private lazy val readSchema: StructType = plan.readSchema
+  private lazy val fileSchema: StructType = plan.fileSchema
+  private lazy val partitionSchema: StructType = plan.partitionSchema
+  private lazy val projectableSchema: StructType =
+    StructType(fileSchema.fields ++ partitionSchema.fields)
   private lazy val fileTasks: Seq[FileScanTask] = plan.fileTasks
 
   private lazy val partitions: Array[FilePartition] = buildFilePartitions()
   private lazy val fileSizes: Map[String, Long] = buildFileSizes()
 
-  private lazy val nativeFileSchema: pb.Schema = NativeConverters.convertSchema(readSchema)
+  private lazy val nativeFileSchema: pb.Schema = NativeConverters.convertSchema(fileSchema)
   private lazy val nativePartitionSchema: pb.Schema =
-    NativeConverters.convertSchema(StructType(Nil))
+    NativeConverters.convertSchema(partitionSchema)
 
   private lazy val caseSensitive: Boolean = SQLConf.get.caseSensitiveAnalysis
 
   private lazy val fieldIndexByName: Map[String, Int] = {
     if (caseSensitive) {
-      readSchema.fieldNames.zipWithIndex.toMap
+      projectableSchema.fieldNames.zipWithIndex.toMap
     } else {
-      readSchema.fieldNames.map(_.toLowerCase(Locale.ROOT)).zipWithIndex.toMap
+      projectableSchema.fieldNames.map(_.toLowerCase(Locale.ROOT)).zipWithIndex.toMap
     }
   }
 
   private def fieldIndexFor(name: String): Int = {
     if (caseSensitive) {
-      fieldIndexByName.getOrElse(name, readSchema.fieldIndex(name))
+      fieldIndexByName.getOrElse(name, projectableSchema.fieldIndex(name))
     } else {
-      fieldIndexByName.getOrElse(name.toLowerCase(Locale.ROOT), readSchema.fieldIndex(name))
+      fieldIndexByName.getOrElse(
+        name.toLowerCase(Locale.ROOT),
+        projectableSchema.fieldIndex(name))
     }
   }
 
@@ -98,6 +104,7 @@ case class NativeIcebergTableScanExec(basedScan: BatchScanExec, plan: IcebergSca
         .setPath(filePath)
         .setSize(size)
         .setLastModifiedNs(0)
+        .addAllPartitionValues(metadataPartitionValues(filePath).asJava)
         .setRange(
           pb.FileRange
             .newBuilder()
@@ -112,6 +119,17 @@ case class NativeIcebergTableScanExec(basedScan: BatchScanExec, plan: IcebergSca
       .build()
   }
 
+  private def metadataPartitionValues(filePath: String): Seq[pb.ScalarValue] =
+    partitionSchema.fields.map { field =>
+      field.name match {
+        case name if name == MetadataColumns.FILE_PATH.name() =>
+          NativeConverters.convertExpr(Literal.create(filePath, StringType)).getLiteral
+        case name =>
+          throw new IllegalStateException(
+            s"unsupported Iceberg metadata column in native scan: $name")
+      }
+    }
+
   override def doExecuteNative(): NativeRDD = {
     if (partitions.isEmpty) {
       return new EmptyNativeRDD(sparkContext)
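
For readers skimming the diff, here is a minimal, self-contained sketch of what the new `metadataPartitionValues` helper computes per file. It assumes the only metadata column supported by the native scan is Iceberg's file-path column (the name behind `MetadataColumns.FILE_PATH`, assumed here to be `_file`), and it stands in plain `String` values for the `pb.ScalarValue` literals that `NativeConverters.convertExpr` produces in the actual code.

```scala
// Sketch only: the real helper returns pb.ScalarValue protobuf literals via
// NativeConverters; plain Strings are used here so the example runs on its own.
object MetadataPartitionValuesSketch {
  // Assumption: MetadataColumns.FILE_PATH.name() resolves to "_file".
  private val FilePathColumn = "_file"

  // One constant value per partition-schema field, attached to every row read
  // from `filePath`; any other metadata column name is rejected.
  def metadataPartitionValues(partitionFieldNames: Seq[String], filePath: String): Seq[String] =
    partitionFieldNames.map {
      case FilePathColumn => filePath
      case other =>
        throw new IllegalStateException(
          s"unsupported Iceberg metadata column in native scan: $other")
    }

  def main(args: Array[String]): Unit = {
    val values = metadataPartitionValues(Seq("_file"), "s3://bucket/tbl/data/00000-0-abc.parquet")
    println(values) // List(s3://bucket/tbl/data/00000-0-abc.parquet)
  }
}
```

The design mirrors how per-file partition values are usually handled in file scans: the column is a per-file constant, so the native reader can fill it for every row instead of reading it from the data file, which is why it travels in the partition schema rather than the file schema.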