package org.apache.paimon.format.parquet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.annotation.Nullable;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.data.columnar.ColumnVector;
import org.apache.paimon.data.columnar.ColumnarRow;
import org.apache.paimon.data.columnar.ColumnarRowIterator;
import org.apache.paimon.data.columnar.VectorizedColumnBatch;
import org.apache.paimon.data.columnar.writable.WritableColumnVector;
import org.apache.paimon.format.FormatReaderFactory;
import org.apache.paimon.format.parquet.reader.ColumnReader;
import org.apache.paimon.format.parquet.reader.ParquetDecimalVector;
import org.apache.paimon.format.parquet.reader.ParquetSplitReaderUtil;
import org.apache.paimon.format.parquet.reader.ParquetTimestampVector;
import org.apache.paimon.format.parquet.type.ParquetField;
import org.apache.paimon.fs.Path;
import org.apache.paimon.options.Options;
import org.apache.paimon.reader.RecordReader;
import org.apache.paimon.shade.org.apache.parquet.ParquetReadOptions;
import org.apache.paimon.shade.org.apache.parquet.column.page.PageReadStore;
import org.apache.paimon.shade.org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.paimon.shade.org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.paimon.shade.org.apache.parquet.hadoop.ParquetInputFormat;
import org.apache.paimon.shade.org.apache.parquet.io.ColumnIOFactory;
import org.apache.paimon.shade.org.apache.parquet.schema.ConversionPatterns;
import org.apache.paimon.shade.org.apache.parquet.schema.GroupType;
import org.apache.paimon.shade.org.apache.parquet.schema.MessageType;
import org.apache.paimon.shade.org.apache.parquet.schema.Type;
import org.apache.paimon.shade.org.apache.parquet.schema.Types;
import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.MapType;
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.Pool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/paimon/format/parquet/ParquetReaderFactory.class */
public class ParquetReaderFactory implements FormatReaderFactory {
    private static final Logger LOG = LoggerFactory.getLogger(ParquetReaderFactory.class);

    /** Option key for the maximum allocation size handed to the Parquet read options. */
    private static final String ALLOCATION_SIZE = "parquet.read.allocation.size";

    /** Options consulted when the Parquet read options are assembled. */
    private final Options conf;

    /** Row type of the projection this factory produces. */
    private final RowType projectedType;

    /** Names of the projected fields, in projection order. */
    private final String[] projectedFields;

    /** Types of the projected fields, index-aligned with {@link #projectedFields}. */
    private final DataType[] projectedTypes;

    /** Capacity used when the writable column vectors are allocated; also caps the rows per batch. */
    private final int batchSize;

    /** Record filter installed on the Parquet read options. */
    private final FilterCompat.Filter filter;

    /**
     * Indices (into {@link #projectedFields}) of fields that were not found in a file schema
     * while the requested schema was being clipped.
     */
    private final Set<Integer> unknownFieldsIndices = new HashSet<>();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/paimon/format/parquet/ParquetReaderFactory$ParquetReader.class */
    /**
     * Vectorized {@link RecordReader} over a single Parquet file.
     *
     * <p>Rows are produced batch by batch: every call to {@link #readBatch()} takes the reusable
     * {@link ParquetReaderBatch} from the pool, fills its vectors with up to {@code batchSize} rows
     * of the current row group, and returns an iterator over them. Projected fields that are absent
     * from this file are filled with nulls.
     */
    public class ParquetReader implements RecordReader<InternalRow> {
        private ParquetFileReader reader;
        private final MessageType requestedSchema;
        private final long totalRowCount;
        private final Pool<ParquetReaderBatch> pool;
        /** Number of rows handed out so far. */
        private long rowsReturned;
        /** Number of rows contained in all row groups loaded so far. */
        private long totalCountLoadedSoFar;
        /** Row position of the first row of the batch currently being returned. */
        private long currentRowPosition;
        /** Row position immediately after the last row of the most recent batch. */
        private long nextRowPosition;
        /** Column readers of the current row group; {@code null} entries mark null-filled columns. */
        private ColumnReader[] columnReaders;
        private final List<ParquetField> fields;
        /**
         * {@code missingInFile[i]} is {@code true} when projected field {@code i} does not exist
         * in the schema of the file this reader was opened on. It is derived from this reader's own
         * file metadata, so readers created by the same factory for files with different schemas
         * cannot influence each other.
         */
        private final boolean[] missingInFile;

        private ParquetReader(ParquetFileReader fileReader, MessageType requestedSchema, long totalRowCount, Pool<ParquetReaderBatch> pool, List<ParquetField> fields) {
            this.reader = fileReader;
            this.requestedSchema = requestedSchema;
            this.totalRowCount = totalRowCount;
            this.pool = pool;
            this.rowsReturned = 0L;
            this.totalCountLoadedSoFar = 0L;
            this.currentRowPosition = 0L;
            this.nextRowPosition = 0L;
            this.fields = fields;

            // Same presence test that the schema clipping applies, evaluated against this file only.
            MessageType fileSchema = fileReader.getFileMetaData().getSchema();
            String[] projectedNames = ParquetReaderFactory.this.projectedFields;
            this.missingInFile = new boolean[projectedNames.length];
            for (int i = 0; i < projectedNames.length; i++) {
                this.missingInFile[i] = !fileSchema.containsField(projectedNames[i]);
            }
        }

        /**
         * Reads the next batch of rows.
         *
         * @return an iterator over the batch, or {@code null} once all rows have been returned
         * @throws IOException if reading fails or the thread is interrupted while waiting for the
         *     pooled batch
         */
        @Override
        @Nullable
        public RecordReader.RecordIterator<InternalRow> readBatch() throws IOException {
            ParquetReaderBatch batch = getCachedEntry();
            if (nextBatch(batch)) {
                return batch.convertAndGetIterator(this.currentRowPosition);
            }
            // Nothing left to read: hand the batch straight back to the pool.
            batch.recycle();
            return null;
        }

        /**
         * Fills {@code batch} with the next rows of the file.
         *
         * @return {@code false} when every row has already been returned, {@code true} otherwise
         */
        private boolean nextBatch(ParquetReaderBatch batch) throws IOException {
            for (WritableColumnVector vector : batch.writableVectors) {
                vector.reset();
            }
            batch.columnarBatch.setNumRows(0);
            if (this.rowsReturned >= this.totalRowCount) {
                return false;
            }
            if (this.rowsReturned == this.totalCountLoadedSoFar) {
                // The current row group is exhausted (or none has been loaded yet).
                readNextRowGroup();
            } else {
                this.currentRowPosition = this.nextRowPosition;
            }
            int rowsToRead = (int) Math.min(ParquetReaderFactory.this.batchSize, this.totalCountLoadedSoFar - this.rowsReturned);
            for (int i = 0; i < this.columnReaders.length; i++) {
                if (this.columnReaders[i] == null) {
                    batch.writableVectors[i].fillWithNulls();
                } else {
                    this.columnReaders[i].readToVector(rowsToRead, batch.writableVectors[i]);
                }
            }
            this.rowsReturned += rowsToRead;
            this.nextRowPosition = this.currentRowPosition + rowsToRead;
            batch.columnarBatch.setNumRows(rowsToRead);
            return true;
        }

        /**
         * Loads the next row group, creates one column reader per projected field that exists in
         * this file, and updates the row position bookkeeping.
         *
         * @throws IOException if the file has no further row group although rows are still expected
         */
        private void readNextRowGroup() throws IOException {
            PageReadStore rowGroup = this.reader.readNextRowGroup();
            if (rowGroup == null) {
                throw new IOException("expecting more rows but reached last block. Read " + this.rowsReturned + " out of " + this.totalRowCount);
            }
            List<Type> requestedFields = this.requestedSchema.getFields();
            this.columnReaders = new ColumnReader[requestedFields.size()];
            for (int i = 0; i < requestedFields.size(); i++) {
                // Fields absent from this file keep a null reader and are filled with nulls.
                if (!this.missingInFile[i]) {
                    this.columnReaders[i] = ParquetSplitReaderUtil.createColumnReader(ParquetReaderFactory.this.projectedTypes[i], requestedFields.get(i), this.requestedSchema.getColumns(), rowGroup, this.fields.get(i), 0);
                }
            }
            this.totalCountLoadedSoFar += rowGroup.getRowCount();
            if (rowGroup.getRowIndexOffset().isPresent()) {
                this.currentRowPosition = rowGroup.getRowIndexOffset().get().longValue();
            } else {
                if (this.reader.rowGroupsFiltered()) {
                    throw new RuntimeException("There is a bug, rowIndexOffset must be present when row groups are filtered.");
                }
                this.currentRowPosition = this.nextRowPosition;
            }
        }

        /**
         * Takes the reusable batch from the pool.
         *
         * @throws IOException if the thread is interrupted while waiting; the interrupt flag is
         *     restored and the {@link InterruptedException} is attached as the cause
         */
        private ParquetReaderBatch getCachedEntry() throws IOException {
            try {
                return this.pool.pollEntry();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted", e);
            }
        }

        /** Closes the underlying file reader; calling it again is a no-op. */
        @Override
        public void close() throws IOException {
            if (this.reader != null) {
                this.reader.close();
                this.reader = null;
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/paimon/format/parquet/ParquetReaderFactory$ParquetReaderBatch.class */
    /**
     * Reusable holder for the vectors of one batch plus the iterator that exposes them as rows.
     * The batch goes back to its pool through the {@link Pool.Recycler} it was created with.
     */
    public static class ParquetReaderBatch {
        /** Vectors the column readers write into. */
        private final WritableColumnVector[] writableVectors;
        /** Read-side view over the vectors of this batch. */
        protected final VectorizedColumnBatch columnarBatch;
        /** Callback that returns this batch to its pool. */
        private final Pool.Recycler<ParquetReaderBatch> recycler;
        /** Iterator instance reused for every batch served from this holder. */
        private final ColumnarRowIterator result;

        /**
         * @param path file path handed to the row iterator
         * @param writableColumnVectorArr vectors to be filled by the column readers
         * @param vectorizedColumnBatch columnar batch the row iterator reads from
         * @param recycler callback used by {@link #recycle()}
         */
        protected ParquetReaderBatch(Path path, WritableColumnVector[] writableColumnVectorArr, VectorizedColumnBatch vectorizedColumnBatch, Pool.Recycler<ParquetReaderBatch> recycler) {
            ColumnarRow rowView = new ColumnarRow(vectorizedColumnBatch);
            this.recycler = recycler;
            this.columnarBatch = vectorizedColumnBatch;
            this.writableVectors = writableColumnVectorArr;
            // The iterator is given recycle() as its release callback.
            this.result = new ColumnarRowIterator(path, rowView, this::recycle);
        }

        /** Hands this batch back to the recycler it was created with. */
        public void recycle() {
            recycler.recycle(this);
        }

        /**
         * Resets the shared iterator with the given position and returns it.
         *
         * @param j value passed to {@code ColumnarRowIterator.reset}; the reader supplies the row
         *     position of the first row of the batch
         */
        public RecordReader.RecordIterator<InternalRow> convertAndGetIterator(long j) {
            ColumnarRowIterator iterator = this.result;
            iterator.reset(j);
            return iterator;
        }
    }

    public ParquetReaderFactory(Options options, RowType rowType, int i, FilterCompat.Filter filter) {
        this.conf = options;
        this.projectedType = rowType;
        this.projectedFields = (String[]) rowType.getFieldNames().toArray(new String[0]);
        this.projectedTypes = (DataType[]) rowType.getFieldTypes().toArray(new DataType[0]);
        this.batchSize = i;
        this.filter = filter;
    }

    /**
     * Opens the file described by {@code context} and returns a reader over the projected columns.
     *
     * <p>The requested schema is derived from the file schema and validated before the reader is
     * handed out. If any step after opening the file fails, the underlying
     * {@link ParquetFileReader} is closed before the failure is propagated, so no file handle is
     * leaked.
     *
     * @param context supplies the file IO, the file path and the file size
     * @return a reader positioned at the start of the file
     * @throws IOException if the file cannot be opened or a required column is missing
     */
    @Override // org.apache.paimon.format.FormatReaderFactory
    public ParquetReader createReader(FormatReaderFactory.Context context) throws IOException {
        ParquetReadOptions.Builder optionsBuilder = ParquetReadOptions.builder().withRange(0L, context.fileSize());
        setReadOptions(optionsBuilder);
        ParquetFileReader fileReader = new ParquetFileReader(ParquetInputFile.fromPath(context.fileIO(), context.filePath()), optionsBuilder.build());
        try {
            MessageType fileSchema = fileReader.getFileMetaData().getSchema();
            MessageType requestedSchema = clipParquetSchema(fileSchema);
            fileReader.setRequestedSchema(requestedSchema);
            checkSchema(fileSchema, requestedSchema);

            Pool<ParquetReaderBatch> batches = createPoolOfBatches(context.filePath(), requestedSchema);
            List<ParquetField> parquetFields = ParquetSplitReaderUtil.buildFieldsList(this.projectedType.getFields(), this.projectedType.getFieldNames(), new ColumnIOFactory().getColumnIO(requestedSchema));
            return new ParquetReader(fileReader, requestedSchema, fileReader.getRecordCount(), batches, parquetFields);
        } catch (IOException | RuntimeException e) {
            // Ownership of the file reader was never transferred, so release it here.
            try {
                fileReader.close();
            } catch (IOException | RuntimeException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Copies the Parquet read settings from {@link #conf} onto {@code builder} and installs the
     * record filter.
     *
     * <p>Filter-related switches default to enabled, signed string min/max and page checksum
     * verification default to disabled, and the maximum allocation defaults to 8 MiB. The bad
     * record threshold is forwarded only when it is configured.
     */
    private void setReadOptions(ParquetReadOptions.Builder builder) {
        final String badRecordThresholdKey = "parquet.read.bad.record.threshold";
        final int defaultMaxAllocationBytes = 8 * 1024 * 1024;

        builder.useSignedStringMinMax(this.conf.getBoolean("parquet.strings.signed-min-max.enabled", false))
                .useDictionaryFilter(this.conf.getBoolean(ParquetInputFormat.DICTIONARY_FILTERING_ENABLED, true))
                .useStatsFilter(this.conf.getBoolean(ParquetInputFormat.STATS_FILTERING_ENABLED, true))
                .useRecordFilter(this.conf.getBoolean(ParquetInputFormat.RECORD_FILTERING_ENABLED, true))
                .useColumnIndexFilter(this.conf.getBoolean(ParquetInputFormat.COLUMN_INDEX_FILTERING_ENABLED, true))
                .usePageChecksumVerification(this.conf.getBoolean(ParquetInputFormat.PAGE_VERIFY_CHECKSUM_ENABLED, false))
                .useBloomFilter(this.conf.getBoolean(ParquetInputFormat.BLOOM_FILTERING_ENABLED, true))
                .withMaxAllocationInBytes(this.conf.getInteger(ALLOCATION_SIZE, defaultMaxAllocationBytes));

        String badRecordThreshold = this.conf.getString(badRecordThresholdKey, null);
        if (badRecordThreshold != null) {
            builder.set(badRecordThresholdKey, badRecordThreshold);
        }

        builder.withRecordFilter(this.filter);
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Builds the schema to request from a file: one entry per projected field, in projection
     * order.
     *
     * <p>A field found in {@code fileSchema} is clipped to the projected type. A field that is not
     * found is logged, represented by a Parquet type converted from the projected type, and its
     * index is recorded in {@link #unknownFieldsIndices}.
     *
     * @param fileSchema schema read from the file
     * @return message type named {@code paimon-parquet}
     */
    private MessageType clipParquetSchema(GroupType fileSchema) {
        Type[] requested = new Type[this.projectedFields.length];
        for (int index = 0; index < requested.length; index++) {
            String fieldName = this.projectedFields[index];
            DataType fieldType = this.projectedTypes[index];
            if (!fileSchema.containsField(fieldName)) {
                LOG.warn("{} does not exist in {}, will fill the field with null.", fieldName, fileSchema);
                requested[index] = ParquetSchemaConverter.convertToParquetType(fieldName, fieldType);
                this.unknownFieldsIndices.add(index);
                continue;
            }
            requested[index] = clipParquetType(fieldType, fileSchema.getType(fieldName));
        }
        return (MessageType) Types.buildMessage().addFields(requested).named("paimon-parquet");
    }

    /**
     * Recursively restricts a Parquet type from the file to the parts the projected type asks for.
     *
     * <ul>
     *   <li>ROW: keeps only the sub-fields named in the row type, in row-type order.
     *   <li>MAP: rebuilds the map from the clipped {@code key} and, if present, {@code value}
     *       entries of the {@code key_value} group.
     *   <li>ARRAY: rebuilds the list from the clipped {@code list.element} type.
     *   <li>Any other type is returned unchanged.
     * </ul>
     *
     * @param dataType projected type
     * @param type matching type taken from the file schema
     * @return the clipped Parquet type
     * @throws RuntimeException if a sub-field of a projected row is absent from the file type
     */
    private Type clipParquetType(DataType dataType, Type type) {
        switch (dataType.getTypeRoot()) {
            case ROW: {
                GroupType rowGroup = (GroupType) type;
                List<DataField> rowFields = ((RowType) dataType).getFields();
                List<Type> clippedFields = new ArrayList<>(rowFields.size());
                for (DataField rowField : rowFields) {
                    String fieldName = rowField.name();
                    if (!rowGroup.containsField(fieldName)) {
                        throw new RuntimeException("field " + fieldName + " is missing");
                    }
                    clippedFields.add(clipParquetType(rowField.type(), rowGroup.getType(fieldName)));
                }
                return rowGroup.withNewFields(clippedFields);
            }
            case MAP: {
                MapType mapType = (MapType) dataType;
                GroupType mapGroup = (GroupType) type;
                GroupType keyValueGroup = mapGroup.getType("key_value").asGroupType();
                Type clippedKey = clipParquetType(mapType.getKeyType(), keyValueGroup.getType("key"));
                // The value entry is optional in the file; pass null through when it is absent.
                Type clippedValue = keyValueGroup.containsField("value")
                        ? clipParquetType(mapType.getValueType(), keyValueGroup.getType("value"))
                        : null;
                return ConversionPatterns.mapType(mapGroup.getRepetition(), mapGroup.getName(), "key_value", clippedKey, clippedValue);
            }
            case ARRAY: {
                GroupType listGroup = (GroupType) type;
                Type fileElement = listGroup.getType("list").asGroupType().getType("element");
                Type clippedElement = clipParquetType(((ArrayType) dataType).getElementType(), fileElement);
                return ConversionPatterns.listOfElements(listGroup.getRepetition(), listGroup.getName(), clippedElement);
            }
            default:
                return type;
        }
    }

    /**
     * Verifies that the requested schema can be served from the file.
     *
     * <p>For each of the first {@code getFieldCount()} column paths of the requested schema: if
     * the file contains the path, its column description must equal the requested one; if the file
     * does not contain it, the requested column must have a maximum definition level above zero.
     *
     * @param fileSchema schema read from the file
     * @param requestedSchema schema built for the projection
     * @throws RuntimeException if the requested schema does not have one field per projected field
     * @throws UnsupportedOperationException if a column exists in the file with a different
     *     description
     * @throws IOException if a column with maximum definition level zero is missing from the file
     */
    private void checkSchema(MessageType fileSchema, MessageType requestedSchema) throws IOException, UnsupportedOperationException {
        int requestedFieldCount = requestedSchema.getFieldCount();
        if (this.projectedFields.length != requestedFieldCount) {
            throw new RuntimeException("The number of projected fields (" + this.projectedFields.length + ") is incompatible with the request schema (" + requestedFieldCount + " fields)!");
        }
        // getPaths() builds a fresh list on each call, so compute it once for the whole loop.
        List<String[]> requestedPaths = requestedSchema.getPaths();
        for (int i = 0; i < requestedFieldCount; i++) {
            String[] columnPath = requestedPaths.get(i);
            if (fileSchema.containsPath(columnPath)) {
                if (!fileSchema.getColumnDescription(columnPath).equals(requestedSchema.getColumns().get(i))) {
                    throw new UnsupportedOperationException("Schema evolution not supported.");
                }
            } else if (requestedSchema.getColumns().get(i).getMaxDefinitionLevel() == 0) {
                // Definition level zero leaves no way to express the column being absent.
                throw new IOException("Required column is missing in data file. Col: " + Arrays.toString(columnPath));
            }
        }
    }

    /**
     * Creates a pool of capacity one and seeds it with a single batch that recycles back into
     * that same pool.
     */
    private Pool<ParquetReaderBatch> createPoolOfBatches(Path path, MessageType messageType) {
        final int poolSize = 1;
        Pool<ParquetReaderBatch> batches = new Pool<>(poolSize);
        ParquetReaderBatch onlyBatch = createReaderBatch(path, messageType, batches.recycler());
        batches.add(onlyBatch);
        return batches;
    }

    /**
     * Allocates the writable vectors for the requested schema, wraps them in a columnar batch and
     * assembles both into a reader batch bound to {@code recycler}.
     */
    private ParquetReaderBatch createReaderBatch(Path path, MessageType messageType, Pool.Recycler<ParquetReaderBatch> recycler) {
        WritableColumnVector[] vectors = createWritableVectors(messageType);
        VectorizedColumnBatch columnarView = createVectorizedColumnBatch(vectors);
        return createReaderBatch(path, vectors, columnarView, recycler);
    }

    /**
     * Allocates one writable vector per projected type, each with capacity {@link #batchSize},
     * pairing projected type {@code i} with field {@code i} of the requested schema.
     */
    private WritableColumnVector[] createWritableVectors(MessageType messageType) {
        WritableColumnVector[] vectors = new WritableColumnVector[this.projectedTypes.length];
        List<Type> requestedFields = messageType.getFields();
        int column = 0;
        for (DataType projected : this.projectedTypes) {
            vectors[column] = ParquetSplitReaderUtil.createWritableColumnVector(this.batchSize, projected, requestedFields.get(column), messageType.getColumns(), 0);
            column++;
        }
        return vectors;
    }

    /**
     * Builds the read-side batch over the given writable vectors, wrapping decimal and timestamp
     * columns in their Parquet-specific vector adapters.
     */
    private VectorizedColumnBatch createVectorizedColumnBatch(WritableColumnVector[] writableColumnVectorArr) {
        final int columnCount = writableColumnVectorArr.length;
        ColumnVector[] readable = new ColumnVector[columnCount];
        for (int column = 0; column < columnCount; column++) {
            readable[column] = wrapForProjectedType(this.projectedTypes[column], writableColumnVectorArr[column]);
        }
        return new VectorizedColumnBatch(readable);
    }

    /** Returns the vector to expose for one column: an adapter for decimal/timestamp types, else the vector itself. */
    private ColumnVector wrapForProjectedType(DataType projected, WritableColumnVector vector) {
        switch (projected.getTypeRoot()) {
            case DECIMAL:
                return new ParquetDecimalVector(vector);
            case TIMESTAMP_WITHOUT_TIME_ZONE:
            case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
                return new ParquetTimestampVector(vector);
            default:
                return vector;
        }
    }

    /** Assembles a reader batch from already-created vectors, their columnar view and the recycler. */
    private ParquetReaderBatch createReaderBatch(Path path, WritableColumnVector[] writableColumnVectorArr, VectorizedColumnBatch vectorizedColumnBatch, Pool.Recycler<ParquetReaderBatch> recycler) {
        ParquetReaderBatch batch = new ParquetReaderBatch(path, writableColumnVectorArr, vectorizedColumnBatch, recycler);
        return batch;
    }
}
