package io.trino.parquet.reader;

import com.google.common.base.Preconditions;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import io.airlift.slice.Slice;
import io.trino.memory.context.AggregatedMemoryContext;
import io.trino.parquet.ChunkKey;
import io.trino.parquet.ChunkReader;
import io.trino.parquet.DiskRange;
import io.trino.parquet.Field;
import io.trino.parquet.GroupField;
import io.trino.parquet.ParquetCorruptionException;
import io.trino.parquet.ParquetDataSource;
import io.trino.parquet.ParquetReaderOptions;
import io.trino.parquet.ParquetValidationUtils;
import io.trino.parquet.PrimitiveField;
import io.trino.parquet.RichColumnDescriptor;
import io.trino.parquet.predicate.Predicate;
import io.trino.parquet.reader.FilteredOffsetIndex;
import io.trino.spi.block.ArrayBlock;
import io.trino.spi.block.Block;
import io.trino.spi.block.RowBlock;
import io.trino.spi.block.RunLengthEncodedBlock;
import io.trino.spi.type.ArrayType;
import io.trino.spi.type.MapType;
import io.trino.spi.type.RowType;
import io.trino.spi.type.Type;
import it.unimi.dsi.fastutil.booleans.BooleanArrayList;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.parquet.internal.column.columnindex.OffsetIndex;
import org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter;
import org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore;
import org.apache.parquet.internal.filter2.columnindex.RowRanges;
import org.apache.parquet.io.MessageColumnIO;
import org.apache.parquet.io.PrimitiveColumnIO;
import org.joda.time.DateTimeZone;

/* loaded from: input_file:io/trino/parquet/reader/ParquetReader.class */
/**
 * Reads the selected columns of a Parquet file one row group at a time, in batches.
 *
 * <p>Typical use: call {@link #nextBatch()} to size and position the next batch, then
 * {@link #readBlock(Field)} for every field that is needed for that batch. The batch size
 * starts at {@code INITIAL_BATCH_SIZE}, is multiplied by {@code BATCH_SIZE_GROWTH_FACTOR} after
 * every batch up to {@code MAX_VECTOR_LENGTH}, and is additionally capped so that a batch stays
 * within {@code ParquetReaderOptions.getMaxReadBlockSize()} based on the widest rows seen so far.
 *
 * <p>When a predicate is supplied and column indexes are enabled, the per-row-group
 * {@link ColumnIndexStore} is used to narrow both the byte ranges that are read and the rows
 * that are returned.
 */
public class ParquetReader implements Closeable {
    private static final int MAX_VECTOR_LENGTH = 1024;
    private static final int INITIAL_BATCH_SIZE = 1;
    private static final int BATCH_SIZE_GROWTH_FACTOR = 2;

    private final Optional<String> fileCreatedBy;
    private final List<BlockMetaData> blocks;
    private final Optional<List<Long>> firstRowsOfBlocks;
    private final List<PrimitiveColumnIO> columns;
    private final ParquetDataSource dataSource;
    private final DateTimeZone timeZone;
    private final AggregatedMemoryContext systemMemoryContext;
    private final Optional<FilterPredicate> filter;

    // Index into blocks of the row group being read; -1 before the first nextBatch() call and
    // blocks.size() once every row group has been consumed.
    private int currentRowGroup = -1;
    private BlockMetaData currentBlockMetadata;
    // Number of rows this reader will return for the current row group (after column-index filtering).
    private long currentGroupRowCount;
    private Optional<Long> firstRowIndexInGroup = Optional.empty();
    // Row ranges selected by the column index for the current row group; null when not filtered.
    private RowRanges currentGroupRowRanges;
    private long nextRowInGroup;
    private int batchSize;
    private int nextBatchSize = INITIAL_BATCH_SIZE;

    // Indexed by PrimitiveColumnIO.getId().
    private final PrimitiveColumnReader[] columnReaders;
    private final long[] maxBytesPerCell;
    private long maxCombinedBytesPerRow;
    private final ParquetReaderOptions options;
    private int maxBatchSize = MAX_VECTOR_LENGTH;
    private AggregatedMemoryContext currentRowGroupMemoryContext;
    private final Multimap<ChunkKey, ChunkReader> chunkReaders;
    private final List<Optional<ColumnIndexStore>> columnIndexStore;
    // Lazily computed row ranges per row group; an entry stays null until computed.
    private final List<RowRanges> blockRowRanges;
    private final Map<ColumnPath, ColumnDescriptor> paths = new HashMap<>();

    /**
     * Creates a reader without a predicate, i.e. without column-index based filtering.
     *
     * @throws IOException if column chunk metadata is missing for a requested column
     */
    public ParquetReader(
            Optional<String> fileCreatedBy,
            MessageColumnIO messageColumnIO,
            List<BlockMetaData> blocks,
            Optional<List<Long>> firstRowsOfBlocks,
            ParquetDataSource dataSource,
            DateTimeZone timeZone,
            AggregatedMemoryContext systemMemoryContext,
            ParquetReaderOptions options) throws IOException {
        this(fileCreatedBy, messageColumnIO, blocks, firstRowsOfBlocks, dataSource, timeZone, systemMemoryContext, options, null, null);
    }

    /**
     * Creates a reader and plans the disk reads for every column chunk of every row group.
     *
     * @param fileCreatedBy value handed to the column chunk reader when pages are decoded
     * @param messageColumnIO column IO whose leaves are the primitive columns to read
     * @param blocks row groups to read, in order
     * @param firstRowsOfBlocks optional first-row index for each entry of {@code blocks}; when
     *        present it must have the same size as {@code blocks}
     * @param dataSource source the column chunks are read from; closed by {@link #close()}
     * @param timeZone time zone passed to the column readers and to the predicate conversion
     * @param systemMemoryContext parent of the per-row-group memory contexts
     * @param options reader options (column index usage, maximum read block size)
     * @param predicate predicate to convert into a Parquet filter, or {@code null} for none
     * @param columnIndexStore column index store per row group; may be {@code null} only when
     *        no filter ends up being used
     * @throws IOException if column chunk metadata is missing for a requested column
     * @throws NullPointerException if a required argument is null
     * @throws IllegalArgumentException if {@code firstRowsOfBlocks} does not match {@code blocks}
     */
    public ParquetReader(
            Optional<String> fileCreatedBy,
            MessageColumnIO messageColumnIO,
            List<BlockMetaData> blocks,
            Optional<List<Long>> firstRowsOfBlocks,
            ParquetDataSource dataSource,
            DateTimeZone timeZone,
            AggregatedMemoryContext systemMemoryContext,
            ParquetReaderOptions options,
            Predicate predicate,
            List<Optional<ColumnIndexStore>> columnIndexStore) throws IOException {
        this.fileCreatedBy = Objects.requireNonNull(fileCreatedBy, "fileCreatedBy is null");
        this.columns = Objects.requireNonNull(messageColumnIO, "messageColumnIO is null").getLeaves();
        this.blocks = Objects.requireNonNull(blocks, "blocks is null");
        this.firstRowsOfBlocks = Objects.requireNonNull(firstRowsOfBlocks, "firstRowsOfBlocks is null");
        this.dataSource = Objects.requireNonNull(dataSource, "dataSource is null");
        this.timeZone = Objects.requireNonNull(timeZone, "timeZone is null");
        this.systemMemoryContext = Objects.requireNonNull(systemMemoryContext, "systemMemoryContext is null");
        this.currentRowGroupMemoryContext = systemMemoryContext.newAggregatedMemoryContext();
        this.options = Objects.requireNonNull(options, "options is null");
        this.columnReaders = new PrimitiveColumnReader[this.columns.size()];
        this.maxBytesPerCell = new long[this.columns.size()];
        firstRowsOfBlocks.ifPresent(firstRows ->
                Preconditions.checkArgument(blocks.size() == firstRows.size(), "elements of firstRowsOfBlocks must correspond to blocks"));

        this.columnIndexStore = columnIndexStore;
        this.blockRowRanges = listWithNulls(blocks.size());
        for (PrimitiveColumnIO column : this.columns) {
            ColumnDescriptor descriptor = column.getColumnDescriptor();
            this.paths.put(ColumnPath.get(descriptor.getPath()), descriptor);
        }

        if (predicate == null || !options.isUseColumnIndex()) {
            this.filter = Optional.empty();
        } else {
            this.filter = predicate.toParquetFilter(timeZone);
        }
        if (this.filter.isPresent()) {
            // The store is consulted for every row group as soon as a filter is active.
            Objects.requireNonNull(columnIndexStore, "columnIndexStore is null");
        }

        // Collect the byte ranges of every (column, row group) pair so the data source can plan the reads.
        ArrayListMultimap<ChunkKey, DiskRange> diskRanges = ArrayListMultimap.create();
        for (int rowGroup = 0; rowGroup < blocks.size(); rowGroup++) {
            BlockMetaData block = blocks.get(rowGroup);
            for (PrimitiveColumnIO column : this.columns) {
                int columnId = column.getId();
                ColumnChunkMetaData chunkMetadata = getColumnChunkMetaData(block, column.getColumnDescriptor());
                long startingPosition = chunkMetadata.getStartingPos();
                FilteredOffsetIndex filteredOffsetIndex = getFilteredOffsetIndex(rowGroup, block.getRowCount(), chunkMetadata.getPath());
                if (filteredOffsetIndex == null) {
                    // No usable column index: read the whole column chunk.
                    diskRanges.put(
                            new ChunkKey(columnId, rowGroup),
                            new DiskRange(startingPosition, Math.toIntExact(chunkMetadata.getTotalSize())));
                } else {
                    for (FilteredOffsetIndex.OffsetRange range : filteredOffsetIndex.calculateOffsetRanges(startingPosition)) {
                        diskRanges.put(
                                new ChunkKey(columnId, rowGroup),
                                new DiskRange(range.getOffset(), Math.toIntExact(range.getLength())));
                    }
                }
            }
        }
        this.chunkReaders = dataSource.planRead(diskRanges);
    }

    /**
     * Frees the buffers of the current row group, closes its memory context and closes the data source.
     * The data source is closed even if releasing the row group resources fails.
     */
    @Override // java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
        try {
            freeCurrentRowGroupBuffers();
            this.currentRowGroupMemoryContext.close();
        } finally {
            this.dataSource.close();
        }
    }

    /**
     * Returns the first-row index of the current row group plus the offset of the last batch
     * returned by {@link #nextBatch()} within that group.
     *
     * @throws IllegalStateException if no first-row indexes were supplied or no row group has been started
     */
    public long lastBatchStartRow() {
        long firstRowInGroup = this.firstRowIndexInGroup.orElseThrow(() -> new IllegalStateException("row index unavailable"));
        return (firstRowInGroup + this.nextRowInGroup) - this.batchSize;
    }

    /**
     * Advances to the next batch, moving on to the next row group when the current one is exhausted.
     *
     * @return the number of rows in the batch, or -1 when there is nothing left to read
     */
    public int nextBatch() {
        if (this.nextRowInGroup >= this.currentGroupRowCount && !advanceToNextRowGroup()) {
            return -1;
        }
        this.batchSize = Math.min(this.nextBatchSize, this.maxBatchSize);
        this.nextBatchSize = Math.min(this.batchSize * BATCH_SIZE_GROWTH_FACTOR, MAX_VECTOR_LENGTH);
        // Never read past the end of the current row group.
        this.batchSize = Math.toIntExact(Math.min(this.batchSize, this.currentGroupRowCount - this.nextRowInGroup));
        this.nextRowInGroup += this.batchSize;
        for (PrimitiveColumnReader columnReader : this.columnReaders) {
            columnReader.prepareNextRead(this.batchSize);
        }
        return this.batchSize;
    }

    /**
     * Releases the resources of the current row group and positions the reader on the next row
     * group that still has rows after column-index filtering.
     *
     * <p>Row groups whose filtered row count is zero are skipped (their planned buffers are
     * freed) instead of ending the scan. Once all row groups are consumed, every further call
     * returns {@code false}.
     *
     * @return {@code true} if a row group is ready to be read, {@code false} at end of data
     */
    private boolean advanceToNextRowGroup() {
        this.currentRowGroupMemoryContext.close();
        this.currentRowGroupMemoryContext = this.systemMemoryContext.newAggregatedMemoryContext();
        freeCurrentRowGroupBuffers();

        if (this.currentRowGroup >= this.blocks.size()) {
            // Already exhausted; do not move the index any further.
            return false;
        }

        this.currentRowGroup++;
        while (this.currentRowGroup < this.blocks.size()) {
            BlockMetaData candidate = this.blocks.get(this.currentRowGroup);
            long rowCount = candidate.getRowCount();
            RowRanges rowRanges = null;
            if (this.filter.isPresent() && this.options.isUseColumnIndex() && this.columnIndexStore.get(this.currentRowGroup).isPresent()) {
                rowRanges = getRowRanges(this.filter.get(), this.currentRowGroup);
                rowCount = rowRanges.rowCount();
            }

            if (rowRanges != null && rowCount == 0) {
                // The column index proves that no row matches: drop this group's buffers and try the next one.
                freeCurrentRowGroupBuffers();
                this.currentRowGroup++;
                continue;
            }

            this.currentBlockMetadata = candidate;
            this.firstRowIndexInGroup = this.firstRowsOfBlocks.map(firstRows -> firstRows.get(this.currentRowGroup));
            this.currentGroupRowCount = rowCount;
            // Assigned for every group (null when unfiltered) so ranges of a previous group are never reused.
            this.currentGroupRowRanges = rowRanges;
            this.nextRowInGroup = 0L;
            initializeColumnReaders();
            return true;
        }
        return false;
    }

    /**
     * Calls {@code free()} on every chunk reader planned for the current row group.
     * Does nothing before the first row group has been started.
     */
    private void freeCurrentRowGroupBuffers() {
        if (this.currentRowGroup < 0) {
            return;
        }
        for (PrimitiveColumnIO column : this.columns) {
            // Same key shape as used when the reads were planned: (column id, row group).
            for (ChunkReader chunkReader : this.chunkReaders.get(new ChunkKey(column.getId(), this.currentRowGroup))) {
                chunkReader.free();
            }
        }
    }

    /**
     * Reads an array field: reads the element column chunk and wraps it in an {@link ArrayBlock}
     * using offsets and null flags computed from the element's definition/repetition levels.
     */
    private ColumnChunk readArray(GroupField groupField) throws IOException {
        List<Type> typeParameters = groupField.getType().getTypeParameters();
        Preconditions.checkArgument(typeParameters.size() == 1, "Arrays must have a single type parameter, found %s", typeParameters.size());

        ColumnChunk elements = readColumnChunk(groupField.getChildren().get(0).get());
        IntArrayList offsets = new IntArrayList();
        BooleanArrayList isNull = new BooleanArrayList();
        ListColumnReader.calculateCollectionOffsets(groupField, offsets, isNull, elements.getDefinitionLevels(), elements.getRepetitionLevels());

        Block arrayBlock = ArrayBlock.fromElementBlock(isNull.size(), Optional.of(isNull.toBooleanArray()), offsets.toIntArray(), elements.getBlock());
        return new ColumnChunk(arrayBlock, elements.getDefinitionLevels(), elements.getRepetitionLevels());
    }

    /**
     * Reads a map field: reads the key and value column chunks (in that order) and combines them
     * into a map block; offsets and null flags are derived from the key chunk's levels.
     */
    private ColumnChunk readMap(GroupField groupField) throws IOException {
        List<Type> typeParameters = groupField.getType().getTypeParameters();
        Preconditions.checkArgument(typeParameters.size() == 2, "Maps must have two type parameters, found %s", typeParameters.size());

        ColumnChunk keys = readColumnChunk(groupField.getChildren().get(0).get());
        Block keyBlock = keys.getBlock();
        Block valueBlock = readColumnChunk(groupField.getChildren().get(1).get()).getBlock();

        IntArrayList offsets = new IntArrayList();
        BooleanArrayList isNull = new BooleanArrayList();
        ListColumnReader.calculateCollectionOffsets(groupField, offsets, isNull, keys.getDefinitionLevels(), keys.getRepetitionLevels());

        // readColumnChunk only dispatches here for MapType fields, so the cast cannot fail.
        Block mapBlock = ((MapType) groupField.getType()).createBlockFromKeyValue(
                Optional.of(isNull.toBooleanArray()), offsets.toIntArray(), keyBlock, valueBlock);
        return new ColumnChunk(mapBlock, keys.getDefinitionLevels(), keys.getRepetitionLevels());
    }

    /**
     * Reads a row (struct) field: reads every present child, substitutes an all-null
     * run-length-encoded block for every absent child, and assembles a {@link RowBlock}.
     * The levels of the last present child are used to compute the struct's null flags.
     *
     * @throws ParquetCorruptionException if none of the struct's children is present
     */
    private ColumnChunk readStruct(GroupField groupField) throws IOException {
        int fieldCount = groupField.getType().getTypeSignature().getParameters().size();
        Block[] fieldBlocks = new Block[fieldCount];
        ColumnChunk lastChild = null;
        List<Optional<Field>> children = groupField.getChildren();
        for (int i = 0; i < fieldCount; i++) {
            Optional<Field> child = children.get(i);
            if (child.isPresent()) {
                lastChild = readColumnChunk(child.get());
                fieldBlocks[i] = lastChild.getBlock();
            }
        }
        if (lastChild == null) {
            // Without at least one child there are no levels to derive positions and nulls from.
            throw new ParquetCorruptionException("Struct field does not have any children: %s", groupField);
        }

        int positionCount = lastChild.getBlock().getPositionCount();
        List<Type> fieldTypes = groupField.getType().getTypeParameters();
        for (int i = 0; i < fieldCount; i++) {
            if (fieldBlocks[i] == null) {
                fieldBlocks[i] = RunLengthEncodedBlock.create(fieldTypes.get(i), (Object) null, positionCount);
            }
        }

        boolean[] structIsNull = StructColumnReader
                .calculateStructOffsets(groupField, lastChild.getDefinitionLevels(), lastChild.getRepetitionLevels())
                .toBooleanArray();
        Block rowBlock = RowBlock.fromFieldBlocks(structIsNull.length, Optional.of(structIsNull), fieldBlocks);
        return new ColumnChunk(rowBlock, lastChild.getDefinitionLevels(), lastChild.getRepetitionLevels());
    }

    /**
     * Builds the offset index of a column chunk restricted to the rows selected by the filter.
     *
     * @param i row group index
     * @param j total number of rows in the row group
     * @param columnPath path of the column within the row group
     * @return the filtered offset index, or {@code null} when there is no filter, no row ranges or
     *         offset index for the row group/column, or the filter does not remove any row
     */
    private FilteredOffsetIndex getFilteredOffsetIndex(int i, long j, ColumnPath columnPath) {
        if (!this.filter.isPresent()) {
            return null;
        }
        RowRanges rowRanges = getRowRanges(this.filter.get(), i);
        if (rowRanges == null || rowRanges.rowCount() >= j) {
            return null;
        }
        Optional<ColumnIndexStore> store = this.columnIndexStore.get(i);
        if (!store.isPresent()) {
            return null;
        }
        OffsetIndex offsetIndex = store.get().getOffsetIndex(columnPath);
        if (offsetIndex == null) {
            return null;
        }
        return FilteredOffsetIndex.filterOffsetIndex(offsetIndex, rowRanges, j);
    }

    /**
     * Reads the current batch of a primitive column, creating the column's page reader on first
     * use within a row group, and tightens {@code maxBatchSize} when this column turned out to
     * be wider per row than previously observed.
     */
    private ColumnChunk readPrimitive(PrimitiveField primitiveField) throws IOException {
        RichColumnDescriptor descriptor = primitiveField.getDescriptor();
        int columnId = primitiveField.getId();
        PrimitiveColumnReader columnReader = this.columnReaders[columnId];
        if (columnReader.getPageReader() == null) {
            long rowCount = this.currentBlockMetadata.getRowCount();
            ParquetValidationUtils.validateParquet(rowCount > 0, "Row group has 0 rows");
            ColumnChunkMetaData chunkMetadata = getColumnChunkMetaData(this.currentBlockMetadata, descriptor);
            List<Slice> slices = allocateBlock(columnId);
            OffsetIndex offsetIndex = getFilteredOffsetIndex(this.currentRowGroup, rowCount, chunkMetadata.getPath());
            columnReader.setPageReader(createPageReader(slices, chunkMetadata, descriptor, offsetIndex), this.currentGroupRowRanges);
        }

        ColumnChunk columnChunk = columnReader.readPrimitive(primitiveField);

        // Track the largest per-row size seen for this column and shrink the batch cap accordingly.
        long bytesPerCell = columnChunk.getBlock().getSizeInBytes() / this.batchSize;
        if (this.maxBytesPerCell[columnId] < bytesPerCell) {
            this.maxCombinedBytesPerRow = (this.maxCombinedBytesPerRow - this.maxBytesPerCell[columnId]) + bytesPerCell;
            long rowsPerReadBlock = Math.max(1L, this.options.getMaxReadBlockSize().toBytes() / this.maxCombinedBytesPerRow);
            this.maxBatchSize = Math.toIntExact(Math.min(this.maxBatchSize, rowsPerReadBlock));
            this.maxBytesPerCell[columnId] = bytesPerCell;
        }
        return columnChunk;
    }

    /** Creates a page reader over all pages of the given column chunk data. */
    private PageReader createPageReader(List<Slice> list, ColumnChunkMetaData columnChunkMetaData, ColumnDescriptor columnDescriptor, OffsetIndex offsetIndex) throws IOException {
        ColumnChunkDescriptor chunkDescriptor = new ColumnChunkDescriptor(columnDescriptor, columnChunkMetaData);
        return new ParquetColumnChunk(this.fileCreatedBy, chunkDescriptor, list, offsetIndex).readAllPages();
    }

    /**
     * Reads every planned chunk of the given column in the current row group and records the
     * size of each slice in the current row group's memory context.
     *
     * @param i column id
     */
    private List<Slice> allocateBlock(int i) {
        Collection<ChunkReader> readers = this.chunkReaders.get(new ChunkKey(i, this.currentRowGroup));
        List<Slice> slices = Lists.newArrayListWithExpectedSize(readers.size());
        for (ChunkReader chunkReader : readers) {
            Slice slice = chunkReader.read();
            this.currentRowGroupMemoryContext.newLocalMemoryContext(ParquetReader.class.getSimpleName()).setBytes(slice.length());
            slices.add(slice);
        }
        return slices;
    }

    /**
     * Finds the column chunk metadata of the given column within a row group.
     *
     * @throws ParquetCorruptionException if the row group has no metadata for the column
     */
    private ColumnChunkMetaData getColumnChunkMetaData(BlockMetaData blockMetaData, ColumnDescriptor columnDescriptor) throws IOException {
        ColumnPath wantedPath = ColumnPath.get(columnDescriptor.getPath());
        for (ColumnChunkMetaData chunkMetadata : blockMetaData.getColumns()) {
            if (chunkMetadata.getPath().equals(wantedPath)) {
                return chunkMetadata;
            }
        }
        throw new ParquetCorruptionException("Metadata is missing for column: %s", columnDescriptor);
    }

    /** Replaces every column reader with a fresh one (no page reader set) for a new row group. */
    private void initializeColumnReaders() {
        for (PrimitiveColumnIO column : this.columns) {
            RichColumnDescriptor descriptor = new RichColumnDescriptor(column.getColumnDescriptor(), column.getType().asPrimitiveType());
            this.columnReaders[column.getId()] = PrimitiveColumnReader.createReader(descriptor, this.timeZone);
        }
    }

    /**
     * Reads the block of the given field for the batch most recently set up by {@link #nextBatch()}.
     */
    public Block readBlock(Field field) throws IOException {
        return readColumnChunk(field).getBlock();
    }

    /** Dispatches to the struct, map, array or primitive reader based on the field's type. */
    private ColumnChunk readColumnChunk(Field field) throws IOException {
        Type type = field.getType();
        if (type instanceof RowType) {
            return readStruct((GroupField) field);
        }
        if (type instanceof MapType) {
            return readMap((GroupField) field);
        }
        if (type instanceof ArrayType) {
            return readArray((GroupField) field);
        }
        return readPrimitive((PrimitiveField) field);
    }

    public ParquetDataSource getDataSource() {
        return this.dataSource;
    }

    public AggregatedMemoryContext getSystemMemoryContext() {
        return this.systemMemoryContext;
    }

    /** Returns a mutable list holding {@code i} null elements. */
    private static <T> List<T> listWithNulls(int i) {
        List<T> list = new ArrayList<>(i);
        for (int index = 0; index < i; index++) {
            list.add(null);
        }
        return list;
    }

    /**
     * Returns the row ranges selected by the filter for a row group, computing and caching them
     * on first use.
     *
     * @param i row group index
     * @return the row ranges, or {@code null} when the row group has no column index store
     */
    private RowRanges getRowRanges(FilterPredicate filterPredicate, int i) {
        Objects.requireNonNull(filterPredicate, "filter is null");
        RowRanges cached = this.blockRowRanges.get(i);
        if (cached != null) {
            return cached;
        }
        Optional<ColumnIndexStore> store = this.columnIndexStore.get(i);
        if (!store.isPresent()) {
            return null;
        }
        RowRanges computed = ColumnIndexFilter.calculateRowRanges(
                FilterCompat.get(filterPredicate), store.get(), this.paths.keySet(), this.blocks.get(i).getRowCount());
        this.blockRowRanges.set(i, computed);
        return computed;
    }
}
