/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.graylog.shaded.opensearch22.org.apache.lucene.index;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import org.graylog.shaded.opensearch22.org.apache.lucene.codecs.Codec;
import org.graylog.shaded.opensearch22.org.apache.lucene.codecs.NormsProducer;
import org.graylog.shaded.opensearch22.org.apache.lucene.codecs.TermVectorsFormat;
import org.graylog.shaded.opensearch22.org.apache.lucene.codecs.TermVectorsReader;
import org.graylog.shaded.opensearch22.org.apache.lucene.codecs.TermVectorsWriter;
import org.graylog.shaded.opensearch22.org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsFormat;
import org.graylog.shaded.opensearch22.org.apache.lucene.search.DocIdSetIterator;
import org.graylog.shaded.opensearch22.org.apache.lucene.store.Directory;
import org.graylog.shaded.opensearch22.org.apache.lucene.store.FlushInfo;
import org.graylog.shaded.opensearch22.org.apache.lucene.store.IOContext;
import org.graylog.shaded.opensearch22.org.apache.lucene.util.ByteBlockPool;
import org.graylog.shaded.opensearch22.org.apache.lucene.util.BytesRef;
import org.graylog.shaded.opensearch22.org.apache.lucene.util.IOUtils;
import org.graylog.shaded.opensearch22.org.apache.lucene.util.IntBlockPool;

/**
 * A {@link TermVectorsConsumer} that buffers term vectors in a temporary on-disk format while
 * documents are being indexed, and at flush time copies them into the segment's real term vectors
 * format, visiting documents in the order given by the flush-time {@link Sorter.DocMap}.
 *
 * <p>Temporary files are created through a {@link TrackingTmpOutputDirectoryWrapper} so that they
 * can be enumerated and deleted once the copy has finished (see {@link #flush}) or when the
 * consumer is aborted (see {@link #abort()}).
 */
final class SortingTermVectorsConsumer extends TermVectorsConsumer {

  /**
   * Format used only for the temporary term vectors files. It is configured with {@link
   * SortingStoredFieldsConsumer#NO_COMPRESSION}; the remaining numeric arguments are tuning
   * parameters of {@link Lucene90CompressingTermVectorsFormat} (see that class for their meaning).
   */
  private static final TermVectorsFormat TEMP_TERM_VECTORS_FORMAT =
      new Lucene90CompressingTermVectorsFormat(
          "TempTermVectors", "", SortingStoredFieldsConsumer.NO_COMPRESSION, 8 * 1024, 128, 10);

  /**
   * Directory wrapper that tracks the temporary files written by this consumer. Stays {@code null}
   * until {@link #initTermVectorsWriter()} has created the temporary writer.
   */
  TrackingTmpOutputDirectoryWrapper tmpDirectory;

  SortingTermVectorsConsumer(
      final IntBlockPool.Allocator intBlockAllocator,
      final ByteBlockPool.Allocator byteBlockAllocator,
      Directory directory,
      SegmentInfo info,
      Codec codec) {
    super(intBlockAllocator, byteBlockAllocator, directory, info, codec);
  }

  /**
   * Flushes the parent consumer and then, if any temporary term vectors were written, re-reads
   * them and writes them into the codec's term vectors format in the final document order.
   *
   * <p>For each new docID in {@code [0, maxDoc)} the vectors are looked up in the temporary files
   * via {@code sortMap.newToOld(docID)}; when {@code sortMap} is {@code null} documents are copied
   * in their original order. The temporary reader, the destination writer and the temporary files
   * are always released, including when creating the destination writer or copying fails.
   *
   * @param fieldsToFlush forwarded unchanged to the parent implementation
   * @param state segment write state supplying the target directory, segment info and field infos
   * @param sortMap mapping between old and new docIDs, or {@code null} for no reordering
   * @param norms forwarded unchanged to the parent implementation
   * @throws IOException if reading the temporary vectors, writing the final vectors, or cleaning
   *     up fails
   */
  @Override
  void flush(
      Map<String, TermsHashPerField> fieldsToFlush,
      final SegmentWriteState state,
      Sorter.DocMap sortMap,
      NormsProducer norms)
      throws IOException {
    // NOTE(review): presumably this finishes and closes the temporary writer created by
    // initTermVectorsWriter() - confirm against TermVectorsConsumer.flush.
    super.flush(fieldsToFlush, state, sortMap, norms);
    if (tmpDirectory != null) {
      TermVectorsReader reader =
          TEMP_TERM_VECTORS_FORMAT.vectorsReader(
              tmpDirectory, state.segmentInfo, state.fieldInfos, IOContext.DEFAULT);
      // Don't pull a merge instance, since merge instances optimize for
      // sequential access while term vectors will likely be accessed in random
      // order here.
      TermVectorsWriter writer = null;
      try {
        // Created inside the try block so that the already-open reader is closed (and the
        // temporary files removed) if opening the destination writer fails.
        writer =
            codec
                .termVectorsFormat()
                .vectorsWriter(state.directory, state.segmentInfo, IOContext.DEFAULT);
        reader.checkIntegrity();
        for (int docID = 0; docID < state.segmentInfo.maxDoc(); docID++) {
          Fields vectors = reader.get(sortMap == null ? docID : sortMap.newToOld(docID));
          writeTermVectors(writer, vectors, state.fieldInfos);
        }
        writer.finish(state.segmentInfo.maxDoc());
      } finally {
        try {
          // IOUtils.close ignores null arguments, so a writer that was never opened is fine.
          IOUtils.close(reader, writer);
        } finally {
          // Nested finally: delete the temporary files even when closing throws.
          IOUtils.deleteFiles(tmpDirectory, tmpDirectory.getTemporaryFiles().values());
        }
      }
    }
  }

  /**
   * Lazily creates the temporary term vectors writer.
   *
   * <p>On first use this wraps the inherited {@code directory} in a {@link
   * TrackingTmpOutputDirectoryWrapper}, opens a writer of {@link #TEMP_TERM_VECTORS_FORMAT} on it
   * and resets the inherited {@code lastDocID} to 0. Does nothing if a writer already exists.
   *
   * @throws IOException if the temporary writer cannot be created
   */
  @Override
  void initTermVectorsWriter() throws IOException {
    if (writer == null) {
      IOContext context = new IOContext(new FlushInfo(lastDocID, bytesUsed.get()));
      // Assigned before the writer is opened, so abort() can still clean up any file the
      // format managed to create if vectorsWriter(...) throws part-way through.
      tmpDirectory = new TrackingTmpOutputDirectoryWrapper(directory);
      writer = TEMP_TERM_VECTORS_FORMAT.vectorsWriter(tmpDirectory, info, context);
      lastDocID = 0;
    }
  }

  /**
   * Aborts the parent consumer and then deletes any temporary files on a best-effort basis:
   * failures while deleting are ignored, and the deletion is attempted even if the parent's abort
   * throws.
   */
  @Override
  public void abort() {
    try {
      super.abort();
    } finally {
      if (tmpDirectory != null) {
        IOUtils.deleteFilesIgnoringExceptions(
            tmpDirectory, tmpDirectory.getTemporaryFiles().values());
      }
    }
  }

  /**
   * Safe (but, slowish) default method to copy every vector field of one document into the
   * provided {@link TermVectorsWriter}.
   *
   * <p>A {@code null} {@code vectors} is written as an empty document (zero fields). Otherwise
   * every field, term and - when the field has positions or offsets - every position is replayed
   * into the writer. When {@link Fields#size()} or {@link Terms#size()} report {@code -1}, the
   * count is obtained by iterating once before the real pass.
   *
   * @param writer destination writer; receives exactly one startDocument/finishDocument pair
   * @param vectors term vectors of the document to copy, or {@code null} if it has none
   * @param fieldInfos used to resolve each field name to the {@link FieldInfo} handed to the
   *     writer
   * @throws IOException if reading the vectors or writing to {@code writer} fails
   */
  private static void writeTermVectors(
      TermVectorsWriter writer, Fields vectors, FieldInfos fieldInfos) throws IOException {
    if (vectors == null) {
      // Still emit an (empty) document so the writer sees one entry per docID.
      writer.startDocument(0);
      writer.finishDocument();
      return;
    }

    int numFields = vectors.size();
    if (numFields == -1) {
      // count manually! TODO: Maybe enforce that Fields.size() returns something valid?
      numFields = 0;
      for (final Iterator<String> it = vectors.iterator(); it.hasNext(); ) {
        it.next();
        numFields++;
      }
    }
    writer.startDocument(numFields);

    // Only used by the ordering assertion below.
    String lastFieldName = null;

    TermsEnum termsEnum = null;
    // Passed back into postings(...) on every term so the enum may be reused.
    PostingsEnum docsAndPositionsEnum = null;

    int fieldCount = 0;
    for (String fieldName : vectors) {
      fieldCount++;
      final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);

      // Field names are expected in strictly increasing order.
      assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0
          : "lastFieldName=" + lastFieldName + " fieldName=" + fieldName;
      lastFieldName = fieldName;

      final Terms terms = vectors.terms(fieldName);
      if (terms == null) {
        // The field iterator announced a field that has no terms; it should not, but skip it.
        continue;
      }

      final boolean hasPositions = terms.hasPositions();
      final boolean hasOffsets = terms.hasOffsets();
      final boolean hasPayloads = terms.hasPayloads();
      assert !hasPayloads || hasPositions;

      int numTerms = (int) terms.size();
      if (numTerms == -1) {
        // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics
        // function
        numTerms = 0;
        termsEnum = terms.iterator();
        while (termsEnum.next() != null) {
          numTerms++;
        }
      }

      writer.startField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
      // Fresh iterator for the real pass (the counting pass above may have exhausted one).
      termsEnum = terms.iterator();

      int termCount = 0;
      while (termsEnum.next() != null) {
        termCount++;

        final int freq = (int) termsEnum.totalTermFreq();

        writer.startTerm(termsEnum.term(), freq);

        if (hasPositions || hasOffsets) {
          // Request offsets and payloads; the values are forwarded to the writer exactly as
          // the enum reports them.
          docsAndPositionsEnum =
              termsEnum.postings(
                  docsAndPositionsEnum, PostingsEnum.OFFSETS | PostingsEnum.PAYLOADS);
          assert docsAndPositionsEnum != null;

          // Advance to the first (expected: only) document of this per-document vector.
          final int docID = docsAndPositionsEnum.nextDoc();
          assert docID != DocIdSetIterator.NO_MORE_DOCS;
          assert docsAndPositionsEnum.freq() == freq;

          for (int posUpto = 0; posUpto < freq; posUpto++) {
            final int pos = docsAndPositionsEnum.nextPosition();
            final int startOffset = docsAndPositionsEnum.startOffset();
            final int endOffset = docsAndPositionsEnum.endOffset();

            final BytesRef payload = docsAndPositionsEnum.getPayload();

            assert !hasPositions || pos >= 0;
            writer.addPosition(pos, startOffset, endOffset, payload);
          }
        }
        writer.finishTerm();
      }
      assert termCount == numTerms;
      writer.finishField();
    }
    assert fieldCount == numFields;
    writer.finishDocument();
  }
}
