Class LlamaService

java.lang.Object
chat.octet.model.LlamaService

public class LlamaService extends Object
Java bindings for the llama.cpp native API, covering backend initialization, model/context loading, tokenization, sampling, and batch decoding.

C++ source: llamajava.h, llamajava.cpp

Since:
b1345
Author:
William
  • Constructor Details

    • LlamaService

      public LlamaService()
  • Method Details

    • initNative

      public static void initNative()
    • getLlamaModelDefaultParams

      public static LlamaModelParams getLlamaModelDefaultParams()
    • getLlamaContextDefaultParams

      public static LlamaContextParams getLlamaContextDefaultParams()
    • llamaBackendInit

      public static void llamaBackendInit(boolean numa)
    • llamaBackendFree

      public static void llamaBackendFree()
    • loadLlamaModelFromFile

      public static void loadLlamaModelFromFile(String modelPath, LlamaModelParams params) throws ModelException
      Throws:
      ModelException
    • createNewContextWithModel

      public static void createNewContextWithModel(LlamaContextParams params) throws ModelException
      Throws:
      ModelException
    • release

      public static void release()
    • getMaxDevices

      public static int getMaxDevices()
    • isMmapSupported

      public static boolean isMmapSupported()
    • isMlockSupported

      public static boolean isMlockSupported()
    • getVocabSize

      public static int getVocabSize()
    • getContextSize

      public static int getContextSize()
    • getEmbeddingSize

      public static int getEmbeddingSize()
    • getVocabType

      public static int getVocabType()
    • loadLoraModelFromFile

      public static int loadLoraModelFromFile(String loraPath, float loraScale, String baseModelPath, int threads) throws ModelException
      Throws:
      ModelException
    • getLogits

      public static float[] getLogits(int index)
    • getEmbeddings

      public static float[] getEmbeddings()
    • getTokenText

      public static String getTokenText(int token)
    • getTokenScore

      public static float getTokenScore(int token)
    • getTokenType

      public static int getTokenType(int token)
    • getTokenBOS

      public static int getTokenBOS()
    • getTokenEOS

      public static int getTokenEOS()
    • getTokenNL

      public static int getTokenNL()
    • tokenize

      public static int tokenize(byte[] buf, int bufferLength, int[] tokens, int maxTokens, boolean addBos)
    • tokenToPiece

      public static int tokenToPiece(int token, byte[] buf, int bufferLength)
    • getSamplingMetrics

      public static Metrics getSamplingMetrics(boolean reset)
    • getSystemInfo

      public static String getSystemInfo()
    • sampling

      public static int sampling(float[] logits, int[] lastTokens, int lastTokensSize, float penalty, float alphaFrequency, float alphaPresence, boolean penalizeNL, int mirostatMode, float mirostatTAU, float mirostatETA, float temperature, int topK, float topP, float tsf, float typical, int sequenceId, int pastTokenSize) throws DecodeException
      Throws:
      DecodeException
    • loadLlamaGrammar

      public static boolean loadLlamaGrammar(String grammarRules)
    • batchDecode

      public static int batchDecode(int sequenceId, int[] tokens, int inputLength, int pastTokenSize)
    • clearCache

      public static void clearCache(int sequenceId, int posStart, int posEnd)
    • clearCache

      public static void clearCache(int sequenceId)
    • tokenize

      public static int[] tokenize(String text, boolean addBos)