Package chat.octet.model
Class LlamaService
java.lang.Object
chat.octet.model.LlamaService
Llama.cpp API
C++ source: llamajava.h, llamajava.cpp
- Since:
- b1345
- Author:
- William
-
Constructor Summary
Constructors -
Method Summary
Modifier and Type / Method / Description
static int              batchDecode(int sequenceId, int[] tokens, int inputLength, int pastTokenSize)
static void             clearCache(int sequenceId)
static void             clearCache(int sequenceId, int posStart, int posEnd)
static void             createNewContextWithModel(…)  [parameter list lost in extraction — verify against source]
static int              getContextSize()
static float[]          getEmbeddings()
static int              getEmbeddingSize()
static LlamaContextParams getLlamaContextDefaultParams()
static LlamaModelParams getLlamaModelDefaultParams()
static float[]          getLogits(int index)
static int              getMaxDevices()
static Metrics          getSamplingMetrics(boolean reset)
static String           getSystemInfo()
static int              getTokenBOS()
static int              getTokenEOS()
static int              getTokenNL()
static float            getTokenScore(int token)
static String           getTokenText(int token)
static int              getTokenType(int token)
static int              getVocabSize()
static int              getVocabType()
static void             initNative()
static boolean          isMlockSupported()
static boolean          isMmapSupported()
static void             llamaBackendFree()
static void             llamaBackendInit(boolean numa)
static boolean          loadLlamaGrammar(String grammarRules)
static void             loadLlamaModelFromFile(String modelPath, LlamaModelParams params)
static int              loadLoraModelFromFile(String loraPath, float loraScale, String baseModelPath, int threads)
static void             release()
static int              sampling(float[] logits, int[] lastTokens, int lastTokensSize, float penalty, float alphaFrequency, float alphaPresence, boolean penalizeNL, int mirostatMode, float mirostatTAU, float mirostatETA, float temperature, int topK, float topP, float tsf, float typical, int sequenceId, int pastTokenSize)
static int              tokenize(byte[] buf, int bufferLength, int[] tokens, int maxTokens, boolean addBos)
static int[]            tokenize(…)  [parameter list lost in extraction — verify against source]
static int              tokenToPiece(int token, byte[] buf, int bufferLength)
-
Constructor Details
-
LlamaService
public LlamaService()
-
-
Method Details
-
initNative
public static void initNative() -
getLlamaModelDefaultParams
public static LlamaModelParams getLlamaModelDefaultParams() -
getLlamaContextDefaultParams
public static LlamaContextParams getLlamaContextDefaultParams() -
llamaBackendInit
public static void llamaBackendInit(boolean numa) -
llamaBackendFree
public static void llamaBackendFree() -
loadLlamaModelFromFile
public static void loadLlamaModelFromFile(String modelPath, LlamaModelParams params) throws ModelException - Throws:
ModelException
-
createNewContextWithModel
public static void createNewContextWithModel(…) throws ModelException  [parameter list lost in extraction; presumably takes a LlamaContextParams — verify against source] - Throws:
ModelException
-
release
public static void release() -
getMaxDevices
public static int getMaxDevices() -
isMmapSupported
public static boolean isMmapSupported() -
isMlockSupported
public static boolean isMlockSupported() -
getVocabSize
public static int getVocabSize() -
getContextSize
public static int getContextSize() -
getEmbeddingSize
public static int getEmbeddingSize() -
getVocabType
public static int getVocabType() -
loadLoraModelFromFile
public static int loadLoraModelFromFile(String loraPath, float loraScale, String baseModelPath, int threads) throws ModelException - Throws:
ModelException
-
getLogits
public static float[] getLogits(int index) -
getEmbeddings
public static float[] getEmbeddings() -
getTokenText
public static String getTokenText(int token) -
getTokenScore
public static float getTokenScore(int token) -
getTokenType
public static int getTokenType(int token) -
getTokenBOS
public static int getTokenBOS() -
getTokenEOS
public static int getTokenEOS() -
getTokenNL
public static int getTokenNL() -
tokenize
public static int tokenize(byte[] buf, int bufferLength, int[] tokens, int maxTokens, boolean addBos) -
tokenToPiece
public static int tokenToPiece(int token, byte[] buf, int bufferLength) -
getSamplingMetrics
public static Metrics getSamplingMetrics(boolean reset) -
getSystemInfo
public static String getSystemInfo() -
sampling
public static int sampling(float[] logits, int[] lastTokens, int lastTokensSize, float penalty, float alphaFrequency, float alphaPresence, boolean penalizeNL, int mirostatMode, float mirostatTAU, float mirostatETA, float temperature, int topK, float topP, float tsf, float typical, int sequenceId, int pastTokenSize) throws DecodeException - Throws:
DecodeException
-
loadLlamaGrammar
public static boolean loadLlamaGrammar(String grammarRules) -
batchDecode
public static int batchDecode(int sequenceId, int[] tokens, int inputLength, int pastTokenSize) -
clearCache
public static void clearCache(int sequenceId, int posStart, int posEnd) -
clearCache
public static void clearCache(int sequenceId) -
tokenize
public static int[] tokenize(…)  [String-input overload returning int[]; parameter list lost in extraction — verify against source] -