Commit 096d98c

fix: add rope settings during model load, fix CUDA (#821)
Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 147cae9 commit 096d98c

7 files changed: +176 additions, -148 deletions


Makefile

Lines changed: 1 addition & 2 deletions

@@ -4,8 +4,7 @@ GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
 
 # llama.cpp versions
-# Temporarly pinned to https://github.com/go-skynet/go-llama.cpp/pull/124
-GOLLAMA_VERSION?=562d2b5a71195627a63bb34f639e0fb0e2b2df3f
+GOLLAMA_VERSION?=6ba16de8e965e5aa0f32d25ef9d6149bb6586565
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

README.md

Lines changed: 4 additions & 3 deletions

@@ -13,15 +13,13 @@
 
 **LocalAI** is a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families that are compatible with the ggml format. Does not require GPU.
 
-For a list of the supported model families, please see [the model compatibility table](https://localai.io/model-compatibility/index.html#model-compatibility-table).
-
 In a nutshell:
 
 - Local, OpenAI drop-in alternative REST API. You own your data.
 - NO GPU required. NO Internet access is required either
 - Optional, GPU Acceleration is available in `llama.cpp`-compatible LLMs. See also the [build section](https://localai.io/basics/build/index.html).
 - Supports multiple models:
-    - 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... and more)
+    - 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
     - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
     - 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
     - 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
@@ -31,6 +29,8 @@ In a nutshell:
 
 LocalAI was created by [Ettore Di Giacinto](https://github.com/mudler/) and is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome!
 
+Note that this started just as a [fun weekend project](https://localai.io/#backstory) in order to try to create the necessary pieces for a full AI assistant like `ChatGPT`: the community is growing fast and we are working hard to make it better and more stable. If you want to help, please consider contributing (see below)!
+
 See the [Getting started](https://localai.io/basics/getting_started/index.html) and [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) sections to learn how to use LocalAI. For a list of curated models check out the [model gallery](https://localai.io/models/).
 
 
@@ -53,6 +53,7 @@ See the [Getting started](https://localai.io/basics/getting_started/index.html)
 - [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
 - [ ] Enable gallery management directly from the webui.
 - [x] 🔥 OpenAI functions: https://github.com/go-skynet/LocalAI/issues/588
+- [ ] 🔥 GPTQ support: https://github.com/go-skynet/LocalAI/issues/796
 
 ## News
 

api/backend/options.go

Lines changed: 15 additions & 13 deletions

@@ -15,19 +15,21 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
         b = c.Batch
     }
     return &pb.ModelOptions{
-        ContextSize: int32(c.ContextSize),
-        Seed:        int32(c.Seed),
-        NBatch:      int32(b),
-        F16Memory:   c.F16,
-        MLock:       c.MMlock,
-        NUMA:        c.NUMA,
-        Embeddings:  c.Embeddings,
-        LowVRAM:     c.LowVRAM,
-        NGPULayers:  int32(c.NGPULayers),
-        MMap:        c.MMap,
-        MainGPU:     c.MainGPU,
-        Threads:     int32(c.Threads),
-        TensorSplit: c.TensorSplit,
+        ContextSize:   int32(c.ContextSize),
+        Seed:          int32(c.Seed),
+        NBatch:        int32(b),
+        F16Memory:     c.F16,
+        MLock:         c.MMlock,
+        RopeFreqBase:  c.RopeFreqBase,
+        RopeFreqScale: c.RopeFreqScale,
+        NUMA:          c.NUMA,
+        Embeddings:    c.Embeddings,
+        LowVRAM:       c.LowVRAM,
+        NGPULayers:    int32(c.NGPULayers),
+        MMap:          c.MMap,
+        MainGPU:       c.MainGPU,
+        Threads:       int32(c.Threads),
+        TensorSplit:   c.TensorSplit,
     }
 }
 
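The two new fields forward the model configuration's RoPE frequency settings to the backend when the model is created, rather than only at prediction time (the "add rope settings during model load" part of the commit title). Below is a minimal, illustrative sketch of a ModelOptions payload with those fields set; the import path is an assumption based on LocalAI's package layout, and the values are placeholders (10000 is llama.cpp's conventional default base frequency, and a scale of 0.5 roughly corresponds to doubling the usable context under linear RoPE scaling).

// Illustrative sketch only, not code from this commit: build a ModelOptions
// message with the new RoPE fields populated, mirroring what gRPCModelOpts
// now emits. Import path and values are assumptions.
package main

import (
    "fmt"

    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)

func main() {
    opts := &pb.ModelOptions{
        ContextSize:   4096,  // context window requested for the model
        NGPULayers:    32,    // layers to offload when built with CUDA
        RopeFreqBase:  10000, // llama.cpp's conventional default base frequency
        RopeFreqScale: 0.5,   // linear RoPE scaling: 0.5 ~ 2x usable context
    }
    fmt.Printf("model options: %+v\n", opts)
}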

extra/grpc/huggingface/backend_pb2.py

Lines changed: 18 additions & 18 deletions
Some generated files are not rendered by default.

pkg/grpc/llm/llama/llama.go

Lines changed: 15 additions & 12 deletions

@@ -17,7 +17,10 @@ type LLM struct {
 }
 
 func (llm *LLM) Load(opts *pb.ModelOptions) error {
-    llamaOpts := []llama.ModelOption{}
+    llamaOpts := []llama.ModelOption{
+        llama.WithRopeFreqBase(opts.RopeFreqBase),
+        llama.WithRopeFreqScale(opts.RopeFreqScale),
+    }
 
     if opts.ContextSize != 0 {
         llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
@@ -56,15 +59,15 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
 
 func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
     predictOptions := []llama.PredictOption{
-        llama.SetTemperature(float64(opts.Temperature)),
-        llama.SetTopP(float64(opts.TopP)),
+        llama.SetTemperature(opts.Temperature),
+        llama.SetTopP(opts.TopP),
         llama.SetTopK(int(opts.TopK)),
         llama.SetTokens(int(opts.Tokens)),
         llama.SetThreads(int(opts.Threads)),
         llama.WithGrammar(opts.Grammar),
-        llama.SetRopeFreqBase(float64(opts.RopeFreqBase)),
-        llama.SetRopeFreqScale(float64(opts.RopeFreqScale)),
-        llama.SetNegativePromptScale(float64(opts.NegativePromptScale)),
+        llama.SetRopeFreqBase(opts.RopeFreqBase),
+        llama.SetRopeFreqScale(opts.RopeFreqScale),
+        llama.SetNegativePromptScale(opts.NegativePromptScale),
         llama.SetNegativePrompt(opts.NegativePrompt),
     }
 
@@ -86,11 +89,11 @@ func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
     }
 
     if opts.MirostatETA != 0 {
-        predictOptions = append(predictOptions, llama.SetMirostatETA(float64(opts.MirostatETA)))
+        predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
     }
 
     if opts.MirostatTAU != 0 {
-        predictOptions = append(predictOptions, llama.SetMirostatTAU(float64(opts.MirostatTAU)))
+        predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
     }
 
     if opts.Debug {
@@ -100,7 +103,7 @@ func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
     predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))
 
     if opts.PresencePenalty != 0 {
-        predictOptions = append(predictOptions, llama.SetPenalty(float64(opts.PresencePenalty)))
+        predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
     }
 
     if opts.NKeep != 0 {
@@ -125,13 +128,13 @@ func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
 
     //predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
 
-    predictOptions = append(predictOptions, llama.SetFrequencyPenalty(float64(opts.FrequencyPenalty)))
+    predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
     predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
     predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
     predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
     predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
-    predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(float64(opts.TailFreeSamplingZ)))
-    predictOptions = append(predictOptions, llama.SetTypicalP(float64(opts.TypicalP)))
+    predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
+    predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
     return predictOptions
 }
 
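For context on what the Load change enables: with the go-llama.cpp bindings bumped in the Makefile above, the RoPE settings become load-time model options rather than something applied only per prediction. A rough usage sketch against the go-skynet/go-llama.cpp bindings follows, using only option helpers that appear in this diff; the model path and parameter values are placeholders, and exact signatures may differ between binding versions.

// Hedged sketch, not part of this commit: load a ggml model with RoPE
// settings applied at load time, then run a small prediction.
package main

import (
    "fmt"

    llama "github.com/go-skynet/go-llama.cpp"
)

func main() {
    model, err := llama.New(
        "/models/ggml-model.bin",      // placeholder model path
        llama.SetContext(4096),        // context size
        llama.WithRopeFreqBase(10000), // RoPE base frequency
        llama.WithRopeFreqScale(0.5),  // RoPE frequency scale
    )
    if err != nil {
        panic(err)
    }
    defer model.Free()

    out, err := model.Predict("Hello", llama.SetTokens(32), llama.SetThreads(4))
    if err != nil {
        panic(err)
    }
    fmt.Println(out)
}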
