Skip to content

Commit 0ea7ad6

Browse files
committed
Add support for HSQLDB store
1 parent 91f43ee commit 0ea7ad6

File tree

23 files changed

+657
-141
lines changed

23 files changed

+657
-141
lines changed

aiservices/openai/src/main/java/com/microsoft/semantickernel/aiservices/openai/textembedding/OpenAITextEmbeddingGenerationService.java

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,35 +8,37 @@
88
import com.microsoft.semantickernel.aiservices.openai.OpenAiService;
99
import com.microsoft.semantickernel.exceptions.AIException;
1010
import com.microsoft.semantickernel.services.openai.OpenAiServiceBuilder;
11-
import com.microsoft.semantickernel.services.textcompletion.TextGenerationService;
1211
import com.microsoft.semantickernel.services.textembedding.Embedding;
1312
import com.microsoft.semantickernel.services.textembedding.TextEmbeddingGenerationService;
13+
import java.util.ArrayList;
14+
import java.util.Arrays;
15+
import java.util.List;
16+
import javax.annotation.Nullable;
1417
import org.slf4j.Logger;
1518
import org.slf4j.LoggerFactory;
1619
import reactor.core.publisher.Mono;
1720

18-
import javax.annotation.Nullable;
19-
import java.util.ArrayList;
20-
import java.util.List;
21-
2221
/**
2322
* An OpenAI implementation of a {@link TextEmbeddingGenerationService}.
24-
*
2523
*/
2624
public class OpenAITextEmbeddingGenerationService extends OpenAiService<OpenAIAsyncClient>
2725
implements TextEmbeddingGenerationService {
26+
2827
private static final Logger LOGGER = LoggerFactory
2928
.getLogger(OpenAITextEmbeddingGenerationService.class);
3029
private static final int DEFAULT_DIMENSIONS = 1536;
3130
private final int dimensions;
3231

32+
public static final int EMBEDDING_DIMENSIONS_SMALL = 1536;
33+
public static final int EMBEDDING_DIMENSIONS_LARGE = 3072;
34+
3335
/**
3436
* Creates a new {@link OpenAITextEmbeddingGenerationService}.
3537
*
36-
* @param client OpenAI client
38+
* @param client OpenAI client
3739
* @param deploymentName deployment name
38-
* @param modelId OpenAI model id
39-
* @param serviceId Service id
40+
* @param modelId OpenAI model id
41+
* @param serviceId Service id
4042
*/
4143
public OpenAITextEmbeddingGenerationService(
4244
OpenAIAsyncClient client,
@@ -57,6 +59,24 @@ public static Builder builder() {
5759
return new Builder();
5860
}
5961

62+
/**
63+
* Generates a single embedding for the given data.
64+
*
65+
* @param data The data to generate embeddings for.
66+
* @return A Mono that completes with the embedding, or completes empty if no embedding was generated.
67+
*/
68+
@Override
69+
public Mono<Embedding> generateEmbeddingAsync(String data) {
70+
return this.internalGenerateTextEmbeddingsAsync(Arrays.asList(data))
71+
.flatMap(embeddings -> {
72+
if (embeddings.isEmpty()) {
73+
return Mono.empty();
74+
}
75+
76+
return Mono.just(embeddings.get(0));
77+
});
78+
}
79+
6080
/**
6181
* Generates embeddings for the given data.
6282
*
@@ -88,6 +108,7 @@ protected Mono<List<Embedding>> internalGenerateTextEmbeddingsAsync(List<String>
88108
*/
89109
public static class Builder extends
90110
OpenAiServiceBuilder<OpenAIAsyncClient, OpenAITextEmbeddingGenerationService, OpenAITextEmbeddingGenerationService.Builder> {
111+
91112
private int dimensions = DEFAULT_DIMENSIONS;
92113

93114
/**

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/jdbc/Hotel.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordDataAttribute;
66
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordKeyAttribute;
77
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordVectorAttribute;
8+
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
89

910
import java.util.List;
1011

@@ -23,19 +24,19 @@ public class Hotel {
2324
private final String description;
2425

2526
@JsonProperty("summaryEmbedding1")
26-
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = "euclidean")
27+
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
2728
private final List<Float> euclidean;
2829

2930
@JsonProperty("summaryEmbedding2")
30-
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = "cosineDistance")
31+
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.COSINE_DISTANCE)
3132
private final List<Float> cosineDistance;
3233

3334
@JsonProperty("summaryEmbedding3")
34-
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = "dotProduct")
35+
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.DOT_PRODUCT)
3536
private final List<Float> dotProduct;
3637

3738
@JsonProperty("indexedSummaryEmbedding")
38-
@VectorStoreRecordVectorAttribute(dimensions = 8, indexKind = "hnsw", distanceFunction = "euclidean")
39+
@VectorStoreRecordVectorAttribute(dimensions = 8, indexKind = "hnsw", distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
3940
private final List<Float> indexedEuclidean;
4041
@VectorStoreRecordDataAttribute
4142
private double rating;

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/jdbc/JDBCVectorStoreRecordCollectionTest.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ private JDBCVectorStoreRecordCollection<Hotel> buildRecordCollection(QueryProvid
7979
throw new IllegalArgumentException("Unknown query provider: " + provider);
8080
}
8181

82-
8382
JDBCVectorStoreRecordCollection<Hotel> recordCollection = new JDBCVectorStoreRecordCollection<>(
8483
dataSource,
8584
collectionName,

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/Hotel.java

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordDataAttribute;
66
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordKeyAttribute;
77
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordVectorAttribute;
8-
8+
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
99
import java.util.List;
1010

1111
public class Hotel {
12+
1213
@VectorStoreRecordKeyAttribute
1314
private final String id;
1415

@@ -23,15 +24,15 @@ public class Hotel {
2324
private final String description;
2425

2526
@JsonProperty("summaryEmbedding1")
26-
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = "euclidean")
27+
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
2728
private final List<Float> euclidean;
2829

2930
@JsonProperty("summaryEmbedding2")
30-
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = "cosineDistance")
31+
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.COSINE_DISTANCE)
3132
private final List<Float> cosineDistance;
3233

3334
@JsonProperty("summaryEmbedding3")
34-
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = "dotProduct")
35+
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.DOT_PRODUCT)
3536
private final List<Float> dotProduct;
3637
@VectorStoreRecordDataAttribute
3738
private double rating;
@@ -42,14 +43,14 @@ public Hotel() {
4243

4344
@JsonCreator
4445
public Hotel(
45-
@JsonProperty("id") String id,
46-
@JsonProperty("name") String name,
47-
@JsonProperty("code") int code,
48-
@JsonProperty("summary") String description,
49-
@JsonProperty("summaryEmbedding1") List<Float> euclidean,
50-
@JsonProperty("summaryEmbedding2") List<Float> cosineDistance,
51-
@JsonProperty("summaryEmbedding3") List<Float> dotProduct,
52-
@JsonProperty("rating") double rating) {
46+
@JsonProperty("id") String id,
47+
@JsonProperty("name") String name,
48+
@JsonProperty("code") int code,
49+
@JsonProperty("summary") String description,
50+
@JsonProperty("summaryEmbedding1") List<Float> euclidean,
51+
@JsonProperty("summaryEmbedding2") List<Float> cosineDistance,
52+
@JsonProperty("summaryEmbedding3") List<Float> dotProduct,
53+
@JsonProperty("rating") double rating) {
5354
this.id = id;
5455
this.name = name;
5556
this.code = code;

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/chatcompletion/Example63_ChatCompletionPrompts.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public class Example63_ChatCompletionPrompts {
1919
// Only required if AZURE_CLIENT_KEY is set
2020
private static final String CLIENT_ENDPOINT = System.getenv("CLIENT_ENDPOINT");
2121
private static final String MODEL_ID = System.getenv()
22-
.getOrDefault("MODEL_ID", "gpt-35-turbo");
22+
.getOrDefault("MODEL_ID", "gpt-35-turbo-2");
2323

2424
public static void main(String[] args) throws InterruptedException {
2525

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/AzureAISearchVectorStore.java

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,17 @@
1616
import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStoreRecordCollection;
1717
import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStoreRecordCollectionOptions;
1818
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult;
19+
import com.microsoft.semantickernel.data.vectorsearch.queries.VectorSearchQuery;
1920
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordDataAttribute;
2021
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordKeyAttribute;
2122
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordVectorAttribute;
23+
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
2224
import java.nio.charset.StandardCharsets;
2325
import java.util.Arrays;
2426
import java.util.Base64;
25-
import java.util.Collections;
2627
import java.util.List;
2728
import java.util.Map;
2829
import java.util.stream.Collectors;
29-
3030
import reactor.core.publisher.Flux;
3131
import reactor.core.publisher.Mono;
3232

@@ -48,29 +48,27 @@ public class AzureAISearchVectorStore {
4848
private static final int EMBEDDING_DIMENSIONS = 1536;
4949

5050
static class GitHubFile {
51+
5152
@JsonProperty("fileId") // Set a different name for the storage field if needed
5253
@VectorStoreRecordKeyAttribute()
5354
private final String id;
5455
@VectorStoreRecordDataAttribute()
56+
@VectorStoreRecordVectorAttribute(distanceFunction = DistanceFunction.COSINE_DISTANCE, dimensions = EMBEDDING_DIMENSIONS)
5557
private final String description;
5658
@VectorStoreRecordDataAttribute
5759
private final String link;
58-
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw")
59-
private final List<Float> embedding;
6060

6161
public GitHubFile() {
62-
this(null, null, null, Collections.emptyList());
62+
this(null, null, null);
6363
}
6464

6565
public GitHubFile(
6666
@JsonProperty("fileId") String id,
6767
@JsonProperty("description") String description,
68-
@JsonProperty("link") String link,
69-
@JsonProperty("embedding") List<Float> embedding) {
68+
@JsonProperty("link") String link) {
7069
this.id = id;
7170
this.description = description;
7271
this.link = link;
73-
this.embedding = embedding;
7472
}
7573

7674
static String encodeId(String realId) {
@@ -157,8 +155,7 @@ private static Mono<List<VectorSearchResult<GitHubFile>>> search(
157155
AzureAISearchVectorStoreRecordCollection<GitHubFile> recordCollection,
158156
OpenAITextEmbeddingGenerationService embeddingGeneration) {
159157

160-
return embeddingGeneration.generateEmbeddingsAsync(Collections.singletonList(searchText))
161-
.flatMap(r -> recordCollection.searchAsync(r.get(0).getVector(), null));
158+
return recordCollection.searchAsync(VectorSearchQuery.createQuery(searchText, null));
162159
}
163160

164161
private static Mono<List<String>> storeData(
@@ -170,16 +167,12 @@ private static Mono<List<String>> storeData(
170167
.flatMap(entry -> {
171168
System.out.println("Save '" + entry.getKey() + "' to memory.");
172169

173-
return embeddingGeneration
174-
.generateEmbeddingsAsync(Collections.singletonList(entry.getValue()))
175-
.flatMap(embeddings -> {
176-
GitHubFile gitHubFile = new GitHubFile(
177-
GitHubFile.encodeId(entry.getKey()),
178-
entry.getValue(),
179-
entry.getKey(),
180-
embeddings.get(0).getVector());
181-
return recordCollection.upsertAsync(gitHubFile, null);
182-
});
170+
GitHubFile gitHubFile = new GitHubFile(
171+
GitHubFile.encodeId(entry.getKey()),
172+
entry.getValue(),
173+
entry.getKey());
174+
175+
return recordCollection.upsertAsync(gitHubFile, null);
183176
})
184177
.collectList();
185178
}

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/InMemory_DataStorage.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import java.util.List;
1818
import java.util.Map;
1919
import java.util.stream.Collectors;
20+
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
2021
import reactor.core.publisher.Flux;
2122
import reactor.core.publisher.Mono;
2223

@@ -41,7 +42,7 @@ static class GitHubFile {
4142
private final String description;
4243
@VectorStoreRecordDataAttribute
4344
private final String link;
44-
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw")
45+
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw", distanceFunction = DistanceFunction.COSINE_DISTANCE)
4546
private final List<Float> embedding;
4647

4748
public GitHubFile(

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/JDBC_DataStorage.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordDataAttribute;
1515
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordKeyAttribute;
1616
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordVectorAttribute;
17+
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
1718
import com.mysql.cj.jdbc.MysqlDataSource;
1819
import java.nio.charset.StandardCharsets;
1920
import java.sql.SQLException;
@@ -49,7 +50,7 @@ static class GitHubFile {
4950
private final String description;
5051
@VectorStoreRecordDataAttribute
5152
private final String link;
52-
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw")
53+
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw", distanceFunction = DistanceFunction.COSINE_DISTANCE)
5354
private final List<Float> embedding;
5455

5556
public GitHubFile() {

samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/Redis_DataStorage.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.util.Map;
2424
import java.util.stream.Collectors;
2525

26+
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
2627
import reactor.core.publisher.Flux;
2728
import reactor.core.publisher.Mono;
2829
import redis.clients.jedis.JedisPooled;
@@ -51,7 +52,7 @@ public static class GitHubFile {
5152
private final String description;
5253
@VectorStoreRecordDataAttribute
5354
private final String link;
54-
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw")
55+
@VectorStoreRecordVectorAttribute(dimensions = EMBEDDING_DIMENSIONS, indexKind = "Hnsw", distanceFunction = DistanceFunction.COSINE_DISTANCE)
5556
private final List<Float> embedding;
5657

5758
public GitHubFile() {
Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,30 @@
11
// Copyright (c) Microsoft. All rights reserved.
22
package com.microsoft.semantickernel.services.textembedding;
33

4-
import com.microsoft.semantickernel.builders.SemanticKernelBuilder;
54
import com.microsoft.semantickernel.services.AIService;
6-
import reactor.core.publisher.Mono;
7-
85
import java.util.List;
6+
import reactor.core.publisher.Mono;
97

10-
/** Interface for text embedding generation services */
8+
/**
9+
* Interface for text embedding generation services
10+
*/
1111
public interface EmbeddingGenerationService<TValue> extends AIService {
12+
1213
/**
1314
* Generates a list of embeddings associated with the data
1415
*
1516
* @param data List of texts to generate embeddings for
1617
* @return List of embeddings of each data point
1718
*/
1819
Mono<List<Embedding>> generateEmbeddingsAsync(List<TValue> data);
20+
21+
/**
22+
* Generates an embedding associated with the data
23+
*
24+
* @param data Text to generate embedding for
25+
* @return Embedding of the data
26+
*/
27+
28+
Mono<Embedding> generateEmbeddingAsync(TValue data);
29+
1930
}

0 commit comments

Comments
 (0)