From 52949e251d574bdc29dded693fe04767b4236201 Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Thu, 30 Apr 2026 15:48:26 +0200
Subject: [PATCH 1/4] Fix tokenizer
---
.../integration/TextAnalyzerITest.java | 163 ++++++++++++++++++
.../v1/api/collections/TextAnalyzer.java | 6 +-
2 files changed, 166 insertions(+), 3 deletions(-)
create mode 100644 src/it/java/io/weaviate/integration/TextAnalyzerITest.java
diff --git a/src/it/java/io/weaviate/integration/TextAnalyzerITest.java b/src/it/java/io/weaviate/integration/TextAnalyzerITest.java
new file mode 100644
index 000000000..4c6356cf3
--- /dev/null
+++ b/src/it/java/io/weaviate/integration/TextAnalyzerITest.java
@@ -0,0 +1,163 @@
+package io.weaviate.integration;
+
+import java.util.Map;
+
+import org.assertj.core.api.Assertions;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import io.weaviate.ConcurrentTest;
+import io.weaviate.client6.v1.api.WeaviateClient;
+import io.weaviate.client6.v1.api.collections.Property;
+import io.weaviate.client6.v1.api.collections.TextAnalyzer;
+import io.weaviate.client6.v1.api.collections.Tokenization;
+import io.weaviate.client6.v1.api.collections.query.Filter;
+import io.weaviate.containers.Container;
+import io.weaviate.containers.Weaviate;
+
+/**
+ * End-to-end coverage for the v1.37 per-property {@link TextAnalyzer}
+ * configuration: ASCII folding ({@code asciiFold}, {@code asciiFoldIgnore})
+ * and per-property {@code stopwordPreset}.
+ *
+ * This test would have caught the snake_case {@code @SerializedName}
+ * regression in {@link TextAnalyzer} that was silently dropping every
+ * analyzer setting on the wire. Two layers of assertion:
+ *
+ *
+ * - Schema round-trip: read the collection back via {@code
+ * collection.config.get()} and verify the {@code textAnalyzer} field
+ * is populated with the values that were sent. If the JSON keys
+ * don't match Weaviate's struct tags, the server stores nothing and
+ * this assertion fails.
+ * - Behavioral: insert a string and run a filter query that
+ * only succeeds when folding actually executes server-side. If the
+ * analyzer config arrived as a no-op, the filter returns zero
+ * results and the assertion fails.
+ *
+ */
+public class TextAnalyzerITest extends ConcurrentTest {
+ private static final WeaviateClient client = Container.WEAVIATE.getClient();
+
+ @BeforeClass
+ public static void __() {
+ Weaviate.Version.V137.orSkip();
+ }
+
+ @Test
+ public void testAsciiFoldRoundTripsThroughConfigAndAffectsFilters() throws Exception {
+ var nsAccent = ns("AccentFolding");
+ client.collections.create(nsAccent, c -> c
+ .properties(
+ Property.text("text_default",
+ p -> p.tokenization(Tokenization.WORD)),
+ Property.text("text_folded",
+ p -> p.tokenization(Tokenization.WORD)
+ .textAnalyzer(TextAnalyzer.of(t -> t.foldAscii(true)))),
+ Property.text("text_folded_keep_e",
+ p -> p.tokenization(Tokenization.WORD)
+ .textAnalyzer(TextAnalyzer.of(t -> t
+ .foldAscii(true)
+ .keepAscii("é"))))));
+
+ var products = client.collections.use(nsAccent);
+
+ // ---- Layer 1: schema round-trip -------------------------------------
+ var config = products.config.get();
+ Assertions.assertThat(config).isPresent();
+ var props = config.get().properties();
+
+ var textDefault = props.stream()
+ .filter(p -> p.propertyName().equals("text_default")).findFirst().orElseThrow();
+ var textFolded = props.stream()
+ .filter(p -> p.propertyName().equals("text_folded")).findFirst().orElseThrow();
+ var textFoldedKeepE = props.stream()
+ .filter(p -> p.propertyName().equals("text_folded_keep_e")).findFirst().orElseThrow();
+
+ Assertions.assertThat(textDefault.textAnalyzer())
+ .as("default property has no textAnalyzer config")
+ .isNull();
+
+ Assertions.assertThat(textFolded.textAnalyzer())
+ .as("text_folded persists asciiFold=true")
+ .isNotNull()
+ .satisfies(ta -> {
+ Assertions.assertThat(ta.foldAscii()).isTrue();
+ });
+
+ Assertions.assertThat(textFoldedKeepE.textAnalyzer())
+ .as("text_folded_keep_e persists asciiFold=true and asciiFoldIgnore=[é]")
+ .isNotNull()
+ .satisfies(ta -> {
+ Assertions.assertThat(ta.foldAscii()).isTrue();
+ Assertions.assertThat(ta.keepAscii()).containsExactly("é");
+ });
+
+ // ---- Layer 2: behavioral --------------------------------------------
+ products.data.insert(Map.of(
+ "text_default", "Café Crème Bio",
+ "text_folded", "Café Crème Bio",
+ "text_folded_keep_e", "Café Crème Bio"));
+
+ // "cafe" (lowercase, no accents) must match only the fully-folded property.
+ var defaultMatches = products.query.fetchObjects(
+ q -> q.filters(Filter.property("text_default").eq("cafe")));
+ Assertions.assertThat(defaultMatches.objects())
+ .as("text_default has no folding, 'cafe' should not match 'Café Crème Bio'")
+ .isEmpty();
+
+ var foldedMatches = products.query.fetchObjects(
+ q -> q.filters(Filter.property("text_folded").eq("cafe")));
+ Assertions.assertThat(foldedMatches.objects())
+ .as("text_folded has asciiFold=true, 'cafe' must match 'Café Crème Bio'")
+ .hasSize(1);
+
+ var keepEMatches = products.query.fetchObjects(
+ q -> q.filters(Filter.property("text_folded_keep_e").eq("cafe")));
+ Assertions.assertThat(keepEMatches.objects())
+ .as("text_folded_keep_e preserves é, 'cafe' must NOT match 'Café Crème Bio'")
+ .isEmpty();
+
+ // The exact accented form matches everywhere.
+ for (String prop : new String[] {"text_default", "text_folded", "text_folded_keep_e"}) {
+ var hits = products.query.fetchObjects(
+ q -> q.filters(Filter.property(prop).eq("Café")));
+ Assertions.assertThat(hits.objects())
+ .as("'Café' (exact) should match on %s regardless of folding", prop)
+ .hasSize(1);
+ }
+ }
+
+ @Test
+ public void testStopwordPresetRoundTripsThroughConfig() throws Exception {
+ var nsStop = ns("StopwordPreset");
+ client.collections.create(nsStop, c -> c
+ .properties(
+ Property.text("name_en",
+ p -> p.tokenization(Tokenization.WORD)
+ .textAnalyzer(TextAnalyzer.of(t -> t.stopwordPreset("en")))),
+ Property.text("name_none",
+ p -> p.tokenization(Tokenization.WORD)
+ .textAnalyzer(TextAnalyzer.of(t -> t.stopwordPreset("none"))))));
+
+ var products = client.collections.use(nsStop);
+ var config = products.config.get();
+ Assertions.assertThat(config).isPresent();
+ var props = config.get().properties();
+
+ var nameEn = props.stream()
+ .filter(p -> p.propertyName().equals("name_en")).findFirst().orElseThrow();
+ var nameNone = props.stream()
+ .filter(p -> p.propertyName().equals("name_none")).findFirst().orElseThrow();
+
+ Assertions.assertThat(nameEn.textAnalyzer())
+ .as("name_en persists stopwordPreset=en")
+ .isNotNull()
+ .satisfies(ta -> Assertions.assertThat(ta.stopwordPreset()).isEqualTo("en"));
+
+ Assertions.assertThat(nameNone.textAnalyzer())
+ .as("name_none persists stopwordPreset=none")
+ .isNotNull()
+ .satisfies(ta -> Assertions.assertThat(ta.stopwordPreset()).isEqualTo("none"));
+ }
+}
diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/TextAnalyzer.java b/src/main/java/io/weaviate/client6/v1/api/collections/TextAnalyzer.java
index 3265b63a5..045e85d4c 100644
--- a/src/main/java/io/weaviate/client6/v1/api/collections/TextAnalyzer.java
+++ b/src/main/java/io/weaviate/client6/v1/api/collections/TextAnalyzer.java
@@ -10,9 +10,9 @@
import io.weaviate.client6.v1.internal.ObjectBuilder;
public record TextAnalyzer(
- @SerializedName("ascii_fold") Boolean foldAscii,
- @SerializedName("ascii_fold_ignore") List keepAscii,
- @SerializedName("stopword_preset") String stopwordPreset) {
+ @SerializedName("asciiFold") Boolean foldAscii,
+ @SerializedName("asciiFoldIgnore") List keepAscii,
+ @SerializedName("stopwordPreset") String stopwordPreset) {
public static TextAnalyzer of() {
return null;
From 04188800d8d17ab368c8c68f170836900f081ca7 Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Thu, 30 Apr 2026 15:52:31 +0200
Subject: [PATCH 2/4] Remove comment
---
.../integration/TextAnalyzerITest.java | 265 ++++++++----------
1 file changed, 122 insertions(+), 143 deletions(-)
diff --git a/src/it/java/io/weaviate/integration/TextAnalyzerITest.java b/src/it/java/io/weaviate/integration/TextAnalyzerITest.java
index 4c6356cf3..a6ca565e4 100644
--- a/src/it/java/io/weaviate/integration/TextAnalyzerITest.java
+++ b/src/it/java/io/weaviate/integration/TextAnalyzerITest.java
@@ -15,149 +15,128 @@
import io.weaviate.containers.Container;
import io.weaviate.containers.Weaviate;
-/**
- * End-to-end coverage for the v1.37 per-property {@link TextAnalyzer}
- * configuration: ASCII folding ({@code asciiFold}, {@code asciiFoldIgnore})
- * and per-property {@code stopwordPreset}.
- *
- * This test would have caught the snake_case {@code @SerializedName}
- * regression in {@link TextAnalyzer} that was silently dropping every
- * analyzer setting on the wire. Two layers of assertion:
- *
- *
- * - Schema round-trip: read the collection back via {@code
- * collection.config.get()} and verify the {@code textAnalyzer} field
- * is populated with the values that were sent. If the JSON keys
- * don't match Weaviate's struct tags, the server stores nothing and
- * this assertion fails.
- * - Behavioral: insert a string and run a filter query that
- * only succeeds when folding actually executes server-side. If the
- * analyzer config arrived as a no-op, the filter returns zero
- * results and the assertion fails.
- *
- */
public class TextAnalyzerITest extends ConcurrentTest {
- private static final WeaviateClient client = Container.WEAVIATE.getClient();
-
- @BeforeClass
- public static void __() {
- Weaviate.Version.V137.orSkip();
- }
-
- @Test
- public void testAsciiFoldRoundTripsThroughConfigAndAffectsFilters() throws Exception {
- var nsAccent = ns("AccentFolding");
- client.collections.create(nsAccent, c -> c
- .properties(
- Property.text("text_default",
- p -> p.tokenization(Tokenization.WORD)),
- Property.text("text_folded",
- p -> p.tokenization(Tokenization.WORD)
- .textAnalyzer(TextAnalyzer.of(t -> t.foldAscii(true)))),
- Property.text("text_folded_keep_e",
- p -> p.tokenization(Tokenization.WORD)
- .textAnalyzer(TextAnalyzer.of(t -> t
- .foldAscii(true)
- .keepAscii("é"))))));
-
- var products = client.collections.use(nsAccent);
-
- // ---- Layer 1: schema round-trip -------------------------------------
- var config = products.config.get();
- Assertions.assertThat(config).isPresent();
- var props = config.get().properties();
-
- var textDefault = props.stream()
- .filter(p -> p.propertyName().equals("text_default")).findFirst().orElseThrow();
- var textFolded = props.stream()
- .filter(p -> p.propertyName().equals("text_folded")).findFirst().orElseThrow();
- var textFoldedKeepE = props.stream()
- .filter(p -> p.propertyName().equals("text_folded_keep_e")).findFirst().orElseThrow();
-
- Assertions.assertThat(textDefault.textAnalyzer())
- .as("default property has no textAnalyzer config")
- .isNull();
-
- Assertions.assertThat(textFolded.textAnalyzer())
- .as("text_folded persists asciiFold=true")
- .isNotNull()
- .satisfies(ta -> {
- Assertions.assertThat(ta.foldAscii()).isTrue();
- });
-
- Assertions.assertThat(textFoldedKeepE.textAnalyzer())
- .as("text_folded_keep_e persists asciiFold=true and asciiFoldIgnore=[é]")
- .isNotNull()
- .satisfies(ta -> {
- Assertions.assertThat(ta.foldAscii()).isTrue();
- Assertions.assertThat(ta.keepAscii()).containsExactly("é");
- });
-
- // ---- Layer 2: behavioral --------------------------------------------
- products.data.insert(Map.of(
- "text_default", "Café Crème Bio",
- "text_folded", "Café Crème Bio",
- "text_folded_keep_e", "Café Crème Bio"));
-
- // "cafe" (lowercase, no accents) must match only the fully-folded property.
- var defaultMatches = products.query.fetchObjects(
- q -> q.filters(Filter.property("text_default").eq("cafe")));
- Assertions.assertThat(defaultMatches.objects())
- .as("text_default has no folding, 'cafe' should not match 'Café Crème Bio'")
- .isEmpty();
-
- var foldedMatches = products.query.fetchObjects(
- q -> q.filters(Filter.property("text_folded").eq("cafe")));
- Assertions.assertThat(foldedMatches.objects())
- .as("text_folded has asciiFold=true, 'cafe' must match 'Café Crème Bio'")
- .hasSize(1);
-
- var keepEMatches = products.query.fetchObjects(
- q -> q.filters(Filter.property("text_folded_keep_e").eq("cafe")));
- Assertions.assertThat(keepEMatches.objects())
- .as("text_folded_keep_e preserves é, 'cafe' must NOT match 'Café Crème Bio'")
- .isEmpty();
-
- // The exact accented form matches everywhere.
- for (String prop : new String[] {"text_default", "text_folded", "text_folded_keep_e"}) {
- var hits = products.query.fetchObjects(
- q -> q.filters(Filter.property(prop).eq("Café")));
- Assertions.assertThat(hits.objects())
- .as("'Café' (exact) should match on %s regardless of folding", prop)
- .hasSize(1);
+ private static final WeaviateClient client = Container.WEAVIATE.getClient();
+
+ @BeforeClass
+ public static void __() {
+ Weaviate.Version.V137.orSkip();
+ }
+
+ @Test
+ public void testAsciiFoldRoundTripsThroughConfigAndAffectsFilters() throws Exception {
+ var nsAccent = ns("AccentFolding");
+ client.collections.create(nsAccent, c -> c
+ .properties(
+ Property.text("text_default",
+ p -> p.tokenization(Tokenization.WORD)),
+ Property.text("text_folded",
+ p -> p.tokenization(Tokenization.WORD)
+ .textAnalyzer(TextAnalyzer.of(t -> t.foldAscii(true)))),
+ Property.text("text_folded_keep_e",
+ p -> p.tokenization(Tokenization.WORD)
+ .textAnalyzer(TextAnalyzer.of(t -> t
+ .foldAscii(true)
+ .keepAscii("é"))))));
+
+ var products = client.collections.use(nsAccent);
+
+ // ---- Layer 1: schema round-trip -------------------------------------
+ var config = products.config.get();
+ Assertions.assertThat(config).isPresent();
+ var props = config.get().properties();
+
+ var textDefault = props.stream()
+ .filter(p -> p.propertyName().equals("text_default")).findFirst().orElseThrow();
+ var textFolded = props.stream()
+ .filter(p -> p.propertyName().equals("text_folded")).findFirst().orElseThrow();
+ var textFoldedKeepE = props.stream()
+ .filter(p -> p.propertyName().equals("text_folded_keep_e")).findFirst().orElseThrow();
+
+ Assertions.assertThat(textDefault.textAnalyzer())
+ .as("default property has no textAnalyzer config")
+ .isNull();
+
+ Assertions.assertThat(textFolded.textAnalyzer())
+ .as("text_folded persists asciiFold=true")
+ .isNotNull()
+ .satisfies(ta -> {
+ Assertions.assertThat(ta.foldAscii()).isTrue();
+ });
+
+ Assertions.assertThat(textFoldedKeepE.textAnalyzer())
+ .as("text_folded_keep_e persists asciiFold=true and asciiFoldIgnore=[é]")
+ .isNotNull()
+ .satisfies(ta -> {
+ Assertions.assertThat(ta.foldAscii()).isTrue();
+ Assertions.assertThat(ta.keepAscii()).containsExactly("é");
+ });
+
+ // ---- Layer 2: behavioral --------------------------------------------
+ products.data.insert(Map.of(
+ "text_default", "Café Crème Bio",
+ "text_folded", "Café Crème Bio",
+ "text_folded_keep_e", "Café Crème Bio"));
+
+ // "cafe" (lowercase, no accents) must match only the fully-folded property.
+ var defaultMatches = products.query.fetchObjects(
+ q -> q.filters(Filter.property("text_default").eq("cafe")));
+ Assertions.assertThat(defaultMatches.objects())
+ .as("text_default has no folding, 'cafe' should not match 'Café Crème Bio'")
+ .isEmpty();
+
+ var foldedMatches = products.query.fetchObjects(
+ q -> q.filters(Filter.property("text_folded").eq("cafe")));
+ Assertions.assertThat(foldedMatches.objects())
+ .as("text_folded has asciiFold=true, 'cafe' must match 'Café Crème Bio'")
+ .hasSize(1);
+
+ var keepEMatches = products.query.fetchObjects(
+ q -> q.filters(Filter.property("text_folded_keep_e").eq("cafe")));
+ Assertions.assertThat(keepEMatches.objects())
+ .as("text_folded_keep_e preserves é, 'cafe' must NOT match 'Café Crème Bio'")
+ .isEmpty();
+
+ // The exact accented form matches everywhere.
+ for (String prop : new String[] { "text_default", "text_folded", "text_folded_keep_e" }) {
+ var hits = products.query.fetchObjects(
+ q -> q.filters(Filter.property(prop).eq("Café")));
+ Assertions.assertThat(hits.objects())
+ .as("'Café' (exact) should match on %s regardless of folding", prop)
+ .hasSize(1);
+ }
+ }
+
+ @Test
+ public void testStopwordPresetRoundTripsThroughConfig() throws Exception {
+ var nsStop = ns("StopwordPreset");
+ client.collections.create(nsStop, c -> c
+ .properties(
+ Property.text("name_en",
+ p -> p.tokenization(Tokenization.WORD)
+ .textAnalyzer(TextAnalyzer.of(t -> t.stopwordPreset("en")))),
+ Property.text("name_none",
+ p -> p.tokenization(Tokenization.WORD)
+ .textAnalyzer(TextAnalyzer.of(t -> t.stopwordPreset("none"))))));
+
+ var products = client.collections.use(nsStop);
+ var config = products.config.get();
+ Assertions.assertThat(config).isPresent();
+ var props = config.get().properties();
+
+ var nameEn = props.stream()
+ .filter(p -> p.propertyName().equals("name_en")).findFirst().orElseThrow();
+ var nameNone = props.stream()
+ .filter(p -> p.propertyName().equals("name_none")).findFirst().orElseThrow();
+
+ Assertions.assertThat(nameEn.textAnalyzer())
+ .as("name_en persists stopwordPreset=en")
+ .isNotNull()
+ .satisfies(ta -> Assertions.assertThat(ta.stopwordPreset()).isEqualTo("en"));
+
+ Assertions.assertThat(nameNone.textAnalyzer())
+ .as("name_none persists stopwordPreset=none")
+ .isNotNull()
+ .satisfies(ta -> Assertions.assertThat(ta.stopwordPreset()).isEqualTo("none"));
}
- }
-
- @Test
- public void testStopwordPresetRoundTripsThroughConfig() throws Exception {
- var nsStop = ns("StopwordPreset");
- client.collections.create(nsStop, c -> c
- .properties(
- Property.text("name_en",
- p -> p.tokenization(Tokenization.WORD)
- .textAnalyzer(TextAnalyzer.of(t -> t.stopwordPreset("en")))),
- Property.text("name_none",
- p -> p.tokenization(Tokenization.WORD)
- .textAnalyzer(TextAnalyzer.of(t -> t.stopwordPreset("none"))))));
-
- var products = client.collections.use(nsStop);
- var config = products.config.get();
- Assertions.assertThat(config).isPresent();
- var props = config.get().properties();
-
- var nameEn = props.stream()
- .filter(p -> p.propertyName().equals("name_en")).findFirst().orElseThrow();
- var nameNone = props.stream()
- .filter(p -> p.propertyName().equals("name_none")).findFirst().orElseThrow();
-
- Assertions.assertThat(nameEn.textAnalyzer())
- .as("name_en persists stopwordPreset=en")
- .isNotNull()
- .satisfies(ta -> Assertions.assertThat(ta.stopwordPreset()).isEqualTo("en"));
-
- Assertions.assertThat(nameNone.textAnalyzer())
- .as("name_none persists stopwordPreset=none")
- .isNotNull()
- .satisfies(ta -> Assertions.assertThat(ta.stopwordPreset()).isEqualTo("none"));
- }
}
From 7605016c69fd8e4538f83352ed745a1d75f9d7f8 Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Thu, 30 Apr 2026 15:54:43 +0200
Subject: [PATCH 3/4] Bump version
---
.github/workflows/test.yaml | 2 +-
src/it/java/io/weaviate/integration/TextAnalyzerITest.java | 4 ----
2 files changed, 1 insertion(+), 5 deletions(-)
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 5b13f073e..c635d2be3 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -92,7 +92,7 @@ jobs:
fail-fast: false
matrix:
WEAVIATE_VERSION:
- ["1.32.24", "1.33.11", "1.34.7", "1.35.2", "1.36.9", "1.37.1"]
+ ["1.32.24", "1.33.11", "1.34.7", "1.35.2", "1.36.9", "1.37.2"]
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
diff --git a/src/it/java/io/weaviate/integration/TextAnalyzerITest.java b/src/it/java/io/weaviate/integration/TextAnalyzerITest.java
index a6ca565e4..deaf6685f 100644
--- a/src/it/java/io/weaviate/integration/TextAnalyzerITest.java
+++ b/src/it/java/io/weaviate/integration/TextAnalyzerITest.java
@@ -41,7 +41,6 @@ public void testAsciiFoldRoundTripsThroughConfigAndAffectsFilters() throws Excep
var products = client.collections.use(nsAccent);
- // ---- Layer 1: schema round-trip -------------------------------------
var config = products.config.get();
Assertions.assertThat(config).isPresent();
var props = config.get().properties();
@@ -72,13 +71,11 @@ public void testAsciiFoldRoundTripsThroughConfigAndAffectsFilters() throws Excep
Assertions.assertThat(ta.keepAscii()).containsExactly("é");
});
- // ---- Layer 2: behavioral --------------------------------------------
products.data.insert(Map.of(
"text_default", "Café Crème Bio",
"text_folded", "Café Crème Bio",
"text_folded_keep_e", "Café Crème Bio"));
- // "cafe" (lowercase, no accents) must match only the fully-folded property.
var defaultMatches = products.query.fetchObjects(
q -> q.filters(Filter.property("text_default").eq("cafe")));
Assertions.assertThat(defaultMatches.objects())
@@ -97,7 +94,6 @@ public void testAsciiFoldRoundTripsThroughConfigAndAffectsFilters() throws Excep
.as("text_folded_keep_e preserves é, 'cafe' must NOT match 'Café Crème Bio'")
.isEmpty();
- // The exact accented form matches everywhere.
for (String prop : new String[] { "text_default", "text_folded", "text_folded_keep_e" }) {
var hits = products.query.fetchObjects(
q -> q.filters(Filter.property(prop).eq("Café")));
From 59da10c11dbf0617dda2036482f411db41fd7d5a Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Tue, 5 May 2026 09:54:32 +0200
Subject: [PATCH 4/4] Implement feedback
---
.../integration/CollectionsITest.java | 63 ++++++++
.../integration/TextAnalyzerITest.java | 138 ------------------
2 files changed, 63 insertions(+), 138 deletions(-)
delete mode 100644 src/it/java/io/weaviate/integration/TextAnalyzerITest.java
diff --git a/src/it/java/io/weaviate/integration/CollectionsITest.java b/src/it/java/io/weaviate/integration/CollectionsITest.java
index 889041277..927b2ac05 100644
--- a/src/it/java/io/weaviate/integration/CollectionsITest.java
+++ b/src/it/java/io/weaviate/integration/CollectionsITest.java
@@ -18,6 +18,8 @@
import io.weaviate.client6.v1.api.collections.Quantization;
import io.weaviate.client6.v1.api.collections.ReferenceProperty;
import io.weaviate.client6.v1.api.collections.Replication;
+import io.weaviate.client6.v1.api.collections.TextAnalyzer;
+import io.weaviate.client6.v1.api.collections.Tokenization;
import io.weaviate.client6.v1.api.collections.Replication.AsyncReplicationConfig;
import io.weaviate.client6.v1.api.collections.VectorConfig;
import io.weaviate.client6.v1.api.collections.VectorIndex;
@@ -399,6 +401,67 @@ public void test_dropVectorIndex() throws IOException {
.matches(VectorIndex::isNone).as("is 'none'");
}
+ @Test
+ public void testTextAnalyzer() throws Exception {
+ Weaviate.Version.V137.orSkip();
+
+ var nsTextAnalyzer = ns("TextAnalyzer");
+ var textAnalyzer = client.collections.create(nsTextAnalyzer, c -> c
+ .properties(
+ Property.text("text_default",
+ p -> p.tokenization(Tokenization.WORD)),
+ Property.text("text_folded",
+ p -> p.tokenization(Tokenization.WORD)
+ .textAnalyzer(TextAnalyzer.of(t -> t.foldAscii(true)))),
+ Property.text("text_folded_keep_e",
+ p -> p.tokenization(Tokenization.WORD)
+ .textAnalyzer(TextAnalyzer.of(t -> t
+ .foldAscii(true)
+ .keepAscii("é")))),
+ Property.text("name_en",
+ p -> p.tokenization(Tokenization.WORD)
+ .textAnalyzer(TextAnalyzer.of(t -> t.stopwordPreset("en")))),
+ Property.text("name_none",
+ p -> p.tokenization(Tokenization.WORD)
+ .textAnalyzer(TextAnalyzer.of(t -> t.stopwordPreset("none"))))));
+
+ Assertions.assertThat(textAnalyzer.config.get())
+ .get()
+ .extracting(CollectionConfig::properties, InstanceOfAssertFactories.list(Property.class))
+ .allSatisfy(property -> {
+ var analyzer = property.textAnalyzer();
+ switch (property.propertyName()) {
+ case "text_default":
+ Assertions.assertThat(analyzer)
+ .as("default property has no textAnalyzer config")
+ .isNull();
+ break;
+ case "text_folded":
+ Assertions.assertThat(analyzer)
+ .as("text_folded persists asciiFold=true")
+ .returns(true, TextAnalyzer::foldAscii);
+ break;
+ case "text_folded_keep_e":
+ Assertions.assertThat(analyzer)
+ .as("text_folded_keep_e persists asciiFold=true and asciiFoldIgnore=[é]")
+ .returns(true, TextAnalyzer::foldAscii)
+ .extracting(TextAnalyzer::keepAscii, InstanceOfAssertFactories.list(String.class))
+ .containsExactly("é");
+ break;
+ case "name_en":
+ Assertions.assertThat(analyzer)
+ .as("name_en persists stopwordPreset=en")
+ .returns("en", TextAnalyzer::stopwordPreset);
+ break;
+ case "name_none":
+ Assertions.assertThat(analyzer)
+ .as("name_none persists stopwordPreset=none")
+ .returns("none", TextAnalyzer::stopwordPreset);
+ break;
+ }
+ });
+ }
+
@Test
public void test_asyncReplicationConfig() throws IOException {
Weaviate.Version.latest().orSkip();
diff --git a/src/it/java/io/weaviate/integration/TextAnalyzerITest.java b/src/it/java/io/weaviate/integration/TextAnalyzerITest.java
deleted file mode 100644
index deaf6685f..000000000
--- a/src/it/java/io/weaviate/integration/TextAnalyzerITest.java
+++ /dev/null
@@ -1,138 +0,0 @@
-package io.weaviate.integration;
-
-import java.util.Map;
-
-import org.assertj.core.api.Assertions;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-import io.weaviate.ConcurrentTest;
-import io.weaviate.client6.v1.api.WeaviateClient;
-import io.weaviate.client6.v1.api.collections.Property;
-import io.weaviate.client6.v1.api.collections.TextAnalyzer;
-import io.weaviate.client6.v1.api.collections.Tokenization;
-import io.weaviate.client6.v1.api.collections.query.Filter;
-import io.weaviate.containers.Container;
-import io.weaviate.containers.Weaviate;
-
-public class TextAnalyzerITest extends ConcurrentTest {
- private static final WeaviateClient client = Container.WEAVIATE.getClient();
-
- @BeforeClass
- public static void __() {
- Weaviate.Version.V137.orSkip();
- }
-
- @Test
- public void testAsciiFoldRoundTripsThroughConfigAndAffectsFilters() throws Exception {
- var nsAccent = ns("AccentFolding");
- client.collections.create(nsAccent, c -> c
- .properties(
- Property.text("text_default",
- p -> p.tokenization(Tokenization.WORD)),
- Property.text("text_folded",
- p -> p.tokenization(Tokenization.WORD)
- .textAnalyzer(TextAnalyzer.of(t -> t.foldAscii(true)))),
- Property.text("text_folded_keep_e",
- p -> p.tokenization(Tokenization.WORD)
- .textAnalyzer(TextAnalyzer.of(t -> t
- .foldAscii(true)
- .keepAscii("é"))))));
-
- var products = client.collections.use(nsAccent);
-
- var config = products.config.get();
- Assertions.assertThat(config).isPresent();
- var props = config.get().properties();
-
- var textDefault = props.stream()
- .filter(p -> p.propertyName().equals("text_default")).findFirst().orElseThrow();
- var textFolded = props.stream()
- .filter(p -> p.propertyName().equals("text_folded")).findFirst().orElseThrow();
- var textFoldedKeepE = props.stream()
- .filter(p -> p.propertyName().equals("text_folded_keep_e")).findFirst().orElseThrow();
-
- Assertions.assertThat(textDefault.textAnalyzer())
- .as("default property has no textAnalyzer config")
- .isNull();
-
- Assertions.assertThat(textFolded.textAnalyzer())
- .as("text_folded persists asciiFold=true")
- .isNotNull()
- .satisfies(ta -> {
- Assertions.assertThat(ta.foldAscii()).isTrue();
- });
-
- Assertions.assertThat(textFoldedKeepE.textAnalyzer())
- .as("text_folded_keep_e persists asciiFold=true and asciiFoldIgnore=[é]")
- .isNotNull()
- .satisfies(ta -> {
- Assertions.assertThat(ta.foldAscii()).isTrue();
- Assertions.assertThat(ta.keepAscii()).containsExactly("é");
- });
-
- products.data.insert(Map.of(
- "text_default", "Café Crème Bio",
- "text_folded", "Café Crème Bio",
- "text_folded_keep_e", "Café Crème Bio"));
-
- var defaultMatches = products.query.fetchObjects(
- q -> q.filters(Filter.property("text_default").eq("cafe")));
- Assertions.assertThat(defaultMatches.objects())
- .as("text_default has no folding, 'cafe' should not match 'Café Crème Bio'")
- .isEmpty();
-
- var foldedMatches = products.query.fetchObjects(
- q -> q.filters(Filter.property("text_folded").eq("cafe")));
- Assertions.assertThat(foldedMatches.objects())
- .as("text_folded has asciiFold=true, 'cafe' must match 'Café Crème Bio'")
- .hasSize(1);
-
- var keepEMatches = products.query.fetchObjects(
- q -> q.filters(Filter.property("text_folded_keep_e").eq("cafe")));
- Assertions.assertThat(keepEMatches.objects())
- .as("text_folded_keep_e preserves é, 'cafe' must NOT match 'Café Crème Bio'")
- .isEmpty();
-
- for (String prop : new String[] { "text_default", "text_folded", "text_folded_keep_e" }) {
- var hits = products.query.fetchObjects(
- q -> q.filters(Filter.property(prop).eq("Café")));
- Assertions.assertThat(hits.objects())
- .as("'Café' (exact) should match on %s regardless of folding", prop)
- .hasSize(1);
- }
- }
-
- @Test
- public void testStopwordPresetRoundTripsThroughConfig() throws Exception {
- var nsStop = ns("StopwordPreset");
- client.collections.create(nsStop, c -> c
- .properties(
- Property.text("name_en",
- p -> p.tokenization(Tokenization.WORD)
- .textAnalyzer(TextAnalyzer.of(t -> t.stopwordPreset("en")))),
- Property.text("name_none",
- p -> p.tokenization(Tokenization.WORD)
- .textAnalyzer(TextAnalyzer.of(t -> t.stopwordPreset("none"))))));
-
- var products = client.collections.use(nsStop);
- var config = products.config.get();
- Assertions.assertThat(config).isPresent();
- var props = config.get().properties();
-
- var nameEn = props.stream()
- .filter(p -> p.propertyName().equals("name_en")).findFirst().orElseThrow();
- var nameNone = props.stream()
- .filter(p -> p.propertyName().equals("name_none")).findFirst().orElseThrow();
-
- Assertions.assertThat(nameEn.textAnalyzer())
- .as("name_en persists stopwordPreset=en")
- .isNotNull()
- .satisfies(ta -> Assertions.assertThat(ta.stopwordPreset()).isEqualTo("en"));
-
- Assertions.assertThat(nameNone.textAnalyzer())
- .as("name_none persists stopwordPreset=none")
- .isNotNull()
- .satisfies(ta -> Assertions.assertThat(ta.stopwordPreset()).isEqualTo("none"));
- }
-}