Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -596,8 +596,10 @@ private static boolean addVectorizationOptions(
"tests.vectorsize",
"Sets preferred vector size in bits.",
project.provider(() -> defaultVectorizationOption.get() ? "default" : randomVectorSize));
buildOptions.addBooleanOption("test.native.dotProduct", "Native dot product.", false);

optionsInheritedAsProperties.add("tests.vectorsize");
optionsInheritedAsProperties.add("test.native.dotProduct");

return defaultVectorizationOption.get();
}
Expand Down
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ New Features

* GITHUB#14792: Introduced OffHeapQuantizedFloatVectorValues class to access float vectors when only quantized byte vectors are available in the index. (Pulkit Gupta)

* GITHUB#15508: Use native dot product in Lucene. (Ankur Goel, Shubham Chaudhary, Dawid Weiss)

Improvements
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ public void init() {
}
}

/**
 * Benchmarks {@code VectorUtil.dotProduct} over {@code bytesA} and {@code bytesB}.
 *
 * <p>The fork is started with {@code --add-modules=jdk.incubator.vector} and {@code
 * -Dlucene.useNativeDotProduct=true} prepended to the JVM arguments.
 *
 * @return the value returned by {@code VectorUtil.dotProduct}; returning it hands the result back
 *     to the benchmark harness
 */
@Benchmark
@Fork(
    jvmArgsPrepend = {"--add-modules=jdk.incubator.vector", "-Dlucene.useNativeDotProduct=true"})
public int dot8sNative() {
  final int dot = VectorUtil.dotProduct(bytesA, bytesB);
  return dot;
}

@Benchmark
public float binaryCosineScalar() {
return VectorUtil.cosine(bytesA, bytesB);
Expand Down
7 changes: 7 additions & 0 deletions lucene/core/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@

exports org.apache.lucene.util.quantization;
exports org.apache.lucene.codecs.hnsw;
exports org.apache.lucene.internal.vectorization to
org.apache.lucene.benchmark.jmh;

provides org.apache.lucene.analysis.TokenizerFactory with
org.apache.lucene.analysis.standard.StandardTokenizerFactory;
Expand All @@ -91,6 +93,10 @@
org.apache.lucene.search.SortField.Provider,
org.apache.lucene.search.SortedNumericSortField.Provider,
org.apache.lucene.search.SortedSetSortField.Provider;
provides org.apache.lucene.internal.vectorization.VectorizationProviderService with
org.apache.lucene.internal.vectorization.DefaultVectorizationProviderService,
org.apache.lucene.internal.vectorization.PanamaVectorizationProviderService,
org.apache.lucene.internal.vectorization.NativeVectorizationProviderService;

uses org.apache.lucene.analysis.CharFilterFactory;
uses org.apache.lucene.analysis.TokenFilterFactory;
Expand All @@ -100,4 +106,5 @@
uses org.apache.lucene.codecs.KnnVectorsFormat;
uses org.apache.lucene.codecs.PostingsFormat;
uses org.apache.lucene.index.SortFieldProvider;
uses org.apache.lucene.internal.vectorization.VectorizationProviderService;
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.SuppressForbidden;

final class DefaultVectorUtilSupport implements VectorUtilSupport {
/** Default scalar implementation of vector utilities without SIMD optimizations. */
public final class DefaultVectorUtilSupport implements VectorUtilSupport {

DefaultVectorUtilSupport() {}

Expand Down Expand Up @@ -323,19 +324,20 @@ public float recalculateScalarQuantizationOffset(
.recalculateOffset(vector, 0, oldAlpha, oldMinQuantile);
}

static class ScalarQuantizer {
/** Scalar quantizer for converting float vectors to byte vectors. */
public static final class ScalarQuantizer {
private final float alpha;
private final float scale;
private final float minQuantile, maxQuantile;

ScalarQuantizer(float alpha, float scale, float minQuantile, float maxQuantile) {
public ScalarQuantizer(float alpha, float scale, float minQuantile, float maxQuantile) {
this.alpha = alpha;
this.scale = scale;
this.minQuantile = minQuantile;
this.maxQuantile = maxQuantile;
}

float quantize(float[] vector, byte[] dest, int start) {
public float quantize(float[] vector, byte[] dest, int start) {
assert vector.length == dest.length;
float correction = 0;
for (int i = start; i < vector.length; i++) {
Expand All @@ -344,7 +346,7 @@ float quantize(float[] vector, byte[] dest, int start) {
return correction;
}

float recalculateOffset(byte[] vector, int start, float oldAlpha, float oldMinQuantile) {
public float recalculateOffset(byte[] vector, int start, float oldAlpha, float oldMinQuantile) {
float correction = 0;
for (int i = start; i < vector.length; i++) {
// undo the old quantization
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.internal.vectorization;

/**
 * {@link VectorizationProviderService} that hands out {@link DefaultVectorizationProvider}
 * instances.
 *
 * <p>This service unconditionally reports itself as usable and is registered under the name {@code
 * "default"}.
 */
public class DefaultVectorizationProviderService implements VectorizationProviderService {

  /** Name under which this service identifies itself. */
  private static final String NAME = "default";

  /**
   * Returns the identifier of this service.
   *
   * @return the constant string {@code "default"}
   */
  @Override
  public String name() {
    return NAME;
  }

  /**
   * Reports whether this service can be used.
   *
   * @return always {@code true}
   */
  @Override
  public boolean isUsable() {
    return true;
  }

  /**
   * Creates a provider.
   *
   * @return a freshly constructed {@link DefaultVectorizationProvider}
   */
  @Override
  public VectorizationProvider newInstance() {
    return new DefaultVectorizationProvider();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.internal.vectorization;

import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.util.Optional;
import org.apache.lucene.util.Constants;

/**
 * {@link VectorizationProviderService} for the native vectorization provider.
 *
 * <p>The service reports itself as usable only on a HotSpot VM without JVMCI, when the {@code
 * jdk.incubator.vector} module can be found, and when the provider class can actually be
 * instantiated. NOTE(review): instantiation presumably fails when the required native binary is
 * missing; that check lives in the provider class, not here.
 */
// Ideally this would live in a separate module. Because multi-release jars are used, the provider
// class is not visible from this module and has to be looked up reflectively.
public class NativeVectorizationProviderService implements VectorizationProviderService {

  /** Name of the incubating vector module that must be present. */
  private static final String VECTOR_MODULE = "jdk.incubator.vector";

  /** Fully qualified name of the provider class that is resolved reflectively. */
  private static final String PROVIDER_CLASS =
      "org.apache.lucene.internal.vectorization.panama.NativeVectorizationProvider";

  /**
   * Looks up the vector module from Lucene's {@link ModuleLayer}, falling back to the boot layer
   * when Lucene's module has no layer (i.e. it is unnamed).
   */
  private static Optional<Module> lookupVectorModule() {
    ModuleLayer layer = VectorizationProvider.class.getModule().getLayer();
    if (layer == null) {
      layer = ModuleLayer.boot();
    }
    return layer.findModule(VECTOR_MODULE);
  }

  /**
   * Returns the identifier of this service.
   *
   * @return the constant string {@code "native"}
   */
  @Override
  public String name() {
    return "native";
  }

  /**
   * Probes whether the native provider can be used in the running JVM.
   *
   * <p>As a side effect, when the vector module is found, a reads edge from Lucene's module to it
   * is added before the provider is instantiated.
   *
   * @return {@code true} if all environment checks pass and {@link #newInstance()} succeeds;
   *     {@code false} otherwise, including when instantiation throws anything
   */
  @Override
  public boolean isUsable() {
    final int featureVersion = Runtime.version().feature();
    assert featureVersion >= 25;

    // The vector module is only used on a HotSpot VM, and never together with JVMCI
    // (it does not work there).
    if (!Constants.IS_HOTSPOT_VM || Constants.IS_JVMCI_VM) {
      return false;
    }

    // Is the incubator module present and readable? JVM vendors may exclude it, or the runtime
    // may have been built with jlink.
    final Optional<Module> vectorModule = lookupVectorModule();
    if (vectorModule.isEmpty()) {
      return false;
    }
    VectorizationProvider.class.getModule().addReads(vectorModule.get());

    // TODO: check for testMode and otherwise fallback to default if slowness could happen

    // Any failure while creating the provider means this service is not usable.
    try {
      return newInstance() != null;
    } catch (Throwable _) {
      return false;
    }
  }

  /**
   * Reflectively instantiates the provider class through its no-argument constructor.
   *
   * @return the newly created provider
   * @throws RuntimeException wrapping whatever was thrown while locating or invoking the
   *     constructor
   */
  @Override
  public VectorizationProvider newInstance() {
    try {
      final MethodHandles.Lookup lookup = MethodHandles.lookup();
      final Class<?> providerClass = lookup.findClass(PROVIDER_CLASS);
      final MethodType noArgConstructor = MethodType.methodType(void.class);
      return (VectorizationProvider)
          lookup.findConstructor(providerClass, noArgConstructor).invoke();
    } catch (Throwable t) {
      // TODO: we should probably check what happened more thoroughly...
      throw new RuntimeException(t);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.internal.vectorization;

import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.util.Optional;
import org.apache.lucene.util.Constants;

/**
 * {@link VectorizationProviderService} for the Panama vectorization provider, which uses the
 * Vector API for SIMD operations.
 *
 * <p>The service reports itself as usable only on a HotSpot VM without JVMCI, when the {@code
 * jdk.incubator.vector} module can be found, and when the provider class can actually be
 * instantiated.
 */
// Ideally this would live in a separate module. Because multi-release jars are used, the provider
// class is not visible from this module and has to be looked up reflectively.
public class PanamaVectorizationProviderService implements VectorizationProviderService {

  /** Name of the incubating vector module that must be present. */
  private static final String VECTOR_MODULE = "jdk.incubator.vector";

  /** Fully qualified name of the provider class that is resolved reflectively. */
  private static final String PROVIDER_CLASS =
      "org.apache.lucene.internal.vectorization.panama.PanamaVectorizationProvider";

  /**
   * Looks up the vector module from Lucene's {@link ModuleLayer}, falling back to the boot layer
   * when Lucene's module has no layer (i.e. it is unnamed).
   */
  private static Optional<Module> lookupVectorModule() {
    ModuleLayer layer = VectorizationProvider.class.getModule().getLayer();
    if (layer == null) {
      layer = ModuleLayer.boot();
    }
    return layer.findModule(VECTOR_MODULE);
  }

  /**
   * Returns the identifier of this service.
   *
   * @return the constant string {@code "panama"}
   */
  @Override
  public String name() {
    return "panama";
  }

  /**
   * Probes whether the Panama provider can be used in the running JVM.
   *
   * <p>As a side effect, when the vector module is found, a reads edge from Lucene's module to it
   * is added before the provider is instantiated.
   *
   * @return {@code true} if all environment checks pass and {@link #newInstance()} succeeds;
   *     {@code false} otherwise, including when instantiation throws anything
   */
  @Override
  public boolean isUsable() {
    final int featureVersion = Runtime.version().feature();
    assert featureVersion >= 21;

    // The vector module is only used on a HotSpot VM, and never together with JVMCI
    // (it does not work there).
    if (!Constants.IS_HOTSPOT_VM || Constants.IS_JVMCI_VM) {
      return false;
    }

    // Is the incubator module present and readable? JVM vendors may exclude it, or the runtime
    // may have been built with jlink.
    final Optional<Module> vectorModule = lookupVectorModule();
    if (vectorModule.isEmpty()) {
      return false;
    }
    VectorizationProvider.class.getModule().addReads(vectorModule.get());

    // TODO: check for testMode and otherwise fallback to default if slowness could happen

    // Any failure while creating the provider means this service is not usable.
    try {
      return newInstance() != null;
    } catch (Throwable _) {
      return false;
    }
  }

  /**
   * Reflectively instantiates the provider class through its no-argument constructor.
   *
   * @return the newly created provider
   * @throws RuntimeException wrapping whatever was thrown while locating or invoking the
   *     constructor
   */
  @Override
  public VectorizationProvider newInstance() {
    try {
      final MethodHandles.Lookup lookup = MethodHandles.lookup();
      final Class<?> providerClass = lookup.findClass(PROVIDER_CLASS);
      final MethodType noArgConstructor = MethodType.methodType(void.class);
      return (VectorizationProvider)
          lookup.findConstructor(providerClass, noArgConstructor).invoke();
    } catch (Throwable t) {
      // TODO: we should probably check what happened more thoroughly...
      throw new RuntimeException(t);
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public class PostingDecodingUtil {
public final IndexInput in;

/** Sole constructor, called by sub-classes. */
protected PostingDecodingUtil(IndexInput in) {
public PostingDecodingUtil(IndexInput in) {
this.in = in;
}

Expand Down
Loading