Integrate BACKBEAT SDK and resolve KACHING license validation

Major integrations and fixes:
- Added BACKBEAT SDK integration for P2P operation timing
- Implemented beat-aware status tracking for distributed operations
- Added Docker secrets support for secure license management
- Resolved KACHING license validation via HTTPS/TLS
- Updated docker-compose configuration for clean stack deployment
- Disabled rollback policies to prevent deployment failures
- Added license credential storage (CHORUS-DEV-MULTI-001)

Technical improvements:
- BACKBEAT P2P operation tracking with phase management
- Enhanced configuration system with file-based secrets
- Improved error handling for license validation
- Clean separation of KACHING and CHORUS deployment stacks

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
anthonyrawlins
2025-09-06 07:56:26 +10:00
parent 543ab216f9
commit 9bdcbe0447
4730 changed files with 1480093 additions and 1916 deletions

View File

@@ -0,0 +1,159 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeDocument holds the size in bytes of the Document struct
// itself, as reported by reflect. It is assigned once in init and is the
// starting point of Document.Size.
var reflectStaticSizeDocument int
// init measures the Document struct once when the package is loaded.
func init() {
var d Document
reflectStaticSizeDocument = int(reflect.TypeOf(d).Size())
}
// Document is a set of fields keyed by an identifier. Regular fields are kept
// in Fields and composite fields in CompositeFields; AddField routes each
// field to the right slice.
type Document struct {
	// id is reachable only through ID and SetID. It carries no json tag:
	// encoding/json ignores unexported fields, so a tag here has no effect
	// and is reported by go vet's structtag check.
	id              string
	Fields          []Field `json:"fields"`
	CompositeFields []*CompositeField
	// StoredFieldsSize is the value reported by StoredFieldsBytes.
	StoredFieldsSize uint64
	// indexed is set by SetIndexed and read by Indexed.
	indexed bool
}
// StoredFieldsBytes returns the document's StoredFieldsSize.
func (d *Document) StoredFieldsBytes() uint64 {
return d.StoredFieldsSize
}
// NewDocument creates a Document with the given id and empty, non-nil
// Fields and CompositeFields slices.
func NewDocument(id string) *Document {
	doc := &Document{id: id}
	doc.Fields = []Field{}
	doc.CompositeFields = []*CompositeField{}
	return doc
}
// NewSynonymDocument creates a Document with the given id and an empty,
// non-nil Fields slice. CompositeFields is left nil.
func NewSynonymDocument(id string) *Document {
	doc := &Document{id: id}
	doc.Fields = []Field{}
	return doc
}
// Size estimates the memory used by the document in bytes: the static struct
// size, one pointer, the length of the id, and the Size of every field and
// composite field.
func (d *Document) Size() int {
	total := reflectStaticSizeDocument + size.SizeOfPtr + len(d.id)

	for i := range d.Fields {
		total += d.Fields[i].Size()
	}
	for i := range d.CompositeFields {
		total += d.CompositeFields[i].Size()
	}

	return total
}
// AddField appends f to the document and returns the document so calls can
// be chained. A *CompositeField goes to CompositeFields; every other field
// goes to Fields.
func (d *Document) AddField(f Field) *Document {
	if composite, ok := f.(*CompositeField); ok {
		d.CompositeFields = append(d.CompositeFields, composite)
		return d
	}
	d.Fields = append(d.Fields, f)
	return d
}
// GoString renders the document ID, fields and composite fields for %#v
// formatting. Entries within each list are separated by ", ".
func (d *Document) GoString() string {
	var plain string
	for i := range d.Fields {
		if i > 0 {
			plain += ", "
		}
		plain += fmt.Sprintf("%#v", d.Fields[i])
	}

	var composite string
	for i := range d.CompositeFields {
		if i > 0 {
			composite += ", "
		}
		composite += fmt.Sprintf("%#v", d.CompositeFields[i])
	}

	return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID(), plain, composite)
}
// NumPlainTextBytes sums the plain text bytes of every field, and counts a
// field again once for each composite field that includes it.
func (d *Document) NumPlainTextBytes() uint64 {
	var total uint64
	for _, f := range d.Fields {
		total += f.NumPlainTextBytes()
	}
	for _, cf := range d.CompositeFields {
		for _, f := range d.Fields {
			if !cf.includesField(f.Name()) {
				continue
			}
			total += f.NumPlainTextBytes()
		}
	}
	return total
}
// ID returns the document identifier.
func (d *Document) ID() string {
return d.id
}
// SetID replaces the document identifier.
func (d *Document) SetID(id string) {
d.id = id
}
// AddIDField adds a text field named "_id" whose value is the document ID,
// with the IndexField and StoreField options set.
func (d *Document) AddIDField() {
d.AddField(NewTextFieldCustom("_id", nil, []byte(d.ID()), index.IndexField|index.StoreField, nil))
}
// VisitFields calls visitor once for each entry in Fields, in slice order.
func (d *Document) VisitFields(visitor index.FieldVisitor) {
for _, f := range d.Fields {
visitor(f)
}
}
// VisitComposite calls visitor once for each entry in CompositeFields, in
// slice order.
func (d *Document) VisitComposite(visitor index.CompositeFieldVisitor) {
for _, f := range d.CompositeFields {
visitor(f)
}
}
// HasComposite reports whether the document has at least one composite field.
func (d *Document) HasComposite() bool {
return len(d.CompositeFields) > 0
}
// VisitSynonymFields calls visitor for each entry in Fields that implements
// index.SynonymField; all other fields are skipped.
func (d *Document) VisitSynonymFields(visitor index.SynonymFieldVisitor) {
	for _, f := range d.Fields {
		synonym, ok := f.(index.SynonymField)
		if !ok {
			continue
		}
		visitor(synonym)
	}
}
// SetIndexed marks the document as indexed.
func (d *Document) SetIndexed() {
d.indexed = true
}
// Indexed reports whether SetIndexed has been called on the document.
func (d *Document) Indexed() bool {
return d.indexed
}

View File

@@ -0,0 +1,45 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
index "github.com/blevesearch/bleve_index_api"
)
// Field is the interface implemented by every field type that can be added
// to a Document.
type Field interface {
// Name returns the path of the field from the root DocumentMapping.
// A root field path is "field", a subdocument field is "parent.field".
Name() string
// ArrayPositions returns the intermediate document and field indices
// required to resolve the field value in the document. For example, if the
// field path is "doc1.doc2.field" where doc1 and doc2 are slices or
// arrays, ArrayPositions returns 2 indices used to resolve "doc2" value in
// "doc1", then "field" in "doc2".
ArrayPositions() []uint64
// Options returns the indexing options configured for the field.
Options() index.FieldIndexingOptions
// Analyze prepares the results exposed through AnalyzedLength and
// AnalyzedTokenFrequencies. It may be a no-op for a field type that builds
// those results another way.
Analyze()
// Value returns the field's value as bytes.
Value() []byte
// NumPlainTextBytes should return the number of plain text bytes
// that this field represents - this is a common metric for tracking
// the rate of indexing
NumPlainTextBytes() uint64
// Size returns an estimate of the memory used by the field, in bytes.
Size() int
// EncodedFieldType returns the single byte that identifies the field type.
EncodedFieldType() byte
// AnalyzedLength returns the field length produced by analysis.
AnalyzedLength() int
// AnalyzedTokenFrequencies returns the token frequencies produced by
// analysis.
AnalyzedTokenFrequencies() index.TokenFrequencies
}

View File

@@ -0,0 +1,142 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeBooleanField holds the size in bytes of the BooleanField
// struct itself, as reported by reflect; it is assigned once in init.
var reflectStaticSizeBooleanField int
func init() {
var f BooleanField
reflectStaticSizeBooleanField = int(reflect.TypeOf(f).Size())
}
// DefaultBooleanIndexingOptions is the option set used by NewBooleanField and
// NewBooleanFieldFromBytes: StoreField, IndexField and DocValues.
const DefaultBooleanIndexingOptions = index.StoreField | index.IndexField | index.DocValues
// BooleanField is a Field holding a boolean encoded as a single byte. The
// constructors write 'T' for true and 'F' for false.
type BooleanField struct {
name string
arrayPositions []uint64
options index.FieldIndexingOptions
value []byte
numPlainTextBytes uint64
// length and frequencies are filled in by Analyze.
length int
frequencies index.TokenFrequencies
}
// Size estimates the memory used by the field in bytes: the static struct
// size, one pointer, the name, the array positions, the value and, when
// present, the token frequencies.
func (b *BooleanField) Size() int {
	total := reflectStaticSizeBooleanField + size.SizeOfPtr +
		len(b.name) +
		len(b.arrayPositions)*size.SizeOfUint64 +
		len(b.value)
	if b.frequencies != nil {
		total += b.frequencies.Size()
	}
	return total
}
// Name returns the field name given at construction.
func (b *BooleanField) Name() string {
return b.name
}
// ArrayPositions returns the array positions given at construction.
func (b *BooleanField) ArrayPositions() []uint64 {
return b.arrayPositions
}
// Options returns the field's indexing options.
func (b *BooleanField) Options() index.FieldIndexingOptions {
return b.options
}
// Analyze emits the raw value as a single token of type analysis.Boolean at
// position 1 and records the resulting length and token frequencies.
func (b *BooleanField) Analyze() {
	stream := analysis.TokenStream{
		&analysis.Token{
			Start:    0,
			End:      len(b.value),
			Term:     b.value,
			Position: 1,
			Type:     analysis.Boolean,
		},
	}
	b.length = len(stream)
	b.frequencies = analysis.TokenFrequency(stream, b.arrayPositions, b.options)
}
// Value returns the raw encoded value.
func (b *BooleanField) Value() []byte {
return b.value
}
// Boolean decodes the stored value. A one-byte value yields true when the
// byte is 'T' and false otherwise; a value of any other length is an error.
func (b *BooleanField) Boolean() (bool, error) {
	if len(b.value) != 1 {
		return false, fmt.Errorf("boolean field has %d bytes", len(b.value))
	}
	return b.value[0] == 'T', nil
}
// GoString renders the field name, options and raw value for %#v formatting.
func (b *BooleanField) GoString() string {
return fmt.Sprintf("&document.BooleanField{Name:%s, Options: %s, Value: %s}", b.name, b.options, b.value)
}
// NumPlainTextBytes returns the plain text byte count set at construction.
func (b *BooleanField) NumPlainTextBytes() uint64 {
return b.numPlainTextBytes
}
// EncodedFieldType returns 'b', the type byte for boolean fields.
func (b *BooleanField) EncodedFieldType() byte {
return 'b'
}
// AnalyzedLength returns the token count recorded by Analyze.
func (b *BooleanField) AnalyzedLength() int {
return b.length
}
// AnalyzedTokenFrequencies returns the token frequencies recorded by Analyze.
func (b *BooleanField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return b.frequencies
}
// NewBooleanFieldFromBytes builds a BooleanField around an already encoded
// value, using DefaultBooleanIndexingOptions. The value is stored as given,
// without validation, and its length becomes the plain text byte count.
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField {
return &BooleanField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultBooleanIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
// NewBooleanField builds a BooleanField for b using
// DefaultBooleanIndexingOptions.
func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField {
return NewBooleanFieldWithIndexingOptions(name, arrayPositions, b, DefaultBooleanIndexingOptions)
}
// NewBooleanFieldWithIndexingOptions builds a BooleanField for b with the
// given options. True is encoded as "T" and counted as 4 plain text bytes;
// false is encoded as "F" and counted as 5.
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options index.FieldIndexingOptions) *BooleanField {
	encoded, plainTextBytes := []byte("F"), uint64(5)
	if b {
		encoded, plainTextBytes = []byte("T"), uint64(4)
	}

	field := &BooleanField{
		name:              name,
		arrayPositions:    arrayPositions,
		options:           options,
		value:             encoded,
		numPlainTextBytes: plainTextBytes,
	}
	return field
}

View File

@@ -0,0 +1,138 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"reflect"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeCompositeField holds the size in bytes of the
// CompositeField struct itself, as reported by reflect; it is assigned once
// in init.
var reflectStaticSizeCompositeField int
func init() {
var cf CompositeField
reflectStaticSizeCompositeField = int(reflect.TypeOf(cf).Size())
}
// DefaultCompositeIndexingOptions is the option set used by NewCompositeField.
const DefaultCompositeIndexingOptions = index.IndexField
// CompositeField accumulates the analysis results of other fields, selected
// by name through the include/exclude sets and the defaultInclude flag.
type CompositeField struct {
name string
// includedFields and excludedFields are sets keyed by field name.
includedFields map[string]bool
excludedFields map[string]bool
// defaultInclude applies to field names found in neither set.
defaultInclude bool
options index.FieldIndexingOptions
// totalLength and compositeFrequencies are accumulated by Compose.
totalLength int
compositeFrequencies index.TokenFrequencies
}
// NewCompositeField builds a CompositeField using
// DefaultCompositeIndexingOptions.
func NewCompositeField(name string, defaultInclude bool, include []string, exclude []string) *CompositeField {
return NewCompositeFieldWithIndexingOptions(name, defaultInclude, include, exclude, DefaultCompositeIndexingOptions)
}
// NewCompositeFieldWithIndexingOptions builds a CompositeField with the given
// options. The include and exclude name lists are turned into sets, and the
// frequency map starts out empty.
func NewCompositeFieldWithIndexingOptions(name string, defaultInclude bool, include []string, exclude []string, options index.FieldIndexingOptions) *CompositeField {
	included := make(map[string]bool, len(include))
	for _, fieldName := range include {
		included[fieldName] = true
	}

	excluded := make(map[string]bool, len(exclude))
	for _, fieldName := range exclude {
		excluded[fieldName] = true
	}

	return &CompositeField{
		name:                 name,
		options:              options,
		defaultInclude:       defaultInclude,
		includedFields:       included,
		excludedFields:       excluded,
		compositeFrequencies: make(index.TokenFrequencies),
	}
}
// Size estimates the memory used by the field in bytes: the static struct
// size, one pointer, the name, a per-entry cost for each included and
// excluded name and, when present, the composite token frequencies.
func (c *CompositeField) Size() int {
	total := reflectStaticSizeCompositeField + size.SizeOfPtr + len(c.name)

	// Each set entry costs a string header, the key bytes and a bool.
	perEntry := size.SizeOfString + size.SizeOfBool
	for fieldName := range c.includedFields {
		total += perEntry + len(fieldName)
	}
	for fieldName := range c.excludedFields {
		total += perEntry + len(fieldName)
	}

	if c.compositeFrequencies != nil {
		total += c.compositeFrequencies.Size()
	}
	return total
}
// Name returns the field name given at construction.
func (c *CompositeField) Name() string {
return c.name
}
// ArrayPositions always returns an empty, non-nil slice.
func (c *CompositeField) ArrayPositions() []uint64 {
return []uint64{}
}
// Options returns the field's indexing options.
func (c *CompositeField) Options() index.FieldIndexingOptions {
return c.options
}
// Analyze does nothing; the analysis results are accumulated by Compose.
func (c *CompositeField) Analyze() {
}
// Value always returns an empty, non-nil slice.
func (c *CompositeField) Value() []byte {
return []byte{}
}
// NumPlainTextBytes always returns 0.
func (c *CompositeField) NumPlainTextBytes() uint64 {
return 0
}
// includesField reports whether the named field belongs to this composite.
// An exclusion always wins, an explicit inclusion comes next, and
// defaultInclude decides for names found in neither set.
func (c *CompositeField) includesField(field string) bool {
	if _, excluded := c.excludedFields[field]; excluded {
		return false
	}
	if _, included := c.includedFields[field]; included {
		return true
	}
	return c.defaultInclude
}
// Compose folds one analyzed field into the composite: when the field is
// included, its length is added to the total and its token frequencies are
// merged in. Fields that are not included are ignored.
func (c *CompositeField) Compose(field string, length int, freq index.TokenFrequencies) {
	if !c.includesField(field) {
		return
	}
	c.totalLength += length
	c.compositeFrequencies.MergeAll(field, freq)
}
// EncodedFieldType returns 'c', the type byte for composite fields.
func (c *CompositeField) EncodedFieldType() byte {
return 'c'
}
// AnalyzedLength returns the total length accumulated by Compose.
func (c *CompositeField) AnalyzedLength() int {
return c.totalLength
}
// AnalyzedTokenFrequencies returns the frequencies accumulated by Compose.
func (c *CompositeField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return c.compositeFrequencies
}

View File

@@ -0,0 +1,202 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"bytes"
"fmt"
"math"
"reflect"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// dateTimeValueSeperator is the byte sequence placed between the prefix coded
// date and the layout string inside a DateTimeField value.
var dateTimeValueSeperator = []byte{'\xff'}
// reflectStaticSizeDateTimeField holds the size in bytes of the DateTimeField
// struct itself, as reported by reflect; it is assigned once in init.
var reflectStaticSizeDateTimeField int
func init() {
var f DateTimeField
reflectStaticSizeDateTimeField = int(reflect.TypeOf(f).Size())
}
// DefaultDateTimeIndexingOptions is the option set used by NewDateTimeField
// and NewDateTimeFieldFromBytes: StoreField, IndexField and DocValues.
const DefaultDateTimeIndexingOptions = index.StoreField | index.IndexField | index.DocValues
// DefaultDateTimePrecisionStep is the shift increment Analyze uses when it
// generates the additional prefix coded tokens.
const DefaultDateTimePrecisionStep uint = 4
// MinTimeRepresentable and MaxTimeRepresentable are the times at the smallest
// and largest int64 nanosecond offsets from the Unix epoch. Times outside
// this range are rejected by canRepresent.
var MinTimeRepresentable = time.Unix(0, math.MinInt64)
var MaxTimeRepresentable = time.Unix(0, math.MaxInt64)
// DateTimeField is a Field holding a point in time together with a layout
// string.
type DateTimeField struct {
name string
arrayPositions []uint64
options index.FieldIndexingOptions
// value is the prefix coded date, optionally followed by
// dateTimeValueSeperator and the layout.
value numeric.PrefixCoded
numPlainTextBytes uint64
// length and frequencies are filled in by Analyze.
length int
frequencies index.TokenFrequencies
}
// Size estimates the memory used by the field in bytes: the static struct
// size, one pointer, the name, the array positions, the value and, when
// present, the token frequencies.
func (n *DateTimeField) Size() int {
	total := reflectStaticSizeDateTimeField + size.SizeOfPtr +
		len(n.name) +
		len(n.arrayPositions)*size.SizeOfUint64 +
		len(n.value)
	if n.frequencies != nil {
		total += n.frequencies.Size()
	}
	return total
}
// Name returns the field name given at construction.
func (n *DateTimeField) Name() string {
return n.name
}
// ArrayPositions returns the array positions given at construction.
func (n *DateTimeField) ArrayPositions() []uint64 {
return n.arrayPositions
}
// Options returns the field's indexing options.
func (n *DateTimeField) Options() index.FieldIndexingOptions {
return n.options
}
// EncodedFieldType returns 'd', the type byte for date time fields.
func (n *DateTimeField) EncodedFieldType() byte {
return 'd'
}
// AnalyzedLength returns the token count recorded by Analyze.
func (n *DateTimeField) AnalyzedLength() int {
return n.length
}
// AnalyzedTokenFrequencies returns the token frequencies recorded by Analyze.
func (n *DateTimeField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return n.frequencies
}
// splitValue separates the stored value into the prefix coded date and the
// layout, cutting at the first dateTimeValueSeperator. The layout is empty
// when the value contains no separator.
func (n *DateTimeField) splitValue() (numeric.PrefixCoded, string) {
	parts := bytes.SplitN(n.value, dateTimeValueSeperator, 2)
	date := numeric.PrefixCoded(parts[0])

	var layout string
	if len(parts) != 1 {
		layout = string(parts[1])
	}
	return date, layout
}
// Analyze tokenizes the date part of the value (the layout is ignored). The
// full-precision prefix coded date is always emitted; when it decodes to an
// int64, one further token is emitted per DefaultDateTimePrecisionStep shift
// below 64. The token count and frequencies are recorded on the field.
func (n *DateTimeField) Analyze() {
	date, _ := n.splitValue()

	stream := analysis.TokenStream{
		&analysis.Token{
			Start:    0,
			End:      len(date),
			Term:     date,
			Position: 1,
			Type:     analysis.DateTime,
		},
	}

	if original, err := date.Int64(); err == nil {
		for shift := DefaultDateTimePrecisionStep; shift < 64; shift += DefaultDateTimePrecisionStep {
			encoded, err := numeric.NewPrefixCodedInt64(original, shift)
			if err != nil {
				// Stop at the first shift that cannot be encoded.
				break
			}
			stream = append(stream, &analysis.Token{
				Start:    0,
				End:      len(encoded),
				Term:     encoded,
				Position: 1,
				Type:     analysis.DateTime,
			})
		}
	}

	n.length = len(stream)
	n.frequencies = analysis.TokenFrequency(stream, n.arrayPositions, n.options)
}
// Value returns the raw stored value, including the separator and layout
// when present.
func (n *DateTimeField) Value() []byte {
return n.value
}
// DateTime decodes the stored value into a UTC time and the layout stored
// alongside it. The date is interpreted as nanoseconds since the Unix epoch.
// If the date cannot be decoded, the zero time, an empty layout and the
// decoding error are returned.
func (n *DateTimeField) DateTime() (time.Time, string, error) {
	prefixCoded, layout := n.splitValue()

	nanos, err := prefixCoded.Int64()
	if err != nil {
		return time.Time{}, "", err
	}

	decoded := time.Unix(0, nanos).UTC()
	return decoded, layout, nil
}
// GoString renders the field name, options and raw value for %#v formatting.
// The output names the actual type, DateTimeField, in line with the GoString
// methods of the other field types in this package.
func (n *DateTimeField) GoString() string {
	return fmt.Sprintf("&document.DateTimeField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
// NumPlainTextBytes returns the plain text byte count set at construction.
func (n *DateTimeField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
// NewDateTimeFieldFromBytes builds a DateTimeField around an already encoded
// value, using DefaultDateTimeIndexingOptions. The value is stored as given,
// without validation, and its length becomes the plain text byte count.
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField {
return &DateTimeField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultDateTimeIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
// NewDateTimeField builds a DateTimeField for dt and layout using
// DefaultDateTimeIndexingOptions. It returns an error when dt cannot be
// represented.
func NewDateTimeField(name string, arrayPositions []uint64, dt time.Time, layout string) (*DateTimeField, error) {
return NewDateTimeFieldWithIndexingOptions(name, arrayPositions, dt, layout, DefaultDateTimeIndexingOptions)
}
// NewDateTimeFieldWithIndexingOptions builds a DateTimeField for dt and
// layout with the given options. It returns an error when dt lies outside
// the range accepted by canRepresent.
func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, dt time.Time, layout string, options index.FieldIndexingOptions) (*DateTimeField, error) {
	if !canRepresent(dt) {
		return nil, fmt.Errorf("cannot represent %s in this type", dt)
	}

	// The stored value is the prefix coded time followed by the separator
	// and the layout. The storage layer keeps a field's value as a plain
	// byte slice with no room for extra information, so the layout rides
	// along inside the value and is split off again when the value is read
	// back, where it is used to format the decoded time.
	stored := numeric.MustNewPrefixCodedInt64(dt.UnixNano(), 0)
	stored = append(stored, dateTimeValueSeperator...)
	stored = append(stored, []byte(layout)...)

	field := &DateTimeField{
		name:           name,
		arrayPositions: arrayPositions,
		value:          stored,
		options:        options,
		// not correct, just a place holder until we revisit how fields are
		// represented and can fix this better
		numPlainTextBytes: uint64(8),
	}
	return field, nil
}
// canRepresent reports whether dt lies within the inclusive range
// [MinTimeRepresentable, MaxTimeRepresentable].
func canRepresent(dt time.Time) bool {
	return !dt.Before(MinTimeRepresentable) && !dt.After(MaxTimeRepresentable)
}

View File

@@ -0,0 +1,199 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeGeoPointField holds the size in bytes of the GeoPointField
// struct itself, as reported by reflect; it is assigned once in init.
var reflectStaticSizeGeoPointField int
func init() {
var f GeoPointField
reflectStaticSizeGeoPointField = int(reflect.TypeOf(f).Size())
}
// GeoPrecisionStep is the shift increment Analyze uses when it generates the
// additional prefix coded tokens (the path taken when no spatial plugin is
// set).
var GeoPrecisionStep uint = 9
// GeoPointField is a Field holding a geographic point.
type GeoPointField struct {
name string
arrayPositions []uint64
options index.FieldIndexingOptions
// value is the prefix coded form of the point; the constructors derive it
// from the Morton hash of the longitude and latitude.
value numeric.PrefixCoded
numPlainTextBytes uint64
// length and frequencies are filled in by Analyze.
length int
frequencies index.TokenFrequencies
// spatialplugin, when set through SetSpatialAnalyzerPlugin, supplies the
// index tokens in Analyze.
spatialplugin index.SpatialAnalyzerPlugin
}
// Size estimates the memory used by the field in bytes: the static struct
// size, one pointer, the name, the array positions, the value and, when
// present, the token frequencies.
func (n *GeoPointField) Size() int {
	total := reflectStaticSizeGeoPointField + size.SizeOfPtr +
		len(n.name) +
		len(n.arrayPositions)*size.SizeOfUint64 +
		len(n.value)
	if n.frequencies != nil {
		total += n.frequencies.Size()
	}
	return total
}
// Name returns the field name given at construction.
func (n *GeoPointField) Name() string {
return n.name
}
// ArrayPositions returns the array positions given at construction.
func (n *GeoPointField) ArrayPositions() []uint64 {
return n.arrayPositions
}
// Options returns the field's indexing options.
func (n *GeoPointField) Options() index.FieldIndexingOptions {
return n.options
}
// EncodedFieldType returns 'g', the type byte for geo point fields.
func (n *GeoPointField) EncodedFieldType() byte {
return 'g'
}
// AnalyzedLength returns the token count recorded by Analyze.
func (n *GeoPointField) AnalyzedLength() int {
return n.length
}
// AnalyzedTokenFrequencies returns the token frequencies recorded by Analyze.
func (n *GeoPointField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return n.frequencies
}
// Analyze tokenizes the point and records the token count and frequencies.
//
// The full-precision prefix coded value is always emitted as a Numeric
// token. Additional tokens depend on whether a spatial plugin is set:
//   - with a plugin, the plugin's index tokens for the decoded point are
//     emitted as AlphaNumeric tokens;
//   - without one, one Numeric token is emitted per GeoPrecisionStep shift
//     below 64.
//
// In both cases the additional tokens are skipped when the stored value
// cannot be decoded, so an undecodable value is never indexed as the point
// (0, 0).
func (n *GeoPointField) Analyze() {
	tokens := make(analysis.TokenStream, 0, 8)
	tokens = append(tokens, &analysis.Token{
		Start:    0,
		End:      len(n.value),
		Term:     n.value,
		Position: 1,
		Type:     analysis.Numeric,
	})

	if n.spatialplugin != nil {
		lat, latErr := n.Lat()
		lon, lonErr := n.Lon()
		// Lat and Lon return 0.0 on a decode error; feeding that to the
		// plugin would produce tokens for a point the field does not hold.
		if latErr == nil && lonErr == nil {
			p := &geo.Point{Lat: lat, Lon: lon}
			for _, term := range n.spatialplugin.GetIndexTokens(p) {
				tokens = append(tokens, &analysis.Token{
					Start:    0,
					End:      len(term),
					Term:     []byte(term),
					Position: 1,
					Type:     analysis.AlphaNumeric,
				})
			}
		}
	} else if original, err := n.value.Int64(); err == nil {
		for shift := GeoPrecisionStep; shift < 64; shift += GeoPrecisionStep {
			shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
			if err != nil {
				// Stop at the first shift that cannot be encoded.
				break
			}
			tokens = append(tokens, &analysis.Token{
				Start:    0,
				End:      len(shiftEncoded),
				Term:     shiftEncoded,
				Position: 1,
				Type:     analysis.Numeric,
			})
		}
	}

	n.length = len(tokens)
	n.frequencies = analysis.TokenFrequency(tokens, n.arrayPositions, n.options)
}
// Value returns the raw prefix coded value.
func (n *GeoPointField) Value() []byte {
return n.value
}
// Lon decodes the stored value and returns the longitude extracted from it
// by geo.MortonUnhashLon. It returns 0.0 and the error when the value cannot
// be decoded.
func (n *GeoPointField) Lon() (float64, error) {
i64, err := n.value.Int64()
if err != nil {
return 0.0, err
}
return geo.MortonUnhashLon(uint64(i64)), nil
}
// Lat decodes the stored value and returns the latitude extracted from it
// by geo.MortonUnhashLat. It returns 0.0 and the error when the value cannot
// be decoded.
func (n *GeoPointField) Lat() (float64, error) {
i64, err := n.value.Int64()
if err != nil {
return 0.0, err
}
return geo.MortonUnhashLat(uint64(i64)), nil
}
// GoString renders the field name, options and raw value for %#v formatting.
func (n *GeoPointField) GoString() string {
return fmt.Sprintf("&document.GeoPointField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
// NumPlainTextBytes returns the plain text byte count set at construction.
func (n *GeoPointField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
// NewGeoPointFieldFromBytes builds a GeoPointField around an already encoded
// value, using DefaultNumericIndexingOptions. The value is stored as given,
// without validation, and its length becomes the plain text byte count.
func NewGeoPointFieldFromBytes(name string, arrayPositions []uint64, value []byte) *GeoPointField {
return &GeoPointField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
// NewGeoPointField builds a GeoPointField for the given longitude and
// latitude using DefaultNumericIndexingOptions.
func NewGeoPointField(name string, arrayPositions []uint64, lon, lat float64) *GeoPointField {
return NewGeoPointFieldWithIndexingOptions(name, arrayPositions, lon, lat, DefaultNumericIndexingOptions)
}
// NewGeoPointFieldWithIndexingOptions builds a GeoPointField for the given
// longitude and latitude with the given options. The stored value is the
// full-precision prefix coding of the point's Morton hash.
func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, lon, lat float64, options index.FieldIndexingOptions) *GeoPointField {
	encoded := numeric.MustNewPrefixCodedInt64(int64(geo.MortonHash(lon, lat)), 0)

	field := &GeoPointField{
		name:           name,
		arrayPositions: arrayPositions,
		options:        options,
		value:          encoded,
		// not correct, just a place holder until we revisit how fields are
		// represented and can fix this better
		numPlainTextBytes: uint64(8),
	}
	return field
}
// SetSpatialAnalyzerPlugin implements the
// index.TokenisableSpatialField interface.
// Once a non-nil plugin is set, Analyze takes its additional tokens from the
// plugin instead of generating shifted prefix coded tokens.
func (n *GeoPointField) SetSpatialAnalyzerPlugin(
plugin index.SpatialAnalyzerPlugin) {
n.spatialplugin = plugin
}

View File

@@ -0,0 +1,265 @@
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/geo/geojson"
)
// reflectStaticSizeGeoShapeField holds the size in bytes of the GeoShapeField
// struct itself, as reported by reflect; it is assigned once in init.
var reflectStaticSizeGeoShapeField int
func init() {
var f GeoShapeField
reflectStaticSizeGeoShapeField = int(reflect.TypeOf(f).Size())
}
// DefaultGeoShapeIndexingOptions is the option set used by NewGeoShapeField
// and NewGeoShapeFieldFromBytes: IndexField and DocValues.
const DefaultGeoShapeIndexingOptions = index.IndexField | index.DocValues
// GeoShapeField is a Field holding a GeoJSON shape.
type GeoShapeField struct {
name string
// shape is set by the shape-based constructors and tokenized by Analyze;
// NewGeoShapeFieldFromBytes leaves it unset.
shape index.GeoJSON
arrayPositions []uint64
options index.FieldIndexingOptions
numPlainTextBytes uint64
length int
// encodedValue is the encoded shape wrapped in geo.GlueBytes on both
// sides; it is returned by EncodedShape.
encodedValue []byte
// value is the byte form of the shape; it is returned by Value.
value []byte
frequencies index.TokenFrequencies
}
// Size estimates the memory used by the field in bytes: the static struct
// size, one pointer, the name, the array positions, the encoded value, the
// value and, when present, the token frequencies.
func (n *GeoShapeField) Size() int {
	total := reflectStaticSizeGeoShapeField + size.SizeOfPtr +
		len(n.name) +
		len(n.arrayPositions)*size.SizeOfUint64 +
		len(n.encodedValue) +
		len(n.value)
	if n.frequencies != nil {
		total += n.frequencies.Size()
	}
	return total
}
// Name returns the field name given at construction.
func (n *GeoShapeField) Name() string {
return n.name
}
// ArrayPositions returns the array positions given at construction.
func (n *GeoShapeField) ArrayPositions() []uint64 {
return n.arrayPositions
}
// Options returns the field's indexing options.
func (n *GeoShapeField) Options() index.FieldIndexingOptions {
return n.options
}
// EncodedFieldType returns 's', the type byte for geo shape fields.
func (n *GeoShapeField) EncodedFieldType() byte {
return 's'
}
// AnalyzedLength returns the token count recorded by Analyze.
func (n *GeoShapeField) AnalyzedLength() int {
return n.length
}
// AnalyzedTokenFrequencies returns the token frequencies recorded by Analyze.
func (n *GeoShapeField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return n.frequencies
}
// Analyze asks the "s2" spatial analyzer plugin for the index tokens of the
// field's shape, emits each one as an AlphaNumeric token at position 1, and
// records the token count and frequencies.
func (n *GeoShapeField) Analyze() {
	plugin := geo.GetSpatialAnalyzerPlugin("s2")
	terms := plugin.GetIndexTokens(n.shape)

	stream := make(analysis.TokenStream, 0, len(terms))
	for _, term := range terms {
		stream = append(stream, &analysis.Token{
			Start:    0,
			End:      len(term),
			Term:     []byte(term),
			Position: 1,
			Type:     analysis.AlphaNumeric,
		})
	}

	n.length = len(stream)
	n.frequencies = analysis.TokenFrequency(stream, n.arrayPositions, n.options)
}
// Value returns the byte form of the shape.
func (n *GeoShapeField) Value() []byte {
return n.value
}
// GoString renders the field name, options and value for %#v formatting.
func (n *GeoShapeField) GoString() string {
return fmt.Sprintf("&document.GeoShapeField{Name:%s, Options: %s, Value: %s}",
n.name, n.options, n.value)
}
// NumPlainTextBytes returns the plain text byte count set at construction.
func (n *GeoShapeField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
// EncodedShape returns the encoded shape wrapped in geo.GlueBytes. It is nil
// for a field built by NewGeoShapeFieldFromBytes.
func (n *GeoShapeField) EncodedShape() []byte {
return n.encodedValue
}
// NewGeoShapeField builds a GeoShapeField from coordinates and a shape type
// using DefaultGeoShapeIndexingOptions. It returns nil when the shape cannot
// be built.
func NewGeoShapeField(name string, arrayPositions []uint64,
coordinates [][][][]float64, typ string) *GeoShapeField {
return NewGeoShapeFieldWithIndexingOptions(name, arrayPositions,
coordinates, typ, DefaultGeoShapeIndexingOptions)
}
// NewGeoShapeFieldFromBytes builds a GeoShapeField around an existing value,
// using DefaultGeoShapeIndexingOptions. Only the value is set; the shape and
// the encoded value are left empty.
func NewGeoShapeFieldFromBytes(name string, arrayPositions []uint64,
value []byte) *GeoShapeField {
return &GeoShapeField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultGeoShapeIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
// NewGeoShapeFieldWithIndexingOptions builds a GeoShapeField from coordinates
// and a shape type with the given options. It returns nil when the shape
// cannot be built.
func NewGeoShapeFieldWithIndexingOptions(name string, arrayPositions []uint64,
coordinates [][][][]float64, typ string,
options index.FieldIndexingOptions) *GeoShapeField {
shape := &geojson.GeoShape{
Coordinates: coordinates,
Type: typ,
}
return NewGeoShapeFieldFromShapeWithIndexingOptions(name,
arrayPositions, shape, options)
}
// NewGeoShapeFieldFromShapeWithIndexingOptions builds a GeoShapeField from a
// geojson.GeoShape with the given options. Circles are built from the shape's
// center and radius; every other type from its coordinates.
//
// It returns nil when geoShape is nil, when the shape cannot be built, or
// when the shape's byte value cannot be produced.
func NewGeoShapeFieldFromShapeWithIndexingOptions(name string, arrayPositions []uint64,
	geoShape *geojson.GeoShape, options index.FieldIndexingOptions) *GeoShapeField {
	// A nil shape is reported the same way as any other construction failure
	// instead of panicking on the first field access.
	if geoShape == nil {
		return nil
	}

	var shape index.GeoJSON
	var encodedValue []byte
	var err error

	if geoShape.Type == geo.CircleType {
		shape, encodedValue, err = geo.NewGeoCircleShape(geoShape.Center, geoShape.Radius)
	} else {
		shape, encodedValue, err = geo.NewGeoJsonShape(geoShape.Coordinates, geoShape.Type)
	}
	if err != nil {
		return nil
	}

	// Wrap the encoded shape in glue bytes so that the term splitting logic
	// does not interfere with the custom encoding of the geoshape coordinates
	// inside the docvalues.
	//
	// The result is built in a freshly allocated buffer. geo.GlueBytes is a
	// shared package-level slice and must never be the destination of an
	// append: if it has spare capacity, append would write into its backing
	// array and hand out a slice aliasing it.
	glued := make([]byte, 0, len(encodedValue)+2*len(geo.GlueBytes))
	glued = append(glued, geo.GlueBytes...)
	glued = append(glued, encodedValue...)
	glued = append(glued, geo.GlueBytes...)

	// get the byte value for the geoshape.
	value, err := shape.Value()
	if err != nil {
		return nil
	}

	// docvalues are always enabled for geoshape fields, even if the
	// indexing options are set to not include docvalues.
	options = options | index.DocValues

	return &GeoShapeField{
		shape:             shape,
		name:              name,
		arrayPositions:    arrayPositions,
		options:           options,
		encodedValue:      glued,
		value:             value,
		numPlainTextBytes: uint64(len(value)),
	}
}
// NewGeometryCollectionFieldWithIndexingOptions builds a geometry collection
// field from parallel slices: coordinates[i] and types[i] describe the i-th
// shape. It returns nil when the two slices differ in length or when the
// collection cannot be built.
func NewGeometryCollectionFieldWithIndexingOptions(name string,
	arrayPositions []uint64, coordinates [][][][][]float64, types []string,
	options index.FieldIndexingOptions) *GeoShapeField {
	if len(types) != len(coordinates) {
		return nil
	}

	shapes := make([]*geojson.GeoShape, 0, len(types))
	for i, shapeCoordinates := range coordinates {
		shapes = append(shapes, &geojson.GeoShape{
			Coordinates: shapeCoordinates,
			Type:        types[i],
		})
	}

	return NewGeometryCollectionFieldFromShapesWithIndexingOptions(name,
		arrayPositions, shapes, options)
}
// NewGeometryCollectionFieldFromShapesWithIndexingOptions builds a geometry
// collection field from the given shapes with the given options.
//
// It returns nil when the collection cannot be built or when its byte value
// cannot be produced.
func NewGeometryCollectionFieldFromShapesWithIndexingOptions(name string,
	arrayPositions []uint64, geoShapes []*geojson.GeoShape,
	options index.FieldIndexingOptions) *GeoShapeField {
	shape, encodedValue, err := geo.NewGeometryCollectionFromShapes(geoShapes)
	if err != nil {
		return nil
	}

	// Wrap the encoded shape in glue bytes so that the term splitting logic
	// does not interfere with the custom encoding of the geoshape coordinates
	// inside the docvalues.
	//
	// The result is built in a freshly allocated buffer. geo.GlueBytes is a
	// shared package-level slice and must never be the destination of an
	// append: if it has spare capacity, append would write into its backing
	// array and hand out a slice aliasing it.
	glued := make([]byte, 0, len(encodedValue)+2*len(geo.GlueBytes))
	glued = append(glued, geo.GlueBytes...)
	glued = append(glued, encodedValue...)
	glued = append(glued, geo.GlueBytes...)

	// get the byte value for the geometryCollection.
	value, err := shape.Value()
	if err != nil {
		return nil
	}

	// docvalues are always enabled for geoshape fields, even if the
	// indexing options are set to not include docvalues.
	options = options | index.DocValues

	return &GeoShapeField{
		shape:             shape,
		name:              name,
		arrayPositions:    arrayPositions,
		options:           options,
		encodedValue:      glued,
		value:             value,
		numPlainTextBytes: uint64(len(value)),
	}
}
// NewGeoCircleFieldWithIndexingOptions builds a circle-shaped GeoShapeField
// from a center point and a radius string with the given options. It returns
// nil when the circle cannot be built.
func NewGeoCircleFieldWithIndexingOptions(name string, arrayPositions []uint64,
centerPoint []float64, radius string,
options index.FieldIndexingOptions) *GeoShapeField {
shape := &geojson.GeoShape{
Center: centerPoint,
Radius: radius,
Type: geo.CircleType,
}
return NewGeoShapeFieldFromShapeWithIndexingOptions(name,
arrayPositions, shape, options)
}
// GeoShape is an implementation of the index.GeoShapeField interface.
// It parses the shape from the field's value on every call.
func (n *GeoShapeField) GeoShape() (index.GeoJSON, error) {
return geojson.ParseGeoJSONShape(n.value)
}

View File

@@ -0,0 +1,137 @@
// Copyright (c) 2021 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"net"
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeIPField holds the size in bytes of the IPField struct
// itself, as reported by reflect; it is assigned once in init.
var reflectStaticSizeIPField int
func init() {
var f IPField
reflectStaticSizeIPField = int(reflect.TypeOf(f).Size())
}
// DefaultIPIndexingOptions is the option set used by NewIPField and
// NewIPFieldFromBytes: StoreField, IndexField and DocValues.
const DefaultIPIndexingOptions = index.StoreField | index.IndexField | index.DocValues
// IPField is a Field holding an IP address.
type IPField struct {
name string
arrayPositions []uint64
options index.FieldIndexingOptions
// value is the address; NewIPFieldWithIndexingOptions stores the result
// of To16.
value net.IP
numPlainTextBytes uint64
// length and frequencies are filled in by Analyze.
length int
frequencies index.TokenFrequencies
}
// Size estimates the memory used by the field in bytes: the static struct
// size, one pointer, the name, the array positions, the value and, when
// present, the token frequencies.
func (b *IPField) Size() int {
	total := reflectStaticSizeIPField + size.SizeOfPtr +
		len(b.name) +
		len(b.arrayPositions)*size.SizeOfUint64 +
		len(b.value)
	if b.frequencies != nil {
		total += b.frequencies.Size()
	}
	return total
}
// Name returns the field name given at construction.
func (b *IPField) Name() string {
return b.name
}
// ArrayPositions returns the array positions given at construction.
func (b *IPField) ArrayPositions() []uint64 {
return b.arrayPositions
}
// Options returns the field's indexing options.
func (b *IPField) Options() index.FieldIndexingOptions {
return b.options
}
// EncodedFieldType returns 'i', the type byte for IP fields.
func (b *IPField) EncodedFieldType() byte {
	return 'i'
}

// AnalyzedLength returns the token count recorded by Analyze.
func (b *IPField) AnalyzedLength() int {
	return b.length
}

// AnalyzedTokenFrequencies returns the token frequencies recorded by Analyze.
func (b *IPField) AnalyzedTokenFrequencies() index.TokenFrequencies {
	return b.frequencies
}
// Analyze turns the whole IP value into a single token of type analysis.IP
// at position 1, records a length of one, and stores the token frequencies
// computed from that one-token stream.
func (b *IPField) Analyze() {
	ipToken := &analysis.Token{
		Start:    0,
		End:      len(b.value),
		Term:     b.value,
		Position: 1,
		Type:     analysis.IP,
	}
	b.length = 1
	b.frequencies = analysis.TokenFrequency(
		analysis.TokenStream{ipToken}, b.arrayPositions, b.options)
}
// Value returns the raw bytes of the IP address. The returned slice is the
// field's own storage, not a copy.
func (b *IPField) Value() []byte {
	return b.value
}
// IP returns the field's value as a net.IP. The error result is always nil.
func (b *IPField) IP() (net.IP, error) {
	// b.value is already a net.IP, so no conversion is required.
	return b.value, nil
}
// GoString renders the field's name, options and IP address in a Go-like
// debug form.
func (b *IPField) GoString() string {
	return fmt.Sprintf(
		"&document.IPField{Name:%s, Options: %s, Value: %s}",
		b.name,
		b.options,
		b.value, // already a net.IP, formatted through its String method
	)
}
// NumPlainTextBytes returns the byte count recorded when the field was
// constructed.
func (b *IPField) NumPlainTextBytes() uint64 {
	return b.numPlainTextBytes
}
// NewIPFieldFromBytes builds an IPField directly from already-encoded bytes.
// The bytes are stored as given (no To16 normalisation), the options are
// DefaultIPIndexingOptions, and numPlainTextBytes is len(value).
func NewIPFieldFromBytes(name string, arrayPositions []uint64, value []byte) *IPField {
	field := &IPField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             value,
		options:           DefaultIPIndexingOptions,
		numPlainTextBytes: uint64(len(value)),
	}
	return field
}
// NewIPField builds an IPField for v using DefaultIPIndexingOptions.
func NewIPField(name string, arrayPositions []uint64, v net.IP) *IPField {
	return NewIPFieldWithIndexingOptions(
		name, arrayPositions, v, DefaultIPIndexingOptions)
}
// NewIPFieldWithIndexingOptions builds an IPField for b with the given
// options. The address is stored in its 16-byte form (b.To16()), and
// numPlainTextBytes is always set to net.IPv6len.
//
// Note that net.IP.To16 returns nil when b is not a valid 4- or 16-byte
// address; in that case the stored value is nil.
func NewIPFieldWithIndexingOptions(name string, arrayPositions []uint64, b net.IP, options index.FieldIndexingOptions) *IPField {
	field := &IPField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             b.To16(),
		options:           options,
		numPlainTextBytes: net.IPv6len,
	}
	return field
}

View File

@@ -0,0 +1,165 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeNumericField holds the size in bytes of the NumericField
// struct itself, as reported by reflect. It is computed once in init.
var reflectStaticSizeNumericField int

func init() {
	reflectStaticSizeNumericField = int(reflect.TypeOf(NumericField{}).Size())
}
// DefaultNumericIndexingOptions is the option set used by NewNumericField
// and NewNumericFieldFromBytes; it combines index.StoreField,
// index.IndexField and index.DocValues.
const DefaultNumericIndexingOptions = index.StoreField | index.IndexField | index.DocValues
// DefaultPrecisionStep is the shift increment, in bits, that
// NumericField.Analyze uses when emitting the reduced-precision tokens.
const DefaultPrecisionStep uint = 4
// NumericField is a document field whose value is a number stored in
// prefix-coded form.
type NumericField struct {
name string
arrayPositions []uint64
options index.FieldIndexingOptions
value numeric.PrefixCoded
numPlainTextBytes uint64
// length and frequencies are filled in by Analyze.
length int
frequencies index.TokenFrequencies
}
// Size returns an estimate of the field's memory footprint: the static
// struct size plus a pointer, the bytes of the name and value, eight bytes
// per array position, and the size reported by the token frequencies when
// they have been computed.
func (n *NumericField) Size() int {
	total := reflectStaticSizeNumericField + size.SizeOfPtr
	total += len(n.name)
	total += len(n.arrayPositions) * size.SizeOfUint64
	total += len(n.value)
	if n.frequencies != nil {
		total += n.frequencies.Size()
	}
	return total
}
// Name returns the name the field was created with.
func (n *NumericField) Name() string {
	return n.name
}
// ArrayPositions returns the array positions the field was created with.
func (n *NumericField) ArrayPositions() []uint64 {
	return n.arrayPositions
}
// Options returns the indexing options configured for the field.
func (n *NumericField) Options() index.FieldIndexingOptions {
	return n.options
}
// EncodedFieldType returns 'n', the single-byte type code of numeric fields.
func (n *NumericField) EncodedFieldType() byte {
	return 'n'
}
// AnalyzedLength returns the token count recorded by Analyze; it is zero
// until Analyze has run.
func (n *NumericField) AnalyzedLength() int {
	return n.length
}
// AnalyzedTokenFrequencies returns the token frequencies computed by
// Analyze; it is nil until Analyze has run.
func (n *NumericField) AnalyzedTokenFrequencies() index.TokenFrequencies {
	return n.frequencies
}
// Analyze produces the tokens for the numeric value and records their count
// and frequencies on the field.
//
// The first token is the stored prefix-coded value itself. If that value
// decodes to an int64, one further token is emitted for every shift
// DefaultPrecisionStep, 2*DefaultPrecisionStep, ... below 64, each being the
// value re-encoded with numeric.NewPrefixCodedInt64 at that shift. Encoding
// failure stops the loop; a value that does not decode yields only the first
// token. All tokens share position 1 and type analysis.Numeric.
func (n *NumericField) Analyze() {
	// The token count is bounded by ceil(64/DefaultPrecisionStep): the
	// full-precision token plus one per shift in [step, 64). Size the slice
	// once instead of growing it append by append.
	const maxTokens = (64 + DefaultPrecisionStep - 1) / DefaultPrecisionStep
	tokens := make(analysis.TokenStream, 0, maxTokens)
	tokens = append(tokens, &analysis.Token{
		Start:    0,
		End:      len(n.value),
		Term:     n.value,
		Position: 1,
		Type:     analysis.Numeric,
	})

	if original, err := n.value.Int64(); err == nil {
		for shift := DefaultPrecisionStep; shift < 64; shift += DefaultPrecisionStep {
			shiftEncoded, encodeErr := numeric.NewPrefixCodedInt64(original, shift)
			if encodeErr != nil {
				break
			}
			tokens = append(tokens, &analysis.Token{
				Start:    0,
				End:      len(shiftEncoded),
				Term:     shiftEncoded,
				Position: 1,
				Type:     analysis.Numeric,
			})
		}
	}

	n.length = len(tokens)
	n.frequencies = analysis.TokenFrequency(tokens, n.arrayPositions, n.options)
}
// Value returns the prefix-coded bytes of the number. The returned slice is
// the field's own storage, not a copy.
func (n *NumericField) Value() []byte {
	return n.value
}
// Number decodes the stored prefix-coded value to an int64 and converts it
// to a float64 with numeric.Int64ToFloat64. A decoding error is returned
// unchanged together with a zero result.
func (n *NumericField) Number() (float64, error) {
	decoded, err := n.value.Int64()
	if err != nil {
		return 0, err
	}
	return numeric.Int64ToFloat64(decoded), nil
}
// GoString renders the field's name, options and raw prefix-coded value in
// a Go-like debug form.
func (n *NumericField) GoString() string {
	return fmt.Sprintf(
		"&document.NumericField{Name:%s, Options: %s, Value: %s}",
		n.name,
		n.options,
		n.value,
	)
}
// NumPlainTextBytes returns the byte count recorded when the field was
// constructed.
func (n *NumericField) NumPlainTextBytes() uint64 {
	return n.numPlainTextBytes
}
// NewNumericFieldFromBytes builds a NumericField directly from
// already-encoded bytes. The bytes are stored as given, the options are
// DefaultNumericIndexingOptions, and numPlainTextBytes is len(value).
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
	field := &NumericField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             value,
		options:           DefaultNumericIndexingOptions,
		numPlainTextBytes: uint64(len(value)),
	}
	return field
}
// NewNumericField builds a NumericField for number using
// DefaultNumericIndexingOptions.
func NewNumericField(name string, arrayPositions []uint64, number float64) *NumericField {
	return NewNumericFieldWithIndexingOptions(
		name, arrayPositions, number, DefaultNumericIndexingOptions)
}
// NewNumericFieldWithIndexingOptions builds a NumericField for number with
// the given options. The float is mapped to an int64 with
// numeric.Float64ToInt64 and then prefix-coded at shift 0.
func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, number float64, options index.FieldIndexingOptions) *NumericField {
	encoded := numeric.MustNewPrefixCodedInt64(numeric.Float64ToInt64(number), 0)
	field := &NumericField{
		name:           name,
		arrayPositions: arrayPositions,
		value:          encoded,
		options:        options,
		// Not correct, just a placeholder until we revisit how fields are
		// represented and can fix this better.
		numPlainTextBytes: uint64(8),
	}
	return field
}

View File

@@ -0,0 +1,149 @@
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeSynonymField holds the size in bytes of the SynonymField
// struct itself, as reported by reflect. It is computed once in init.
var reflectStaticSizeSynonymField int

func init() {
	reflectStaticSizeSynonymField = int(reflect.TypeOf(SynonymField{}).Size())
}
// DefaultSynonymIndexingOptions is the option set NewSynonymField assigns;
// it consists of index.IndexField only.
const DefaultSynonymIndexingOptions = index.IndexField
// SynonymField is a document field carrying a synonym definition: an
// optional list of input terms and a list of synonyms, both run through the
// field's analyzer by Analyze.
type SynonymField struct {
name string
analyzer analysis.Analyzer
options index.FieldIndexingOptions
input []string
synonyms []string
numPlainTextBytes uint64
// synonymMap maps each term to its synonyms; it is nil until Analyze runs.
synonymMap map[string][]string
}
// Size returns an estimate of the field's memory footprint: the static
// struct size, a pointer, and the bytes of the name.
//
// NOTE(review): the input, synonyms and synonymMap contents are not
// included in the estimate.
func (s *SynonymField) Size() int {
	total := reflectStaticSizeSynonymField + size.SizeOfPtr
	total += len(s.name)
	return total
}
// Name returns the name the field was created with.
func (s *SynonymField) Name() string {
	return s.name
}
// ArrayPositions always returns nil; synonym fields keep no array positions.
func (s *SynonymField) ArrayPositions() []uint64 {
	return nil
}
// Options returns the indexing options configured for the field.
func (s *SynonymField) Options() index.FieldIndexingOptions {
	return s.options
}
// NumPlainTextBytes returns the field's numPlainTextBytes counter.
// NewSynonymField does not set it, so it is zero for fields built that way.
func (s *SynonymField) NumPlainTextBytes() uint64 {
	return s.numPlainTextBytes
}
// AnalyzedLength always returns 0; synonym fields report no token count.
func (s *SynonymField) AnalyzedLength() int {
	return 0
}
// EncodedFieldType returns 'y', the single-byte type code of synonym fields.
func (s *SynonymField) EncodedFieldType() byte {
	return 'y'
}
// AnalyzedTokenFrequencies always returns nil; synonym fields produce no
// token frequencies.
func (s *SynonymField) AnalyzedTokenFrequencies() index.TokenFrequencies {
	return nil
}
// Analyze runs every input term and every synonym through
// analyzeSynonymTerm with the field's analyzer, discards those that come
// back empty, and stores the map built by processSynonymData from the
// survivors.
//
// When the field has no input terms, or all of them are discarded,
// processSynonymData receives an empty input list.
func (s *SynonymField) Analyze() {
	var inputTerms []string
	if n := len(s.input); n > 0 {
		inputTerms = make([]string, 0, n)
		for i := range s.input {
			if analyzed := analyzeSynonymTerm(s.input[i], s.analyzer); analyzed != "" {
				inputTerms = append(inputTerms, analyzed)
			}
		}
	}

	synonymTerms := make([]string, 0, len(s.synonyms))
	for i := range s.synonyms {
		if analyzed := analyzeSynonymTerm(s.synonyms[i], s.analyzer); analyzed != "" {
			synonymTerms = append(synonymTerms, analyzed)
		}
	}

	s.synonymMap = processSynonymData(inputTerms, synonymTerms)
}
// Value always returns nil; synonym fields expose no raw byte value.
func (s *SynonymField) Value() []byte {
	return nil
}
// IterateSynonyms calls visitor once for every entry of the synonym map
// built by Analyze, passing the term and its synonym list. Entries are
// visited in Go map iteration order, which is unspecified. Nothing is
// visited before Analyze has run.
func (s *SynonymField) IterateSynonyms(visitor func(term string, synonyms []string)) {
	for key, related := range s.synonymMap {
		visitor(key, related)
	}
}
// NewSynonymField builds a SynonymField with DefaultSynonymIndexingOptions.
// The input and synonyms slices are stored as given (not copied); the
// synonym map stays nil until Analyze is called.
func NewSynonymField(name string, analyzer analysis.Analyzer, input []string, synonyms []string) *SynonymField {
	field := &SynonymField{
		name:     name,
		analyzer: analyzer,
		options:  DefaultSynonymIndexingOptions,
		input:    input,
		synonyms: synonyms,
	}
	return field
}
// processSynonymData builds the term-to-synonyms map.
//
// With a non-empty input, every input term maps to the synonyms slice
// itself, so all entries share one backing slice. With an empty input,
// every synonym maps to a fresh slice holding all the other synonyms in
// their original order. The result is never nil.
func processSynonymData(input []string, synonyms []string) map[string][]string {
	if len(input) > 0 {
		// Map each term to the same list of synonyms.
		directed := make(map[string][]string, len(input))
		for _, term := range input {
			directed[term] = synonyms
		}
		return directed
	}

	// Precompute a map where each synonym points to all other synonyms.
	equivalent := make(map[string][]string, len(synonyms))
	for i, term := range synonyms {
		others := make([]string, 0, len(synonyms)-1)
		others = append(others, synonyms[:i]...)
		others = append(others, synonyms[i+1:]...)
		equivalent[term] = others
	}
	return equivalent
}
// analyzeSynonymTerm runs term through analyzer and returns the resulting
// token's text when exactly one token is produced. Any other token count
// (zero or several) yields the empty string.
func analyzeSynonymTerm(term string, analyzer analysis.Analyzer) string {
	tokens := analyzer.Analyze([]byte(term))
	if len(tokens) != 1 {
		return ""
	}
	return string(tokens[0].Term)
}

View File

@@ -0,0 +1,162 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeTextField holds the size in bytes of the TextField
// struct itself, as reported by reflect. It is computed once in init.
var reflectStaticSizeTextField int

func init() {
	reflectStaticSizeTextField = int(reflect.TypeOf(TextField{}).Size())
}
// DefaultTextIndexingOptions is the option set used by NewTextField and
// NewTextFieldWithAnalyzer; it combines index.IndexField and index.DocValues.
const DefaultTextIndexingOptions = index.IndexField | index.DocValues
// TextField is a document field holding text bytes and, optionally, the
// analyzer used to tokenize them.
type TextField struct {
name string
arrayPositions []uint64
options index.FieldIndexingOptions
// analyzer may be nil, in which case Analyze emits the value as one token.
analyzer analysis.Analyzer
value []byte
numPlainTextBytes uint64
// length and frequencies are filled in by Analyze.
length int
frequencies index.TokenFrequencies
}
// Size returns an estimate of the field's memory footprint: the static
// struct size plus a pointer, the bytes of the name and value, eight bytes
// per array position, and the size reported by the token frequencies when
// they have been computed.
func (t *TextField) Size() int {
	total := reflectStaticSizeTextField + size.SizeOfPtr
	total += len(t.name)
	total += len(t.arrayPositions) * size.SizeOfUint64
	total += len(t.value)
	if t.frequencies != nil {
		total += t.frequencies.Size()
	}
	return total
}
// Name returns the name the field was created with.
func (t *TextField) Name() string {
	return t.name
}
// ArrayPositions returns the array positions the field was created with.
func (t *TextField) ArrayPositions() []uint64 {
	return t.arrayPositions
}
// Options returns the indexing options configured for the field.
func (t *TextField) Options() index.FieldIndexingOptions {
	return t.options
}
// EncodedFieldType returns 't', the single-byte type code of text fields.
func (t *TextField) EncodedFieldType() byte {
	return 't'
}
// AnalyzedLength returns the token count recorded by Analyze; it is zero
// until Analyze has run.
func (t *TextField) AnalyzedLength() int {
	return t.length
}
// AnalyzedTokenFrequencies returns the token frequencies computed by
// Analyze; it is nil until Analyze has run.
func (t *TextField) AnalyzedTokenFrequencies() index.TokenFrequencies {
	return t.frequencies
}
// Analyze tokenizes the field's value and records the token count and
// token frequencies on the field.
//
// Without an analyzer the whole value becomes one analysis.AlphaNumeric
// token at position 1. With an analyzer the value is passed to
// analyzer.Analyze; when the field is stored the analyzer is given a copy
// of the bytes rather than the value itself.
func (t *TextField) Analyze() {
	var tokens analysis.TokenStream

	if t.analyzer == nil {
		tokens = analysis.TokenStream{
			&analysis.Token{
				Start:    0,
				End:      len(t.value),
				Term:     t.value,
				Position: 1,
				Type:     analysis.AlphaNumeric,
			},
		}
	} else {
		source := t.Value()
		if t.options.IsStored() {
			// Hand the analyzer its own copy — presumably so the stored
			// bytes cannot be altered by analysis; confirm against the
			// analyzer contract.
			duplicate := make([]byte, len(source))
			copy(duplicate, source)
			source = duplicate
		}
		tokens = t.analyzer.Analyze(source)
	}

	// Number of tokens in this doc field.
	t.length = len(tokens)
	t.frequencies = analysis.TokenFrequency(tokens, t.arrayPositions, t.options)
}
// Analyzer returns the analyzer attached to the field, or nil if none was
// supplied.
func (t *TextField) Analyzer() analysis.Analyzer {
	return t.analyzer
}
// Value returns the field's text bytes. The returned slice is the field's
// own storage, not a copy.
func (t *TextField) Value() []byte {
	return t.value
}
// Text returns the field's value converted to a string.
func (t *TextField) Text() string {
	return string(t.value)
}
// GoString renders the field's name, options, analyzer, value and array
// positions in a Go-like debug form.
func (t *TextField) GoString() string {
	return fmt.Sprintf(
		"&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}",
		t.name,
		t.options,
		t.analyzer,
		t.value,
		t.arrayPositions,
	)
}
// NumPlainTextBytes returns the byte count recorded when the field was
// constructed.
func (t *TextField) NumPlainTextBytes() uint64 {
	return t.numPlainTextBytes
}
// NewTextField builds a TextField with DefaultTextIndexingOptions and no
// analyzer.
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
	return NewTextFieldWithIndexingOptions(
		name, arrayPositions, value, DefaultTextIndexingOptions)
}
// NewTextFieldWithIndexingOptions builds a TextField with the given options
// and no analyzer. numPlainTextBytes is set to len(value).
func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options index.FieldIndexingOptions) *TextField {
	// Same construction as NewTextFieldCustom with a nil analyzer.
	return NewTextFieldCustom(name, arrayPositions, value, options, nil)
}
// NewTextFieldWithAnalyzer builds a TextField with
// DefaultTextIndexingOptions and the given analyzer. numPlainTextBytes is
// set to len(value).
func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer analysis.Analyzer) *TextField {
	// Same construction as NewTextFieldCustom with the default options.
	return NewTextFieldCustom(name, arrayPositions, value, DefaultTextIndexingOptions, analyzer)
}
// NewTextFieldCustom builds a TextField with caller-supplied options and
// analyzer. The value slice is stored as given (not copied) and
// numPlainTextBytes is set to len(value).
func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options index.FieldIndexingOptions, analyzer analysis.Analyzer) *TextField {
	field := &TextField{
		name:              name,
		arrayPositions:    arrayPositions,
		options:           options,
		analyzer:          analyzer,
		value:             value,
		numPlainTextBytes: uint64(len(value)),
	}
	return field
}

View File

@@ -0,0 +1,146 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeVectorField holds the size in bytes of the VectorField
// struct itself, as reported by reflect. It is computed once in init.
var reflectStaticSizeVectorField int

func init() {
	reflectStaticSizeVectorField = int(reflect.TypeOf(VectorField{}).Size())
}
// DefaultVectorIndexingOptions is the option set used by NewVectorField and
// NewVectorBase64Field; it consists of index.IndexField only.
const DefaultVectorIndexingOptions = index.IndexField
// VectorField is a document field holding a float32 vector together with
// its dimensionality, similarity metric and index optimization setting.
type VectorField struct {
name string
dims int // Dimensionality of the vector
similarity string // Similarity metric to use for scoring
options index.FieldIndexingOptions
value []float32
numPlainTextBytes uint64
vectorIndexOptimizedFor string // Optimization applied to this index.
}
// Size returns an estimate of the field's memory footprint: the static
// struct size plus a pointer, the bytes of the name, similarity and
// optimization strings, and the byte size of the vector as computed by
// numBytesFloat32s.
func (n *VectorField) Size() int {
	total := reflectStaticSizeVectorField + size.SizeOfPtr
	total += len(n.name)
	total += len(n.similarity)
	total += len(n.vectorIndexOptimizedFor)
	total += int(numBytesFloat32s(n.value))
	return total
}
// Name returns the name the field was created with.
func (n *VectorField) Name() string {
	return n.name
}
// ArrayPositions always returns nil; vector fields keep no array positions.
func (n *VectorField) ArrayPositions() []uint64 {
	return nil
}
// Options returns the indexing options configured for the field.
func (n *VectorField) Options() index.FieldIndexingOptions {
	return n.options
}
// NumPlainTextBytes returns the byte count recorded at construction, which
// the constructors set to the byte size of the vector.
func (n *VectorField) NumPlainTextBytes() uint64 {
	return n.numPlainTextBytes
}
// AnalyzedLength always returns 0 because vectors are not analyzed.
func (n *VectorField) AnalyzedLength() int {
	return 0
}
// EncodedFieldType returns 'v', the single-byte type code of vector fields.
func (n *VectorField) EncodedFieldType() byte {
	return 'v'
}
// AnalyzedTokenFrequencies always returns nil because vectors are not
// analyzed.
func (n *VectorField) AnalyzedTokenFrequencies() index.TokenFrequencies {
	return nil
}
// Analyze is a no-op: vectors are not analyzed.
func (n *VectorField) Analyze() {}
// Value always returns nil; the vector itself is exposed through Vector.
func (n *VectorField) Value() []byte {
	return nil
}
// GoString renders the field's name, options and vector in a Go-like debug
// form.
func (n *VectorField) GoString() string {
	const layout = "&document.VectorField{Name:%s, Options: %s, Value: %+v}"
	return fmt.Sprintf(layout, n.name, n.options, n.value)
}
// NewVectorField builds a VectorField with DefaultVectorIndexingOptions.
//
// arrayPositions is accepted only to keep the constructor signature in line
// with the other field constructors; it is not used.
func NewVectorField(name string, arrayPositions []uint64,
	vector []float32, dims int, similarity, vectorIndexOptimizedFor string) *VectorField {
	return NewVectorFieldWithIndexingOptions(
		name,
		arrayPositions,
		vector,
		dims,
		similarity,
		vectorIndexOptimizedFor,
		DefaultVectorIndexingOptions,
	)
}
// NewVectorFieldWithIndexingOptions builds a VectorField with the given
// options. The vector slice is stored as given (not copied) and
// numPlainTextBytes is set to the vector's byte size.
//
// arrayPositions is accepted only to keep the constructor signature in line
// with the other field constructors; it is not used.
func NewVectorFieldWithIndexingOptions(name string, arrayPositions []uint64,
	vector []float32, dims int, similarity, vectorIndexOptimizedFor string,
	options index.FieldIndexingOptions) *VectorField {
	field := &VectorField{
		name:                    name,
		dims:                    dims,
		similarity:              similarity,
		options:                 options,
		value:                   vector,
		numPlainTextBytes:       numBytesFloat32s(vector),
		vectorIndexOptimizedFor: vectorIndexOptimizedFor,
	}
	return field
}
// numBytesFloat32s returns the number of bytes occupied by the elements of
// value: its length multiplied by size.SizeOfFloat32.
func numBytesFloat32s(value []float32) uint64 {
	byteCount := len(value) * size.SizeOfFloat32
	return uint64(byteCount)
}
// -----------------------------------------------------------------------------
// Following methods help in implementing the bleve_index_api's VectorField
// interface.
// Vector returns the field's float32 vector. The returned slice is the
// field's own storage, not a copy.
func (n *VectorField) Vector() []float32 {
	return n.value
}
// Dims returns the dimensionality the field was created with.
func (n *VectorField) Dims() int {
	return n.dims
}
// Similarity returns the similarity metric the field was created with.
func (n *VectorField) Similarity() string {
	return n.similarity
}
// IndexOptimizedFor returns the index optimization setting the field was
// created with.
func (n *VectorField) IndexOptimizedFor() string {
	return n.vectorIndexOptimizedFor
}

View File

@@ -0,0 +1,163 @@
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package document
import (
"encoding/base64"
"encoding/binary"
"fmt"
"math"
"reflect"
"github.com/blevesearch/bleve/v2/size"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeVectorBase64Field holds the size in bytes of the
// VectorBase64Field struct itself, as reported by reflect. It is computed
// once in init.
var reflectStaticSizeVectorBase64Field int

func init() {
	reflectStaticSizeVectorBase64Field = int(reflect.TypeOf(VectorBase64Field{}).Size())
}
// VectorBase64Field wraps a VectorField whose vector was supplied as a
// base64 string. It keeps the original encoded string alongside the decoded
// field and forwards most of its methods to that field.
type VectorBase64Field struct {
// vectorField is the decoded field that the accessor methods delegate to.
vectorField *VectorField
// base64Encoding is the encoded string the vector was decoded from.
base64Encoding string
}
// Size returns an estimate of the field's memory footprint: the static
// struct size plus a pointer, the bytes of the base64 string, and the size
// of the wrapped VectorField when one is present.
func (n *VectorBase64Field) Size() int {
	total := reflectStaticSizeVectorBase64Field + size.SizeOfPtr
	total += len(n.base64Encoding)
	if n.vectorField != nil {
		total += n.vectorField.Size()
	}
	return total
}
// Name returns the name of the wrapped vector field.
func (n *VectorBase64Field) Name() string {
	return n.vectorField.Name()
}
// ArrayPositions forwards to the wrapped vector field.
func (n *VectorBase64Field) ArrayPositions() []uint64 {
	return n.vectorField.ArrayPositions()
}
// Options returns the indexing options of the wrapped vector field.
func (n *VectorBase64Field) Options() index.FieldIndexingOptions {
	return n.vectorField.Options()
}
// NumPlainTextBytes forwards to the wrapped vector field, so it reports the
// byte count of the decoded vector rather than of the base64 string.
func (n *VectorBase64Field) NumPlainTextBytes() uint64 {
	return n.vectorField.NumPlainTextBytes()
}
// AnalyzedLength forwards to the wrapped vector field.
func (n *VectorBase64Field) AnalyzedLength() int {
	return n.vectorField.AnalyzedLength()
}
// EncodedFieldType returns 'e', the single-byte type code of base64 vector
// fields.
func (n *VectorBase64Field) EncodedFieldType() byte {
	return 'e'
}
// AnalyzedTokenFrequencies forwards to the wrapped vector field.
func (n *VectorBase64Field) AnalyzedTokenFrequencies() index.TokenFrequencies {
	return n.vectorField.AnalyzedTokenFrequencies()
}
// Analyze is a no-op for base64 vector fields.
func (n *VectorBase64Field) Analyze() {}
// Value forwards to the wrapped vector field's Value method.
func (n *VectorBase64Field) Value() []byte {
	return n.vectorField.Value()
}
// GoString renders the field's name, options and decoded vector in a
// Go-like debug form.
//
// The output names the real type (VectorBase64Field) and prints the decoded
// vector via Vector(), matching VectorField.GoString. Value() cannot be used
// for this because VectorField.Value always returns nil, which would leave
// the Value slot permanently empty.
func (n *VectorBase64Field) GoString() string {
	return fmt.Sprintf("&document.VectorBase64Field{Name:%s, Options: %s, "+
		"Value: %+v}", n.vectorField.Name(), n.vectorField.Options(), n.vectorField.Vector())
}
// NewVectorBase64Field decodes vectorBase64 with DecodeVector and wraps the
// result in a VectorField built with DefaultVectorIndexingOptions, keeping
// the original encoded string alongside it. A decoding failure is returned
// unchanged with a nil field.
//
// arrayPositions is accepted only to keep the constructor signature in line
// with the other field constructors; it is not used.
func NewVectorBase64Field(name string, arrayPositions []uint64, vectorBase64 string,
	dims int, similarity, vectorIndexOptimizedFor string) (*VectorBase64Field, error) {
	vector, err := DecodeVector(vectorBase64)
	if err != nil {
		return nil, err
	}

	inner := NewVectorFieldWithIndexingOptions(name, arrayPositions,
		vector, dims, similarity,
		vectorIndexOptimizedFor, DefaultVectorIndexingOptions)

	return &VectorBase64Field{
		vectorField:    inner,
		base64Encoding: vectorBase64,
	}, nil
}
// DecodeVector decodes a standard-base64 string into a float32 vector.
//
// The decoded bytes are read as consecutive little-endian float32 values.
// An error is returned when the string is not valid base64, when the byte
// count is not a multiple of size.SizeOfFloat32, when no values are
// present, or when any value is rejected by util.IsValidFloat32.
func DecodeVector(encodedValue string) ([]float32, error) {
	// Step one: base64 text to raw bytes.
	raw, err := base64.StdEncoding.DecodeString(encodedValue)
	if err != nil {
		return nil, err
	}

	// Every float32 occupies a fixed number of bytes, so the payload must
	// split evenly.
	width := size.SizeOfFloat32
	if len(raw)%width != 0 {
		return nil, fmt.Errorf("decoded byte array not divisible by %d", size.SizeOfFloat32)
	}

	count := len(raw) / width
	if count <= 0 {
		return nil, fmt.Errorf("unable to decode encoded vector")
	}

	// Step two: walk the bytes one element at a time, interpreting each
	// chunk as a little-endian float32.
	vector := make([]float32, count)
	for i := range vector {
		offset := i * width
		bits := binary.LittleEndian.Uint32(raw[offset : offset+width])
		element := math.Float32frombits(bits)
		if !util.IsValidFloat32(float64(element)) {
			return nil, fmt.Errorf("invalid float32 value: %f", element)
		}
		vector[i] = element
	}
	return vector, nil
}
// Vector returns the decoded float32 vector held by the wrapped field.
func (n *VectorBase64Field) Vector() []float32 {
	return n.vectorField.Vector()
}
// Dims returns the dimensionality of the wrapped vector field.
func (n *VectorBase64Field) Dims() int {
	return n.vectorField.Dims()
}
// Similarity returns the similarity metric of the wrapped vector field.
func (n *VectorBase64Field) Similarity() string {
	return n.vectorField.Similarity()
}
// IndexOptimizedFor returns the index optimization setting of the wrapped
// vector field.
func (n *VectorBase64Field) IndexOptimizedFor() string {
	return n.vectorField.IndexOptimizedFor()
}