Integrate BACKBEAT SDK and resolve KACHING license validation

Major integrations and fixes:
- Added BACKBEAT SDK integration for P2P operation timing
- Implemented beat-aware status tracking for distributed operations
- Added Docker secrets support for secure license management
- Resolved KACHING license validation via HTTPS/TLS
- Updated docker-compose configuration for clean stack deployment
- Disabled rollback policies to prevent deployment failures
- Added license credential storage (CHORUS-DEV-MULTI-001)

Technical improvements:
- BACKBEAT P2P operation tracking with phase management
- Enhanced configuration system with file-based secrets
- Improved error handling for license validation
- Clean separation of KACHING and CHORUS deployment stacks

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
anthonyrawlins
2025-09-06 07:56:26 +10:00
parent 543ab216f9
commit 9bdcbe0447
4730 changed files with 1480093 additions and 1916 deletions

View File

@@ -0,0 +1,107 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
// customAnalysis holds the configuration of custom analysis components,
// grouped by component kind. In every group the outer key is the component
// name and the inner map is that component's configuration; registerAll
// passes each (name, configuration) pair to the matching Define* method of
// the index mapping's cache.
type customAnalysis struct {
// CharFilters holds the custom character filter configurations.
CharFilters map[string]map[string]interface{} `json:"char_filters,omitempty"`
// Tokenizers holds the custom tokenizer configurations.
Tokenizers map[string]map[string]interface{} `json:"tokenizers,omitempty"`
// TokenMaps holds the custom token map configurations.
TokenMaps map[string]map[string]interface{} `json:"token_maps,omitempty"`
// TokenFilters holds the custom token filter configurations.
TokenFilters map[string]map[string]interface{} `json:"token_filters,omitempty"`
// Analyzers holds the custom analyzer configurations.
Analyzers map[string]map[string]interface{} `json:"analyzers,omitempty"`
// DateTimeParsers holds the custom date time parser configurations.
DateTimeParsers map[string]map[string]interface{} `json:"date_time_parsers,omitempty"`
// SynonymSources holds the custom synonym source configurations.
SynonymSources map[string]map[string]interface{} `json:"synonym_sources,omitempty"`
}
// registerAll defines every custom analysis component held by c in the cache
// of the supplied index mapping. Component kinds are handled in a fixed
// order: char filters, tokenizers, token maps, token filters, analyzers,
// date time parsers and finally synonym sources. The first error hit is
// returned; nil means every component was defined.
func (c *customAnalysis) registerAll(i *IndexMappingImpl) error {
	for filterName, filterConfig := range c.CharFilters {
		if _, err := i.cache.DefineCharFilter(filterName, filterConfig); err != nil {
			return err
		}
	}

	if len(c.Tokenizers) > 0 {
		// Tokenizers are defined in rounds: a definition that fails is kept
		// and retried for as long as the previous round defined at least one
		// tokenizer (presumably because a tokenizer may refer to another one
		// that has not been defined yet — confirm against the registry).
		pending := make(map[string]struct{}, len(c.Tokenizers))
		for tokenizerName := range c.Tokenizers {
			pending[tokenizerName] = struct{}{}
		}

		var failures []error
		progressed := true
		for progressed && len(pending) > 0 {
			progressed = false
			failures = nil
			for tokenizerName := range pending {
				_, err := i.cache.DefineTokenizer(tokenizerName, c.Tokenizers[tokenizerName])
				if err != nil {
					failures = append(failures, err)
					continue
				}
				delete(pending, tokenizerName)
				progressed = true
			}
		}

		// only failures from the final round are still relevant
		if len(failures) > 0 {
			return failures[0]
		}
	}

	for mapName, mapConfig := range c.TokenMaps {
		if _, err := i.cache.DefineTokenMap(mapName, mapConfig); err != nil {
			return err
		}
	}

	for filterName, filterConfig := range c.TokenFilters {
		if _, err := i.cache.DefineTokenFilter(filterName, filterConfig); err != nil {
			return err
		}
	}

	for analyzerName, analyzerConfig := range c.Analyzers {
		if _, err := i.cache.DefineAnalyzer(analyzerName, analyzerConfig); err != nil {
			return err
		}
	}

	for parserName, parserConfig := range c.DateTimeParsers {
		if _, err := i.cache.DefineDateTimeParser(parserName, parserConfig); err != nil {
			return err
		}
	}

	for sourceName, sourceConfig := range c.SynonymSources {
		if _, err := i.cache.DefineSynonymSource(sourceName, sourceConfig); err != nil {
			return err
		}
	}

	return nil
}
// newCustomAnalysis returns a customAnalysis whose component maps are all
// allocated and empty, so entries can be added without nil checks.
func newCustomAnalysis() *customAnalysis {
	type configs = map[string]map[string]interface{}

	return &customAnalysis{
		CharFilters:     configs{},
		Tokenizers:      configs{},
		TokenMaps:       configs{},
		TokenFilters:    configs{},
		Analyzers:       configs{},
		DateTimeParsers: configs{},
		SynonymSources:  configs{},
	}
}

View File

@@ -0,0 +1,646 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"encoding"
"encoding/json"
"fmt"
"net"
"reflect"
"time"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/util"
)
// A DocumentMapping describes how a type of document
// should be indexed.
// As documents can be hierarchical, named sub-sections
// of documents are mapped using the same structure in
// the Properties field.
// Each value inside a document can be indexed 0 or more
// ways. These index entries are called fields and
// are stored in the Fields field.
// Entire sections of a document can be ignored or
// excluded by setting Enabled to false.
// If not explicitly mapped, default mapping operations
// are used. To disable this automatic handling, set
// Dynamic to false.
type DocumentMapping struct {
// Enabled controls whether this section of the document is processed;
// values mapped to a section with Enabled set to false are skipped.
Enabled bool `json:"enabled"`
// Dynamic controls whether values without an explicit mapping are
// indexed automatically using default field mappings.
Dynamic bool `json:"dynamic"`
// Properties maps the name of each sub-section to its own mapping.
Properties map[string]*DocumentMapping `json:"properties,omitempty"`
// Fields lists the index entries produced for a value at this level.
Fields []*FieldMapping `json:"fields,omitempty"`
// DefaultAnalyzer, when non-empty, names the analyzer used by default at
// and below this level; Validate requires it to exist in the cache.
DefaultAnalyzer string `json:"default_analyzer,omitempty"`
// DefaultSynonymSource, when non-empty, names the synonym source used by
// default at and below this level; Validate requires it to exist in the
// cache.
DefaultSynonymSource string `json:"default_synonym_source,omitempty"`
// StructTagKey overrides "json" when looking for field names in struct tags
StructTagKey string `json:"struct_tag_key,omitempty"`
}
// Validate checks that every analyzer, date time parser and synonym source
// named by this mapping, by its fields and (recursively) by its sub-document
// mappings can be resolved through cache, and runs validateFieldMapping on
// each field. parentName is the dotted path of the enclosing mapping ("" at
// the root); fieldAliasCtx is handed on unchanged to validateFieldMapping and
// to the recursive calls. The first error encountered is returned.
func (dm *DocumentMapping) Validate(cache *registry.Cache,
	parentName string, fieldAliasCtx map[string]*FieldMapping,
) error {
	if name := dm.DefaultAnalyzer; name != "" {
		if _, err := cache.AnalyzerNamed(name); err != nil {
			return err
		}
	}
	if name := dm.DefaultSynonymSource; name != "" {
		if _, err := cache.SynonymSourceNamed(name); err != nil {
			return err
		}
	}

	// sub-document mappings are validated under their full dotted path
	for childName, child := range dm.Properties {
		childPath := childName
		if parentName != "" {
			childPath = parentName + "." + childName
		}
		if err := child.Validate(cache, childPath, fieldAliasCtx); err != nil {
			return err
		}
	}

	for _, fm := range dm.Fields {
		if fm.Analyzer != "" {
			if _, err := cache.AnalyzerNamed(fm.Analyzer); err != nil {
				return err
			}
		}
		if fm.DateFormat != "" {
			if _, err := cache.DateTimeParserNamed(fm.DateFormat); err != nil {
				return err
			}
		}
		if fm.SynonymSource != "" {
			if _, err := cache.SynonymSourceNamed(fm.SynonymSource); err != nil {
				return err
			}
		}
		if err := validateFieldMapping(fm, parentName, fieldAliasCtx); err != nil {
			return err
		}
	}

	return nil
}
// validateFieldType returns an error unless the Type of field is one of
// "text", "datetime", "number", "boolean", "geopoint", "geoshape" or "IP".
func validateFieldType(field *FieldMapping) error {
	known := [...]string{"text", "datetime", "number", "boolean", "geopoint", "geoshape", "IP"}
	for _, candidate := range known {
		if field.Type == candidate {
			return nil
		}
	}
	return fmt.Errorf("field: '%s', unknown field type: '%s'",
		field.Name, field.Type)
}
// analyzerNameForPath looks up the field described by path and returns the
// analyzer name configured on it, or "" when no such field is found.
func (dm *DocumentMapping) analyzerNameForPath(path string) string {
	if fm := dm.fieldDescribedByPath(path); fm != nil {
		return fm.Analyzer
	}
	return ""
}
// synonymSourceForPath looks up the field described by path and returns the
// synonym source configured on it, or "" when no such field is found.
func (dm *DocumentMapping) synonymSourceForPath(path string) string {
	fm := dm.fieldDescribedByPath(path)
	if fm == nil {
		return ""
	}
	return fm.SynonymSource
}
// fieldDescribedByPath returns the field mapping addressed by path, or nil
// when there is none. The lookup proceeds in three steps:
//
//  1. when path has more than one element and the first element names a
//     property, the search continues in that property with the remaining
//     elements and its result (even nil) is returned;
//  2. otherwise a property named exactly path is searched for a field whose
//     Name is empty or equal to path;
//  3. finally the fields of every other property are searched for a field
//     whose Name equals path.
func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
	if elements := decodePath(path); len(elements) > 1 {
		// descend when the leading element names a property at this level
		if child, ok := dm.Properties[elements[0]]; ok {
			return child.fieldDescribedByPath(encodePath(elements[1:]))
		}
	}

	// the path is a single element, or its first element matched nothing
	// here: try the property named by the whole path first
	if child, ok := dm.Properties[path]; ok {
		for _, fm := range child.Fields {
			if fm.Name == "" || fm.Name == path {
				return fm
			}
		}
	}

	// a field of a differently named property may still claim the path
	// through its Name
	for name, child := range dm.Properties {
		if name == path {
			continue
		}
		for _, fm := range child.Fields {
			if fm.Name == path {
				return fm
			}
		}
	}

	return nil
}
// documentMappingForPathElements returns the EXACT and closest matches for a sub
// document or for an explicitly mapped field; the closest most specific
// document mapping could be one that matches part of the provided path.
//
// The first result is the mapping reached by following every path element
// through Properties; it is nil as soon as one element has no matching
// property. The second result is the deepest mapping reached before the walk
// stopped. An empty pathElements is treated as a single "" element.
func (dm *DocumentMapping) documentMappingForPathElements(pathElements []string) (
*DocumentMapping, *DocumentMapping,
) {
var pathElementsCopy []string
if len(pathElements) == 0 {
pathElementsCopy = []string{""}
} else {
pathElementsCopy = pathElements
}
current := dm
OUTER:
for i, pathElement := range pathElementsCopy {
if subDocMapping, exists := current.Properties[pathElement]; exists {
current = subDocMapping
continue OUTER
}
// no subDocMapping matches this pathElement
// only if this is the last element check for field name
// NOTE(review): the scan below has no effect on the result; its break only
// leaves the inner loop, so control always reaches the return that follows.
// Confirm whether a field-name match was meant to count as an exact match.
if i == len(pathElementsCopy)-1 {
for _, field := range current.Fields {
if field.Name == pathElement {
break
}
}
}
// no exact match; current is the closest mapping found so far
return nil, current
}
// every element matched a property: exact and closest are the same mapping
return current, current
}
// documentMappingForPath decodes path into its elements and returns what
// documentMappingForPathElements reports for them: the exact match (nil when
// there is none) followed by the closest, most specific document mapping.
func (dm *DocumentMapping) documentMappingForPath(path string) (
	*DocumentMapping, *DocumentMapping,
) {
	return dm.documentMappingForPathElements(decodePath(path))
}
// NewDocumentMapping returns a new document mapping with the default
// values: both Enabled and Dynamic are true.
func NewDocumentMapping() *DocumentMapping {
	dm := new(DocumentMapping)
	dm.Enabled = true
	dm.Dynamic = true
	return dm
}
// NewDocumentStaticMapping returns a new document mapping that is enabled
// but not dynamic, so parts of a document without an explicit mapping are
// not indexed automatically.
func NewDocumentStaticMapping() *DocumentMapping {
	var dm DocumentMapping
	dm.Enabled = true
	return &dm
}
// NewDocumentDisabledMapping returns a new document mapping with Enabled
// (and every other setting) left at its zero value, so no indexing is
// performed for it.
func NewDocumentDisabledMapping() *DocumentMapping {
	return new(DocumentMapping)
}
// AddSubDocumentMapping registers sdm as the mapping of the sub-section
// named property, replacing any mapping previously stored under that name.
func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) {
	if dm.Properties == nil {
		dm.Properties = map[string]*DocumentMapping{property: sdm}
		return
	}
	dm.Properties[property] = sdm
}
// AddFieldMappingsAt appends the given FieldMappings to the sub-document
// named property, creating that sub-document with NewDocumentMapping when
// it does not exist yet.
// It is a shorthand for the most common way of building mappings, which
// would otherwise read:
//
//	subMapping := NewDocumentMapping()
//	subMapping.AddFieldMapping(fieldMapping)
//	parentMapping.AddSubDocumentMapping(property, subMapping)
func (dm *DocumentMapping) AddFieldMappingsAt(property string, fms ...*FieldMapping) {
	if dm.Properties == nil {
		dm.Properties = make(map[string]*DocumentMapping)
	}

	target, exists := dm.Properties[property]
	if !exists {
		target = NewDocumentMapping()
	}
	for idx := range fms {
		target.AddFieldMapping(fms[idx])
	}
	dm.Properties[property] = target
}
// AddFieldMapping appends fm to the field mappings of this section of the
// document.
func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) {
	// append allocates the slice when Fields is still nil
	dm.Fields = append(dm.Fields, fm)
}
// UnmarshalJSON decodes a document mapping from JSON. Enabled and Dynamic
// are reset to true before decoding, so they keep that value when the keys
// are absent. Keys that are not part of a document mapping are collected
// and reported as an error only when MappingJSONStrict is set; otherwise
// they are ignored.
func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
	var raw map[string]json.RawMessage
	if err := util.UnmarshalJSON(data, &raw); err != nil {
		return err
	}

	// defaults for keys that may have been omitted
	dm.Enabled, dm.Dynamic = true, true

	// destination of every recognised key
	targets := map[string]interface{}{
		"enabled":                &dm.Enabled,
		"dynamic":                &dm.Dynamic,
		"default_analyzer":       &dm.DefaultAnalyzer,
		"default_synonym_source": &dm.DefaultSynonymSource,
		"properties":             &dm.Properties,
		"fields":                 &dm.Fields,
		"struct_tag_key":         &dm.StructTagKey,
	}

	var unknown []string
	for key, value := range raw {
		target, recognised := targets[key]
		if !recognised {
			unknown = append(unknown, key)
			continue
		}
		if err := util.UnmarshalJSON(value, target); err != nil {
			return err
		}
	}

	if MappingJSONStrict && len(unknown) > 0 {
		return fmt.Errorf("document mapping contains invalid keys: %v", unknown)
	}
	return nil
}
// defaultAnalyzerName follows path through the nested Properties, starting
// at dm, and returns the DefaultAnalyzer of the deepest mapping on the way
// that sets one. The walk stops at the first path element that has no
// matching property; "" is returned when no mapping on the way sets a
// default analyzer.
func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
	name := dm.DefaultAnalyzer
	node := dm
	for _, element := range path {
		child, found := node.Properties[element]
		if !found {
			break
		}
		node = child
		if node.DefaultAnalyzer != "" {
			name = node.DefaultAnalyzer
		}
	}
	return name
}
// defaultSynonymSource follows path through the nested Properties, starting
// at dm, and returns the DefaultSynonymSource of the deepest mapping on the
// way that sets one. The walk stops at the first path element that has no
// matching property; "" is returned when no mapping on the way sets a
// default synonym source.
func (dm *DocumentMapping) defaultSynonymSource(path []string) string {
	source := dm.DefaultSynonymSource
	node := dm
	for idx := range path {
		next, found := node.Properties[path[idx]]
		if !found {
			break
		}
		node = next
		if node.DefaultSynonymSource != "" {
			source = node.DefaultSynonymSource
		}
	}
	return source
}
// walkDocument inspects data through reflection and hands every value it
// contains to processProperty:
//
//   - maps with string keys contribute one value per key, with the key
//     appended to path (maps with other key kinds are not walked);
//   - structs contribute one value per field that can be read through
//     reflection, named after the struct tag selected by dm.StructTagKey
//     ("json" when unset) or, failing that, after the Go field name; a tag
//     name of "-" skips the field;
//   - slices and arrays contribute one value per element, with the element
//     position appended to indexes;
//   - pointers are followed when they point at a readable value;
//   - strings, numbers and booleans are passed on as they are, integers
//     being converted to float64 first.
//
// Invalid values (such as a nil interface) are ignored.
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
	val := reflect.ValueOf(data)
	if !val.IsValid() {
		return
	}

	// the struct tag that supplies field names defaults to "json"
	tagKey := "json"
	if dm.StructTagKey != "" {
		tagKey = dm.StructTagKey
	}

	typ := val.Type()
	switch typ.Kind() {
	case reflect.Map:
		// FIXME can add support for other map keys in the future
		if typ.Key().Kind() == reflect.String {
			for _, key := range val.MapKeys() {
				name := key.String()
				value := val.MapIndex(key).Interface()
				dm.processProperty(value, append(path, name), indexes, context)
			}
		}

	case reflect.Struct:
		for idx := 0; idx < val.NumField(); idx++ {
			structField := typ.Field(idx)

			// embedded structs may drop their type name from the path
			name := structField.Name
			if structField.Anonymous && structField.Type.Kind() == reflect.Struct {
				name = ""
			}

			// a name given through the struct tag takes precedence
			tagName := parseTagName(structField.Tag.Get(tagKey))
			if tagName == "-" {
				continue
			}
			// an empty tag name may only replace the name of embedded fields
			if structField.Tag != "" && (tagName != "" || structField.Anonymous) {
				name = tagName
			}

			fieldValue := val.Field(idx)
			if !fieldValue.CanInterface() {
				continue
			}
			childPath := path
			if name != "" {
				childPath = append(path, name)
			}
			dm.processProperty(fieldValue.Interface(), childPath, indexes, context)
		}

	case reflect.Slice, reflect.Array:
		for pos := 0; pos < val.Len(); pos++ {
			element := val.Index(pos)
			if !element.CanInterface() {
				continue
			}
			dm.processProperty(element.Interface(), path, append(indexes, uint64(pos)), context)
		}

	case reflect.Ptr:
		if target := val.Elem(); target.IsValid() && target.CanInterface() {
			dm.processProperty(target.Interface(), path, indexes, context)
		}

	case reflect.String:
		dm.processProperty(val.String(), path, indexes, context)

	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		dm.processProperty(float64(val.Int()), path, indexes, context)

	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		dm.processProperty(float64(val.Uint()), path, indexes, context)

	case reflect.Float32, reflect.Float64:
		dm.processProperty(val.Float(), path, indexes, context)

	case reflect.Bool:
		dm.processProperty(val.Bool(), path, indexes, context)
	}
}
// processProperty handles the single value property found at path (with
// array positions in indexes) while a document is being walked.
//
// It first resolves the mapping for path. When an exact sub-document mapping
// exists and is disabled, the value is skipped. Otherwise the value is
// dispatched on its reflected kind:
//
//   - with an exact mapping, the value is offered to every field mapping in
//     its Fields list;
//   - without one, and only when the closest mapping is Dynamic, a dynamic
//     field mapping built from context.im is used;
//   - integers are converted to float64 and processed again;
//   - structs, maps, slices and pointers are, in most cases, walked further
//     through walkDocument (see the individual cases for the exceptions).
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
// look to see if there is a mapping for this field
subDocMapping, closestDocMapping := dm.documentMappingForPathElements(path)
// check to see if we even need to do further processing
if subDocMapping != nil && !subDocMapping.Enabled {
return
}
propertyValue := reflect.ValueOf(property)
if !propertyValue.IsValid() {
// cannot do anything with the zero value
return
}
pathString := encodePath(path)
propertyType := propertyValue.Type()
switch propertyType.Kind() {
case reflect.String:
propertyValueString := propertyValue.String()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
// geo and base64-vector field types receive the original value; every
// other field type receives the string
switch fieldMapping.Type {
case "geoshape":
fieldMapping.processGeoShape(property, pathString, path, indexes, context)
case "geopoint":
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
case "vector_base64":
fieldMapping.processVectorBase64(property, pathString, path, indexes, context)
default:
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
}
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
// first see if it can be parsed by the default date parser
dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)
// NOTE(review): when no default date time parser is found the string is
// not indexed at all — confirm this is intended
if dateTimeParser != nil {
parsedDateTime, layout, err := dateTimeParser.ParseDateTime(propertyValueString)
if err != nil {
// index as text
fieldMapping := newTextFieldMappingDynamic(context.im)
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
} else {
// index as datetime
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(parsedDateTime, layout, pathString, path, indexes, context)
}
}
}
// signed and unsigned integers are re-dispatched as float64 so that the
// float case below handles all numbers
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
return
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)
return
case reflect.Float64, reflect.Float32:
propertyValFloat := propertyValue.Float()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newNumericFieldMappingDynamic(context.im)
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
case reflect.Bool:
propertyValBool := propertyValue.Bool()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newBooleanFieldMappingDynamic(context.im)
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
case reflect.Struct:
switch property := property.(type) {
case time.Time:
// don't descend into the time struct
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processTime(property, time.RFC3339, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(property, time.RFC3339, pathString, path, indexes, context)
}
case encoding.TextMarshaler:
// the marshaled text is indexed into explicitly mapped "text" fields
// only; the struct is still walked afterwards in every case
txt, err := property.MarshalText()
if err == nil && subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "text" {
fieldMapping.processString(string(txt), pathString, path, indexes, context)
}
}
}
dm.walkDocument(property, path, indexes, context)
default:
// any other struct is offered to explicitly mapped geo fields and then
// walked field by field
if subDocMapping != nil {
for _, fieldMapping := range subDocMapping.Fields {
switch fieldMapping.Type {
case "geopoint":
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
case "geoshape":
fieldMapping.processGeoShape(property, pathString, path, indexes, context)
}
}
}
dm.walkDocument(property, path, indexes, context)
}
case reflect.Map, reflect.Slice:
// the value is walked further unless every explicitly mapped field is of
// type "vector" (the only field type that does not request a walk)
walkDocument := false
if subDocMapping != nil && len(subDocMapping.Fields) != 0 {
for _, fieldMapping := range subDocMapping.Fields {
switch fieldMapping.Type {
case "vector":
fieldMapping.processVector(property, pathString, path,
indexes, context)
case "geopoint":
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
walkDocument = true
case "IP":
// only values that are already a net.IP are indexed here
ip, ok := property.(net.IP)
if ok {
fieldMapping.processIP(ip, pathString, path, indexes, context)
}
walkDocument = true
case "geoshape":
fieldMapping.processGeoShape(property, pathString, path, indexes, context)
walkDocument = true
default:
walkDocument = true
}
}
} else {
walkDocument = true
}
if walkDocument {
dm.walkDocument(property, path, indexes, context)
}
case reflect.Ptr:
// nil pointers are ignored
if !propertyValue.IsNil() {
switch property := property.(type) {
case encoding.TextMarshaler:
// ONLY process TextMarshaler if there is an explicit mapping
// AND all of the fields are of type text
// OTHERWISE process field without TextMarshaler
if subDocMapping != nil {
// allFieldsText stays true when the mapping has no fields at all
allFieldsText := true
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type != "text" {
allFieldsText = false
break
}
}
txt, err := property.MarshalText()
if err == nil && allFieldsText {
txtStr := string(txt)
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processString(txtStr, pathString, path, indexes, context)
}
// the pointed-to value is not walked once its text form was used
return
}
}
dm.walkDocument(property, path, indexes, context)
default:
dm.walkDocument(property, path, indexes, context)
}
}
default:
// every other kind is left to walkDocument
dm.walkDocument(property, path, indexes, context)
}
}

492
vendor/github.com/blevesearch/bleve/v2/mapping/field.go generated vendored Normal file
View File

@@ -0,0 +1,492 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"encoding/json"
"fmt"
"net"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/geo/geojson"
)
// Package-level switches that control the default behavior for dynamic
// fields (those not explicitly mapped). All three default to true.
// NOTE(review): presumably these seed the IndexMappingImpl settings of the
// same names that the new*FieldMappingDynamic helpers read — confirm where
// the index mapping is constructed.
var (
IndexDynamic = true
StoreDynamic = true
DocValuesDynamic = true // TODO revisit default?
)
// A FieldMapping describes how a specific item
// should be put into the index.
type FieldMapping struct {
// Name, when set, is compared with the path during field lookup in place
// of the enclosing property name.
// NOTE(review): presumably it also overrides the name under which the
// field is indexed — confirm against getFieldName.
Name string `json:"name,omitempty"`
// Type selects the kind of field built for a value, for example "text",
// "number", "datetime", "boolean", "geopoint", "geoshape" or "IP"; the
// process* methods ignore values that do not fit the type.
Type string `json:"type,omitempty"`
// Analyzer specifies the name of the analyzer to use for this field. If
// Analyzer is empty, traverse the DocumentMapping tree toward the root and
// pick the first non-empty DefaultAnalyzer found. If there is none, use
// the IndexMapping.DefaultAnalyzer.
Analyzer string `json:"analyzer,omitempty"`
// Store indicates whether to store field values in the index. Stored
// values can be retrieved from search results using SearchRequest.Fields.
Store bool `json:"store,omitempty"`
// Index indicates whether the field is indexed; Options reports it as
// index.IndexField.
Index bool `json:"index,omitempty"`
// IncludeTermVectors, if true, makes terms occurrences to be recorded for
// this field. It includes the term position within the terms sequence and
// the term offsets in the source document field. Term vectors are required
// to perform phrase queries or terms highlighting in source documents.
IncludeTermVectors bool `json:"include_term_vectors,omitempty"`
// IncludeInAll, if false, makes the process* methods record the field
// name in the walk context's list of fields excluded from "all".
IncludeInAll bool `json:"include_in_all,omitempty"`
// DateFormat names the date time parser used for string values of
// datetime fields; when empty, the index mapping's DefaultDateTimeParser
// is used.
DateFormat string `json:"date_format,omitempty"`
// DocValues, if true makes the index uninverting possible for this field
// It is useful for faceting and sorting queries.
DocValues bool `json:"docvalues,omitempty"`
// SkipFreqNorm, if true, avoids the indexing of frequency and norm values
// of the tokens for this field. This option would be useful for saving
// the processing of freq/norm details when the default score based relevancy
// isn't needed.
SkipFreqNorm bool `json:"skip_freq_norm,omitempty"`
// Dimensionality of the vector
Dims int `json:"dims,omitempty"`
// Similarity is the similarity algorithm used for scoring
// field's content while performing search on it.
// See: index.SimilarityModels
Similarity string `json:"similarity,omitempty"`
// Applicable to vector fields only - optimization string
VectorIndexOptimizedFor string `json:"vector_index_optimized_for,omitempty"`
// SynonymSource names the synonym source to use for this field; when set,
// DocumentMapping.Validate requires it to exist in the registry cache.
SynonymSource string `json:"synonym_source,omitempty"`
}
// NewTextFieldMapping returns a default field mapping for text: stored,
// indexed, with term vectors, included in "all" and with doc values.
func NewTextFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "text"
	fm.Store = true
	fm.Index = true
	fm.IncludeTermVectors = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return fm
}
// newTextFieldMappingDynamic returns the default text field mapping with
// Store, Index and DocValues replaced by the dynamic settings of im.
func newTextFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
	fm := NewTextFieldMapping()
	fm.Store, fm.Index, fm.DocValues = im.StoreDynamic, im.IndexDynamic, im.DocValuesDynamic
	return fm
}
// NewKeywordFieldMapping returns a default field mapping for text that uses
// the "keyword" analyzer; all other settings match NewTextFieldMapping.
func NewKeywordFieldMapping() *FieldMapping {
	var fm FieldMapping
	fm.Type = "text"
	fm.Analyzer = keyword.Name
	fm.Store = true
	fm.Index = true
	fm.IncludeTermVectors = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return &fm
}
// NewNumericFieldMapping returns a default field mapping for numbers:
// stored, indexed, included in "all" and with doc values.
func NewNumericFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "number"
	fm.Store = true
	fm.Index = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return fm
}
// newNumericFieldMappingDynamic returns the default numeric field mapping
// with Store, Index and DocValues replaced by the dynamic settings of im.
func newNumericFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
	fm := NewNumericFieldMapping()
	fm.Store, fm.Index, fm.DocValues = im.StoreDynamic, im.IndexDynamic, im.DocValuesDynamic
	return fm
}
// NewDateTimeFieldMapping returns a default field mapping for dates:
// stored, indexed, included in "all" and with doc values.
func NewDateTimeFieldMapping() *FieldMapping {
	var fm FieldMapping
	fm.Type = "datetime"
	fm.Store = true
	fm.Index = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return &fm
}
// newDateTimeFieldMappingDynamic returns the default datetime field mapping
// with Store, Index and DocValues replaced by the dynamic settings of im.
func newDateTimeFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
	fm := NewDateTimeFieldMapping()
	fm.Store, fm.Index, fm.DocValues = im.StoreDynamic, im.IndexDynamic, im.DocValuesDynamic
	return fm
}
// NewBooleanFieldMapping returns a default field mapping for booleans:
// stored, indexed, included in "all" and with doc values.
func NewBooleanFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "boolean"
	fm.Store = true
	fm.Index = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return fm
}
// newBooleanFieldMappingDynamic returns the default boolean field mapping
// with Store, Index and DocValues replaced by the dynamic settings of im.
func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
	fm := NewBooleanFieldMapping()
	fm.Store, fm.Index, fm.DocValues = im.StoreDynamic, im.IndexDynamic, im.DocValuesDynamic
	return fm
}
// NewGeoPointFieldMapping returns a default field mapping for geo points:
// stored, indexed, included in "all" and with doc values.
func NewGeoPointFieldMapping() *FieldMapping {
	var fm FieldMapping
	fm.Type = "geopoint"
	fm.Store = true
	fm.Index = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return &fm
}
// NewGeoShapeFieldMapping returns a default field mapping for geoshapes:
// stored, indexed, included in "all" and with doc values.
func NewGeoShapeFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "geoshape"
	fm.Store = true
	fm.Index = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return fm
}
// NewIPFieldMapping returns a default field mapping for IP addresses:
// stored, indexed and included in "all". Doc values are not enabled.
func NewIPFieldMapping() *FieldMapping {
	var fm FieldMapping
	fm.Type = "IP"
	fm.Store = true
	fm.Index = true
	fm.IncludeInAll = true
	return &fm
}
// Options translates the boolean settings of this field mapping (Store,
// Index, IncludeTermVectors, DocValues and SkipFreqNorm) into the matching
// index.FieldIndexingOptions bit set.
func (fm *FieldMapping) Options() index.FieldIndexingOptions {
	opts := index.FieldIndexingOptions(0)
	if fm.Index {
		opts |= index.IndexField
	}
	if fm.Store {
		opts |= index.StoreField
	}
	if fm.IncludeTermVectors {
		opts |= index.IncludeTermVectors
	}
	if fm.DocValues {
		opts |= index.DocValues
	}
	if fm.SkipFreqNorm {
		opts |= index.SkipFreqNorm
	}
	return opts
}
// processString indexes a string value according to the type of this field
// mapping:
//
//   - "text": a text field using the analyzer resolved for the field is
//     added to the document;
//   - "datetime": the string is parsed with the parser named by DateFormat
//     (or the index mapping's default parser) and, on success, passed on to
//     processTime;
//   - "IP": the string is parsed as an IP address and, on success, passed on
//     to processIP.
//
// Strings that cannot be parsed, and all other field types, are ignored.
func (fm *FieldMapping) processString(propertyValueString string, pathString string, path []string, indexes []uint64, context *walkContext) {
	name := getFieldName(pathString, path, fm)
	opts := fm.Options()

	switch fm.Type {
	case "text":
		textAnalyzer := fm.analyzerForField(path, context)
		textField := document.NewTextFieldCustom(name, indexes, []byte(propertyValueString), opts, textAnalyzer)
		context.doc.AddField(textField)
		if !fm.IncludeInAll {
			context.excludedFromAll = append(context.excludedFromAll, name)
		}

	case "datetime":
		parserName := context.im.DefaultDateTimeParser
		if fm.DateFormat != "" {
			parserName = fm.DateFormat
		}
		parser := context.im.DateTimeParserNamed(parserName)
		if parser == nil {
			return
		}
		parsed, layout, err := parser.ParseDateTime(propertyValueString)
		if err != nil {
			return
		}
		fm.processTime(parsed, layout, pathString, path, indexes, context)

	case "IP":
		if ip := net.ParseIP(propertyValueString); ip != nil {
			fm.processIP(ip, pathString, path, indexes, context)
		}
	}
}
// processFloat64 adds a numeric field for propertyValFloat to the document
// when this field mapping is of type "number"; other types are ignored.
// Fields not included in "all" are recorded in context.excludedFromAll.
func (fm *FieldMapping) processFloat64(propertyValFloat float64, pathString string, path []string, indexes []uint64, context *walkContext) {
	name := getFieldName(pathString, path, fm)
	if fm.Type != "number" {
		return
	}

	numericField := document.NewNumericFieldWithIndexingOptions(name, indexes, propertyValFloat, fm.Options())
	context.doc.AddField(numericField)
	if !fm.IncludeInAll {
		context.excludedFromAll = append(context.excludedFromAll, name)
	}
}
// processTime adds a datetime field for propertyValueTime (with the layout
// it was parsed with) to the document when this field mapping is of type
// "datetime"; other types are ignored. A field that cannot be built is
// logged and not added. Fields not included in "all" are recorded in
// context.excludedFromAll, whether or not the field could be built.
func (fm *FieldMapping) processTime(propertyValueTime time.Time, layout string, pathString string, path []string, indexes []uint64, context *walkContext) {
	name := getFieldName(pathString, path, fm)
	if fm.Type != "datetime" {
		return
	}

	dateField, err := document.NewDateTimeFieldWithIndexingOptions(name, indexes, propertyValueTime, layout, fm.Options())
	if err != nil {
		logger.Printf("could not build date %v", err)
	} else {
		context.doc.AddField(dateField)
	}

	if !fm.IncludeInAll {
		context.excludedFromAll = append(context.excludedFromAll, name)
	}
}
// processBoolean adds a boolean field for propertyValueBool to the document
// when this field mapping is of type "boolean"; other types are ignored.
// Fields not included in "all" are recorded in context.excludedFromAll.
func (fm *FieldMapping) processBoolean(propertyValueBool bool, pathString string, path []string, indexes []uint64, context *walkContext) {
	name := getFieldName(pathString, path, fm)
	if fm.Type != "boolean" {
		return
	}

	boolField := document.NewBooleanFieldWithIndexingOptions(name, indexes, propertyValueBool, fm.Options())
	context.doc.AddField(boolField)
	if !fm.IncludeInAll {
		context.excludedFromAll = append(context.excludedFromAll, name)
	}
}
// processGeoPoint tries to extract a longitude/latitude pair from
// propertyMightBeGeoPoint and, when one is found, adds a geo point field
// for it to the document. Values that do not describe a geo point are
// ignored. Fields not included in "all" are recorded in
// context.excludedFromAll.
func (fm *FieldMapping) processGeoPoint(propertyMightBeGeoPoint interface{}, pathString string, path []string, indexes []uint64, context *walkContext) {
	lon, lat, ok := geo.ExtractGeoPoint(propertyMightBeGeoPoint)
	if !ok {
		return
	}

	name := getFieldName(pathString, path, fm)
	pointField := document.NewGeoPointFieldWithIndexingOptions(name, indexes, lon, lat, fm.Options())
	context.doc.AddField(pointField)
	if !fm.IncludeInAll {
		context.excludedFromAll = append(context.excludedFromAll, name)
	}
}
// processIP adds an IP field for ip to the document being built in context.
// Unlike the other process* helpers it does not check the mapping's Type.
// When the mapping is not included in "_all", the field name is recorded in
// context.excludedFromAll.
func (fm *FieldMapping) processIP(ip net.IP, pathString string, path []string, indexes []uint64, context *walkContext) {
	name := getFieldName(pathString, path, fm)
	context.doc.AddField(
		document.NewIPFieldWithIndexingOptions(name, indexes, ip, fm.Options()))
	if fm.IncludeInAll {
		return
	}
	context.excludedFromAll = append(context.excludedFromAll, name)
}
// processGeoShape parses propertyMightBeGeoShape as a geo shape and, when it
// can be extracted, adds the matching field to the document being built in
// context. Geometry collections, circles and coordinate-based shapes each use
// their own extractor. Values that fail to parse or extract are silently
// ignored.
func (fm *FieldMapping) processGeoShape(propertyMightBeGeoShape interface{},
	pathString string, path []string, indexes []uint64, context *walkContext,
) {
	coordValue, shape, err := geo.ParseGeoShapeField(propertyMightBeGeoShape)
	if err != nil {
		return
	}

	// geometry collections carry a list of shapes instead of a single one
	if shape == geo.GeometryCollectionType {
		geoShapes, ok := geo.ExtractGeometryCollection(propertyMightBeGeoShape)
		if !ok {
			return
		}
		name := getFieldName(pathString, path, fm)
		context.doc.AddField(
			document.NewGeometryCollectionFieldFromShapesWithIndexingOptions(name,
				indexes, geoShapes, fm.Options()))
		if !fm.IncludeInAll {
			context.excludedFromAll = append(context.excludedFromAll, name)
		}
		return
	}

	var (
		geoShape *geojson.GeoShape
		ok       bool
	)
	switch shape {
	case geo.CircleType:
		geoShape, ok = geo.ExtractCircle(propertyMightBeGeoShape)
	default:
		geoShape, ok = geo.ExtractGeoShapeCoordinates(coordValue, shape)
	}
	if !ok {
		return
	}
	name := getFieldName(pathString, path, fm)
	context.doc.AddField(
		document.NewGeoShapeFieldFromShapeWithIndexingOptions(name,
			indexes, geoShape, fm.Options()))
	if !fm.IncludeInAll {
		context.excludedFromAll = append(context.excludedFromAll, name)
	}
}
// analyzerForField resolves the analyzer for this field mapping. The name is
// taken, in order of preference, from the mapping itself, from the document
// mapping's default for path, and finally from the index mapping's
// DefaultAnalyzer. The name is then resolved through
// IndexMappingImpl.AnalyzerNamed.
func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) analysis.Analyzer {
	name := fm.Analyzer
	if name == "" {
		name = context.dm.defaultAnalyzerName(path)
	}
	if name == "" {
		name = context.im.DefaultAnalyzer
	}
	return context.im.AnalyzerNamed(name)
}
// getFieldName returns the name under which a field is indexed. Without an
// explicit Name on the mapping this is pathString. With one, the last path
// element is replaced by that Name, keeping any parent path as a prefix.
func getFieldName(pathString string, path []string, fieldMapping *FieldMapping) string {
	if fieldMapping.Name == "" {
		return pathString
	}
	if len(path) <= 1 {
		// no parent path to prepend
		return fieldMapping.Name
	}
	parent := encodePath(path[:len(path)-1])
	return parent + pathSeparator + fieldMapping.Name
}
// UnmarshalJSON offers custom unmarshaling with optional strict validation.
//
// Each recognised top-level key is decoded into its FieldMapping member;
// fields whose key is absent are left untouched. Unknown keys are collected
// and only reported as an error when MappingJSONStrict is true.
func (fm *FieldMapping) UnmarshalJSON(data []byte) error {
	var raw map[string]json.RawMessage
	if err := util.UnmarshalJSON(data, &raw); err != nil {
		return err
	}

	// destination for every recognised key
	targets := map[string]interface{}{
		"name":                       &fm.Name,
		"type":                       &fm.Type,
		"analyzer":                   &fm.Analyzer,
		"store":                      &fm.Store,
		"index":                      &fm.Index,
		"include_term_vectors":       &fm.IncludeTermVectors,
		"include_in_all":             &fm.IncludeInAll,
		"date_format":                &fm.DateFormat,
		"docvalues":                  &fm.DocValues,
		"skip_freq_norm":             &fm.SkipFreqNorm,
		"dims":                       &fm.Dims,
		"similarity":                 &fm.Similarity,
		"vector_index_optimized_for": &fm.VectorIndexOptimizedFor,
		"synonym_source":             &fm.SynonymSource,
	}

	var invalidKeys []string
	for key, value := range raw {
		dst, known := targets[key]
		if !known {
			invalidKeys = append(invalidKeys, key)
			continue
		}
		if err := util.UnmarshalJSON(value, dst); err != nil {
			return err
		}
	}

	if MappingJSONStrict && len(invalidKeys) > 0 {
		return fmt.Errorf("field mapping contains invalid keys: %v", invalidKeys)
	}
	return nil
}

573
vendor/github.com/blevesearch/bleve/v2/mapping/index.go generated vendored Normal file
View File

@@ -0,0 +1,573 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
"github.com/blevesearch/bleve/v2/analysis/datetime/optional"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// MappingJSONStrict controls how the UnmarshalJSON methods in this package
// treat unknown keys: when true they return an error listing the invalid
// keys, when false (the default) unknown keys are ignored.
var MappingJSONStrict = false
// Defaults installed by NewIndexMapping and by IndexMappingImpl.UnmarshalJSON
// for values that were not supplied.
const defaultTypeField = "_type"
const defaultType = "_default"
const defaultField = "_all"
const defaultAnalyzer = standard.Name
const defaultDateTimeParser = optional.Name
// An IndexMappingImpl controls how objects are placed
// into an index.
// First the type of the object is determined.
// Once the type is known, the appropriate
// DocumentMapping is selected by the type.
// If no mapping was determined for that type,
// a DefaultMapping will be used.
type IndexMappingImpl struct {
// TypeMapping holds the document mappings keyed by document type.
TypeMapping map[string]*DocumentMapping `json:"types,omitempty"`
// DefaultMapping is used when TypeMapping has no entry for a type.
DefaultMapping *DocumentMapping `json:"default_mapping"`
// TypeField is the property path inspected to find a document's type.
TypeField string `json:"type_field"`
// DefaultType is the type reported when none can be determined.
DefaultType string `json:"default_type"`
// DefaultAnalyzer is the analyzer name used when nothing more specific applies.
DefaultAnalyzer string `json:"default_analyzer"`
// DefaultDateTimeParser is used by DateTimeParserNamed for an empty name.
DefaultDateTimeParser string `json:"default_datetime_parser"`
// DefaultSynonymSource is the fallback returned by SynonymSourceForPath.
DefaultSynonymSource string `json:"default_synonym_source,omitempty"`
// ScoringModel, when non-empty, must be one of index.SupportedScoringModels.
ScoringModel string `json:"scoring_model,omitempty"`
// DefaultField is the field name returned by DefaultSearchField.
DefaultField string `json:"default_field"`
StoreDynamic bool `json:"store_dynamic"`
IndexDynamic bool `json:"index_dynamic"`
DocValuesDynamic bool `json:"docvalues_dynamic"`
// CustomAnalysis records the configuration of the components registered
// through the AddCustom* methods and AddSynonymSource.
CustomAnalysis *customAnalysis `json:"analysis,omitempty"`
// cache is the registry cache used to define and look up analysis components.
cache *registry.Cache
}
// AddCustomCharFilter defines a custom char filter for use in this mapping.
// The filter is first defined in the mapping's registry cache; only when that
// succeeds is its config recorded under name in CustomAnalysis.
func (im *IndexMappingImpl) AddCustomCharFilter(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineCharFilter(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.CharFilters[name] = config
	return nil
}
// AddCustomTokenizer defines a custom tokenizer for use in this mapping.
// The tokenizer is first defined in the mapping's registry cache; only when
// that succeeds is its config recorded under name in CustomAnalysis.
func (im *IndexMappingImpl) AddCustomTokenizer(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineTokenizer(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.Tokenizers[name] = config
	return nil
}
// AddCustomTokenMap defines a custom token map for use in this mapping.
// The token map is first defined in the mapping's registry cache; only when
// that succeeds is its config recorded under name in CustomAnalysis.
func (im *IndexMappingImpl) AddCustomTokenMap(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineTokenMap(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.TokenMaps[name] = config
	return nil
}
// AddCustomTokenFilter defines a custom token filter for use in this mapping.
// The filter is first defined in the mapping's registry cache; only when that
// succeeds is its config recorded under name in CustomAnalysis.
func (im *IndexMappingImpl) AddCustomTokenFilter(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineTokenFilter(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.TokenFilters[name] = config
	return nil
}
// AddCustomAnalyzer defines a custom analyzer for use in this mapping. The
// config map must have a "type" string entry to resolve the analyzer
// constructor. The constructor is invoked with the remaining entries and the
// returned analyzer is registered in the IndexMapping.
//
// bleve comes with predefined analyzers, like
// github.com/blevesearch/bleve/v2/analysis/analyzer/custom. They are
// available only if their package is imported by client code. To achieve this,
// use their metadata to fill configuration entries:
//
//	import (
//	    "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
//	    "github.com/blevesearch/bleve/v2/analysis/char/html"
//	    "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
//	    "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
//	)
//
//	m := bleve.NewIndexMapping()
//	err := m.AddCustomAnalyzer("html", map[string]interface{}{
//	    "type": custom.Name,
//	    "char_filters": []string{
//	        html.Name,
//	    },
//	    "tokenizer": unicode.Name,
//	    "token_filters": []string{
//	        lowercase.Name,
//	        ...
//	    },
//	})
func (im *IndexMappingImpl) AddCustomAnalyzer(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineAnalyzer(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.Analyzers[name] = config
	return nil
}
// AddCustomDateTimeParser defines a custom date time parser for use in this
// mapping. The parser is first defined in the mapping's registry cache; only
// when that succeeds is its config recorded under name in CustomAnalysis.
func (im *IndexMappingImpl) AddCustomDateTimeParser(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineDateTimeParser(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.DateTimeParsers[name] = config
	return nil
}
// AddSynonymSource defines a synonym source for use in this mapping. The
// source is first defined in the mapping's registry cache; only when that
// succeeds is its config recorded under name in CustomAnalysis.
func (im *IndexMappingImpl) AddSynonymSource(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineSynonymSource(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.SynonymSources[name] = config
	return nil
}
// NewIndexMapping creates a new IndexMapping that will use all the default
// indexing rules: package defaults for the type field, type, analyzer,
// datetime parser and search field, an empty type map, a fresh default
// document mapping, empty custom analysis and a new registry cache.
func NewIndexMapping() *IndexMappingImpl {
	im := new(IndexMappingImpl)

	im.TypeMapping = make(map[string]*DocumentMapping)
	im.DefaultMapping = NewDocumentMapping()

	im.TypeField = defaultTypeField
	im.DefaultType = defaultType
	im.DefaultAnalyzer = defaultAnalyzer
	im.DefaultDateTimeParser = defaultDateTimeParser
	im.DefaultField = defaultField

	// dynamic-field behaviour follows the package-level settings
	im.IndexDynamic = IndexDynamic
	im.StoreDynamic = StoreDynamic
	im.DocValuesDynamic = DocValuesDynamic

	im.CustomAnalysis = newCustomAnalysis()
	im.cache = registry.NewCache()
	return im
}
// Validate will walk the entire structure ensuring the following
// explicitly named and default analyzers can be built.
//
// It checks, in order: the default analyzer, the default datetime parser, the
// default synonym source (when set), the default document mapping, every
// type-specific document mapping, and finally the scoring model (when set).
// The first failure is returned.
func (im *IndexMappingImpl) Validate() error {
	if _, err := im.cache.AnalyzerNamed(im.DefaultAnalyzer); err != nil {
		return err
	}
	if _, err := im.cache.DateTimeParserNamed(im.DefaultDateTimeParser); err != nil {
		return err
	}
	if im.DefaultSynonymSource != "" {
		if _, err := im.cache.SynonymSourceNamed(im.DefaultSynonymSource); err != nil {
			return err
		}
	}

	// the alias context is shared across all document mappings
	fieldAliasCtx := make(map[string]*FieldMapping)
	if err := im.DefaultMapping.Validate(im.cache, "", fieldAliasCtx); err != nil {
		return err
	}
	for _, dm := range im.TypeMapping {
		if err := dm.Validate(im.cache, "", fieldAliasCtx); err != nil {
			return err
		}
	}

	if im.ScoringModel != "" {
		if _, supported := index.SupportedScoringModels[im.ScoringModel]; !supported {
			return fmt.Errorf("unsupported scoring model: %s", im.ScoringModel)
		}
	}
	return nil
}
// AddDocumentMapping sets a custom document mapping for the specified type,
// replacing any mapping previously registered for doctype.
//
// TypeMapping is allocated on demand so the call does not panic on a nil map
// (for example on an IndexMappingImpl that was not built by NewIndexMapping).
func (im *IndexMappingImpl) AddDocumentMapping(doctype string, dm *DocumentMapping) {
	if im.TypeMapping == nil {
		im.TypeMapping = make(map[string]*DocumentMapping)
	}
	im.TypeMapping[doctype] = dm
}
// mappingForType returns the document mapping registered for docType, or
// DefaultMapping when there is no entry (or the entry is nil).
func (im *IndexMappingImpl) mappingForType(docType string) *DocumentMapping {
	if dm := im.TypeMapping[docType]; dm != nil {
		return dm
	}
	return im.DefaultMapping
}
// UnmarshalJSON offers custom unmarshaling with optional strict validation.
//
// The receiver is first reset to the same defaults NewIndexMapping uses, so
// keys missing from data keep their default values. Each recognised top-level
// key is then decoded into its member. Unknown keys are collected and only
// reported as an error when MappingJSONStrict is true. Finally the custom
// analysis components are registered with the mapping's cache.
//
// An explicit JSON null for "analysis", "default_mapping" or "types" is
// treated like an omitted key: the default is restored instead of leaving a
// nil pointer or map behind, which would otherwise panic during registration
// or later use of the mapping.
func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {
	var tmp map[string]json.RawMessage
	if err := util.UnmarshalJSON(data, &tmp); err != nil {
		return err
	}

	// set defaults for fields which might have been omitted
	im.cache = registry.NewCache()
	im.CustomAnalysis = newCustomAnalysis()
	im.TypeField = defaultTypeField
	im.DefaultType = defaultType
	im.DefaultAnalyzer = defaultAnalyzer
	im.DefaultDateTimeParser = defaultDateTimeParser
	im.DefaultField = defaultField
	im.DefaultMapping = NewDocumentMapping()
	im.TypeMapping = make(map[string]*DocumentMapping)
	im.StoreDynamic = StoreDynamic
	im.IndexDynamic = IndexDynamic
	im.DocValuesDynamic = DocValuesDynamic

	// destination for every recognised top-level key
	targets := map[string]interface{}{
		"analysis":                &im.CustomAnalysis,
		"type_field":              &im.TypeField,
		"default_type":            &im.DefaultType,
		"default_analyzer":        &im.DefaultAnalyzer,
		"default_datetime_parser": &im.DefaultDateTimeParser,
		"default_synonym_source":  &im.DefaultSynonymSource,
		"default_field":           &im.DefaultField,
		"default_mapping":         &im.DefaultMapping,
		"types":                   &im.TypeMapping,
		"store_dynamic":           &im.StoreDynamic,
		"index_dynamic":           &im.IndexDynamic,
		"docvalues_dynamic":       &im.DocValuesDynamic,
		"scoring_model":           &im.ScoringModel,
	}

	var invalidKeys []string
	for k, v := range tmp {
		dst, known := targets[k]
		if !known {
			invalidKeys = append(invalidKeys, k)
			continue
		}
		if err := util.UnmarshalJSON(v, dst); err != nil {
			return err
		}
	}

	if MappingJSONStrict && len(invalidKeys) > 0 {
		return fmt.Errorf("index mapping contains invalid keys: %v", invalidKeys)
	}

	// Decoding a JSON null into a pointer or map sets it to nil, discarding
	// the defaults installed above; put them back.
	if im.CustomAnalysis == nil {
		im.CustomAnalysis = newCustomAnalysis()
	}
	if im.DefaultMapping == nil {
		im.DefaultMapping = NewDocumentMapping()
	}
	if im.TypeMapping == nil {
		im.TypeMapping = make(map[string]*DocumentMapping)
	}

	return im.CustomAnalysis.registerAll(im)
}
// determineType works out the document type of data. Precedence is:
// the bleveClassifier interface, then the Classifier interface, then a string
// found at the TypeField property path, and finally DefaultType.
func (im *IndexMappingImpl) determineType(data interface{}) string {
	// cases are tried in order, so BleveType wins over Type
	switch c := data.(type) {
	case bleveClassifier:
		return c.BleveType()
	case Classifier:
		return c.Type()
	}

	// now see if we can find a type using the mapping
	if typ, ok := mustString(lookupPropertyPath(data, im.TypeField)); ok {
		return typ
	}
	return im.DefaultType
}
// MapDocument walks data with the document mapping selected for its type and
// adds the resulting fields to doc. Unless the mapping disables "_all", a
// composite "_all" field is added as well, and the document is marked as
// indexed. Nothing is done when the selected mapping is disabled. The
// returned error is always nil.
func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error {
	docMapping := im.mappingForType(im.determineType(data))
	if !docMapping.Enabled {
		return nil
	}

	ctx := im.newWalkContext(doc, docMapping)
	docMapping.walkDocument(data, []string{}, []uint64{}, ctx)

	// see if the _all field was disabled
	if allMapping, _ := docMapping.documentMappingForPath("_all"); allMapping == nil || allMapping.Enabled {
		doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, []string{},
			ctx.excludedFromAll, index.IndexField|index.IncludeTermVectors))
	}
	doc.SetIndexed()
	return nil
}
// MapSynonymDocument adds one synonym field to doc for every synonym source
// whose collection equals collection. Each field is named after its synonym
// source and uses that source's analyzer. An error is returned when a
// matching source names an analyzer that cannot be resolved.
func (im *IndexMappingImpl) MapSynonymDocument(doc *document.Document, collection string, input []string, synonyms []string) error {
	return im.SynonymSourceVisitor(func(name string, item analysis.SynonymSource) error {
		if item.Collection() != collection {
			return nil
		}
		// create a new field with the name of the synonym source
		analyzer := im.AnalyzerNamed(item.Analyzer())
		if analyzer == nil {
			return fmt.Errorf("unknown analyzer named: %s", item.Analyzer())
		}
		doc.AddField(document.NewSynonymField(name, analyzer, input, synonyms))
		return nil
	})
}
// walkContext carries the state shared while a single document is walked and
// mapped (see MapDocument and newWalkContext).
type walkContext struct {
// doc is the document that fields are added to.
doc *document.Document
// im is the index mapping driving the walk.
im *IndexMappingImpl
// dm is the document mapping selected for the document's type.
dm *DocumentMapping
// excludedFromAll collects field names handed to the "_all" composite
// field as excluded; it is seeded with "_id".
excludedFromAll []string
}
// newWalkContext builds the walk state for mapping doc with dm. The exclusion
// list starts out containing "_id".
func (im *IndexMappingImpl) newWalkContext(doc *document.Document, dm *DocumentMapping) *walkContext {
	ctx := new(walkContext)
	ctx.doc = doc
	ctx.im = im
	ctx.dm = dm
	ctx.excludedFromAll = []string{"_id"}
	return ctx
}
// AnalyzerNameForPath attempts to find the best analyzer to use with only a
// field name. Lookup order:
//
//  1. an explicit analyzer on a field mapping at path in any type mapping;
//  2. an explicit analyzer on the first field mapping at path in the default
//     mapping;
//  3. the default analyzer for path of any enabled type mapping;
//  4. the default analyzer for path of the default mapping, if enabled;
//  5. the index mapping's DefaultAnalyzer.
func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string {
	// first we look for explicit mapping on the field
	for _, dm := range im.TypeMapping {
		if name := dm.analyzerNameForPath(path); name != "" {
			return name
		}
	}

	// now try the default mapping
	pathMapping, _ := im.DefaultMapping.documentMappingForPath(path)
	if pathMapping != nil && len(pathMapping.Fields) > 0 && pathMapping.Fields[0].Analyzer != "" {
		return pathMapping.Fields[0].Analyzer
	}

	// next we will try default analyzers for the path
	pathDecoded := decodePath(path)
	for _, dm := range im.TypeMapping {
		if !dm.Enabled {
			continue
		}
		if name := dm.defaultAnalyzerName(pathDecoded); name != "" {
			return name
		}
	}

	// now the default analyzer for the default mapping
	if im.DefaultMapping.Enabled {
		if name := im.DefaultMapping.defaultAnalyzerName(pathDecoded); name != "" {
			return name
		}
	}

	return im.DefaultAnalyzer
}
// AnalyzerNamed looks up the analyzer called name in the mapping's registry
// cache. When the lookup fails the failure is logged and nil is returned.
func (im *IndexMappingImpl) AnalyzerNamed(name string) analysis.Analyzer {
	found, err := im.cache.AnalyzerNamed(name)
	if err == nil {
		return found
	}
	logger.Printf("error using analyzer named: %s", name)
	return nil
}
// DateTimeParserNamed looks up the datetime parser called name in the
// mapping's registry cache; an empty name selects DefaultDateTimeParser. When
// the lookup fails the failure is logged and nil is returned.
func (im *IndexMappingImpl) DateTimeParserNamed(name string) analysis.DateTimeParser {
	if name == "" {
		name = im.DefaultDateTimeParser
	}
	parser, err := im.cache.DateTimeParserNamed(name)
	if err == nil {
		return parser
	}
	logger.Printf("error using datetime parser named: %s", name)
	return nil
}
// AnalyzeText runs text through the analyzer called analyzerName and returns
// the resulting token stream, or the lookup error when the analyzer cannot be
// resolved from the mapping's registry cache.
func (im *IndexMappingImpl) AnalyzeText(analyzerName string, text []byte) (analysis.TokenStream, error) {
	analyzer, err := im.cache.AnalyzerNamed(analyzerName)
	if err != nil {
		return nil, err
	}
	tokens := analyzer.Analyze(text)
	return tokens, nil
}
// FieldAnalyzer returns the name of the analyzer used on a field. It is a
// thin alias for AnalyzerNameForPath.
func (im *IndexMappingImpl) FieldAnalyzer(field string) string {
	name := im.AnalyzerNameForPath(field)
	return name
}
// FieldMappingForPath returns the mapping for a specific field 'path'. Type
// mappings are consulted first, then the default mapping; a copy of the first
// match is returned. When nothing describes path, the zero FieldMapping is
// returned.
func (im *IndexMappingImpl) FieldMappingForPath(path string) FieldMapping {
	// ranging over a nil map is a no-op, so no nil check is needed
	for _, dm := range im.TypeMapping {
		if fm := dm.fieldDescribedByPath(path); fm != nil {
			return *fm
		}
	}
	if fm := im.DefaultMapping.fieldDescribedByPath(path); fm != nil {
		return *fm
	}
	return FieldMapping{}
}
// DefaultSearchField returns the mapping's DefaultField. It exists as a
// wrapper to satisfy the IndexMapping interface.
func (im *IndexMappingImpl) DefaultSearchField() string {
	field := im.DefaultField
	return field
}
// SynonymSourceNamed looks up the synonym source called name in the mapping's
// registry cache. When the lookup fails the failure is logged and nil is
// returned.
func (im *IndexMappingImpl) SynonymSourceNamed(name string) analysis.SynonymSource {
	source, err := im.cache.SynonymSourceNamed(name)
	if err == nil {
		return source
	}
	logger.Printf("error using synonym source named: %s", name)
	return nil
}
// SynonymSourceForPath finds the synonym source to use for a field path.
// Lookup order mirrors AnalyzerNameForPath:
//
//  1. an explicit synonym source at path in any type mapping;
//  2. an explicit synonym source on the first field mapping at path in the
//     default mapping;
//  3. the default synonym source for path of any enabled type mapping;
//  4. the default synonym source for path of the default mapping, if enabled;
//  5. the index mapping's DefaultSynonymSource.
func (im *IndexMappingImpl) SynonymSourceForPath(path string) string {
	// first we look for explicit mapping on the field
	for _, dm := range im.TypeMapping {
		if source := dm.synonymSourceForPath(path); source != "" {
			return source
		}
	}

	// now try the default mapping
	pathMapping, _ := im.DefaultMapping.documentMappingForPath(path)
	if pathMapping != nil && len(pathMapping.Fields) > 0 && pathMapping.Fields[0].SynonymSource != "" {
		return pathMapping.Fields[0].SynonymSource
	}

	// next we will try default synonym sources for the path
	pathDecoded := decodePath(path)
	for _, dm := range im.TypeMapping {
		if !dm.Enabled {
			continue
		}
		if source := dm.defaultSynonymSource(pathDecoded); source != "" {
			return source
		}
	}

	// now the default synonym source for the default mapping
	if im.DefaultMapping.Enabled {
		if source := im.DefaultMapping.defaultSynonymSource(pathDecoded); source != "" {
			return source
		}
	}

	return im.DefaultSynonymSource
}
// SynonymCount returns the number of synonym sources defined in the mapping.
// A mapping without custom analysis (nil CustomAnalysis) has none, so 0 is
// returned instead of dereferencing the nil pointer.
func (im *IndexMappingImpl) SynonymCount() int {
	if im.CustomAnalysis == nil {
		return 0
	}
	return len(im.CustomAnalysis.SynonymSources)
}
// SynonymSourceVisitor allows a visitor to iterate over all synonym sources
// held in the mapping's registry cache. Any error reported by the visit is
// returned unchanged.
func (im *IndexMappingImpl) SynonymSourceVisitor(visitor analysis.SynonymSourceVisitor) error {
	return im.cache.SynonymSources.VisitSynonymSources(visitor)
}

View File

@@ -0,0 +1,76 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"io"
"log"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/document"
)
// A Classifier is an interface describing any object which knows how to
// identify its own type. Alternatively, if a struct already has a Type
// field or method in conflict, one can use BleveType instead.
type Classifier interface {
// Type returns the document type used to select a document mapping.
Type() string
}
// A bleveClassifier is an interface describing any object which knows how
// to identify its own type. This is introduced as an alternative to the
// Classifier interface which often has naming conflicts with existing
// structures. When an object implements both, BleveType takes precedence.
type bleveClassifier interface {
// BleveType returns the document type used to select a document mapping.
BleveType() string
}
// logger receives the package's diagnostic messages. It is never nil.
var logger = log.New(io.Discard, "bleve mapping ", log.LstdFlags)

// SetLog sets the logger used for logging;
// by default log messages are sent to io.Discard.
//
// Passing nil restores the default discarding logger rather than installing a
// nil logger, which would make every later logging call in this package panic.
func SetLog(l *log.Logger) {
	if l == nil {
		l = log.New(io.Discard, "bleve mapping ", log.LstdFlags)
	}
	logger = l
}
// IndexMapping is the set of operations needed to map documents into an index
// and to resolve analyzers, datetime parsers and field mappings by name or
// path. *IndexMappingImpl in this package provides these methods.
type IndexMapping interface {
// MapDocument populates doc with the fields derived from data.
MapDocument(doc *document.Document, data interface{}) error
// Validate reports whether the mapping is usable.
Validate() error
DateTimeParserNamed(name string) analysis.DateTimeParser
// DefaultSearchField returns the name of the default field.
DefaultSearchField() string
AnalyzerNameForPath(path string) string
AnalyzerNamed(name string) analysis.Analyzer
FieldMappingForPath(path string) FieldMapping
}
// A SynonymMapping extends the IndexMapping interface to provide
// additional methods for working with synonyms.
type SynonymMapping interface {
IndexMapping
// MapSynonymDocument populates doc with synonym fields for collection.
MapSynonymDocument(doc *document.Document, collection string, input []string, synonyms []string) error
SynonymSourceForPath(path string) string
SynonymSourceNamed(name string) analysis.SynonymSource
// SynonymCount returns the number of synonym sources defined.
SynonymCount() int
// SynonymSourceVisitor calls visitor for the defined synonym sources.
SynonymSourceVisitor(visitor analysis.SynonymSourceVisitor) error
}

View File

@@ -0,0 +1,44 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !vectors
// +build !vectors
package mapping
// NewVectorFieldMapping is the stub compiled when the "vectors" build tag is
// absent; vector fields are unavailable in such builds, so it returns nil.
func NewVectorFieldMapping() *FieldMapping {
return nil
}
// NewVectorBase64FieldMapping is the stub compiled when the "vectors" build
// tag is absent; it returns nil.
func NewVectorBase64FieldMapping() *FieldMapping {
return nil
}
// processVector is the stub compiled when the "vectors" build tag is absent.
// It adds nothing to the document and always reports false.
func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
pathString string, path []string, indexes []uint64, context *walkContext) bool {
return false
}
// processVectorBase64 is the stub compiled when the "vectors" build tag is
// absent. It does nothing.
func (fm *FieldMapping) processVectorBase64(propertyMightBeVector interface{},
pathString string, path []string, indexes []uint64, context *walkContext) {
}
// -----------------------------------------------------------------------------
// document validation functions
// validateFieldMapping validates a single field mapping. In builds without
// the "vectors" build tag only the field type is checked; parentName and
// fieldAliasCtx are unused.
func validateFieldMapping(field *FieldMapping, parentName string,
fieldAliasCtx map[string]*FieldMapping) error {
return validateFieldType(field)
}

View File

@@ -0,0 +1,272 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package mapping
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
faiss "github.com/blevesearch/go-faiss"
)
// MinVectorDims and MaxVectorDims are the smallest and largest dimensions
// accepted for a vector field (both bounds inclusive).
// They must be set/updated at process init() _only_.
var (
MinVectorDims = 1
MaxVectorDims = 4096
)
// NewVectorFieldMapping returns a field mapping of type "vector" that is
// indexed with frequency/norm information skipped; storing, doc values and
// inclusion in "_all" are all off.
func NewVectorFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "vector"
	fm.Index = true
	fm.SkipFreqNorm = true
	// explicitly off for vector fields
	fm.Store = false
	fm.IncludeInAll = false
	fm.DocValues = false
	return fm
}
// NewVectorBase64FieldMapping returns a field mapping of type "vector_base64"
// that is indexed with frequency/norm information skipped; storing, doc
// values and inclusion in "_all" are all off.
func NewVectorBase64FieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "vector_base64"
	fm.Index = true
	fm.SkipFreqNorm = true
	// explicitly off for vector fields
	fm.Store = false
	fm.IncludeInAll = false
	fm.DocValues = false
	return fm
}
// processFlatVector validates and converts a flat vector. vecV must hold
// exactly dims elements, each of which must be extractable as a float32;
// otherwise (nil, false) is returned.
func processFlatVector(vecV reflect.Value, dims int) ([]float32, bool) {
	if vecV.Len() != dims {
		return nil, false
	}

	out := make([]float32, dims)
	for i := range out {
		elem := vecV.Index(i)
		if !elem.CanInterface() {
			return nil, false
		}
		f, ok := util.ExtractNumericValFloat32(elem.Interface())
		if !ok {
			return nil, false
		}
		out[i] = f
	}
	return out, true
}
// processVector validates and converts a vector held in vecI.
//
// vecI must be a non-empty slice. If its first element is itself a slice the
// vector is treated as nested and every sub-vector is validated with
// processFlatVector and copied, in order, into one flattened result of
// len(vecI)*dims values. Otherwise the vector is flat and must hold exactly
// dims numeric values. The max supported depth of nesting is 2 ([][]float32).
//
// (nil, false) is returned for any invalid input, including a non-positive
// dims, which can never describe a valid vector and would otherwise make the
// nested-path allocation panic (negative) or accept empty sub-vectors (zero).
func processVector(vecI interface{}, dims int) ([]float32, bool) {
	if dims <= 0 {
		return nil, false
	}

	vecV := reflect.ValueOf(vecI)
	if !vecV.IsValid() || vecV.Kind() != reflect.Slice || vecV.Len() == 0 {
		return nil, false
	}

	// Let's examine the first element (head) of the vector.
	// If head is a slice, then vector is nested, otherwise flat.
	head := vecV.Index(0)
	if !head.CanInterface() {
		return nil, false
	}
	// round-trip through Interface() so an interface{} element is unwrapped
	// to the value it holds
	headV := reflect.ValueOf(head.Interface())
	if !headV.IsValid() {
		return nil, false
	}
	if headV.Kind() != reflect.Slice { // vector is flat
		return processFlatVector(vecV, dims)
	}

	// # process nested vector

	// pre-allocate memory for the flattened vector
	// so that we can use copy() later
	count := vecV.Len()
	rv := make([]float32, dims*count)
	for i := 0; i < count; i++ {
		subVec := vecV.Index(i)
		if !subVec.CanInterface() {
			return nil, false
		}
		subVecV := reflect.ValueOf(subVec.Interface())
		if !subVecV.IsValid() || subVecV.Kind() != reflect.Slice {
			return nil, false
		}
		flatVector, ok := processFlatVector(subVecV, dims)
		if !ok {
			return nil, false
		}
		copy(rv[i*dims:(i+1)*dims], flatVector)
	}
	return rv, true
}
// processVector converts propertyMightBeVector into a vector of fm.Dims
// dimensions and adds it to the document being built in context as a vector
// field. It reports whether a field was added; invalid vectors are skipped.
// The vector is normalized first when the mapping's similarity is cosine.
func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
	pathString string, path []string, indexes []uint64, context *walkContext) bool {
	vec, valid := processVector(propertyMightBeVector, fm.Dims)
	if !valid {
		// Don't add field to document if vector is invalid
		return false
	}
	if fm.Similarity == index.CosineSimilarity {
		// normalize raw vector if similarity is cosine
		vec = NormalizeVector(vec)
	}

	name := getFieldName(pathString, path, fm)
	context.doc.AddField(
		document.NewVectorFieldWithIndexingOptions(name, indexes, vec,
			fm.Dims, fm.Similarity, fm.VectorIndexOptimizedFor, fm.Options()))

	// "_all" composite field is not applicable for vector field
	context.excludedFromAll = append(context.excludedFromAll, name)
	return true
}
// processVectorBase64 decodes propertyMightBeVectorBase64 (which must be a
// string) with document.DecodeVector and adds the result to the document
// being built in context as a vector field. Values that are not strings, fail
// to decode, or do not have exactly fm.Dims elements are silently ignored.
// The vector is normalized first when the mapping's similarity is cosine.
func (fm *FieldMapping) processVectorBase64(propertyMightBeVectorBase64 interface{},
	pathString string, path []string, indexes []uint64, context *walkContext) {
	encoded, isString := propertyMightBeVectorBase64.(string)
	if !isString {
		return
	}
	vec, err := document.DecodeVector(encoded)
	if err != nil {
		return
	}
	if len(vec) != fm.Dims {
		return
	}
	if fm.Similarity == index.CosineSimilarity {
		// normalize raw vector if similarity is cosine
		vec = NormalizeVector(vec)
	}

	name := getFieldName(pathString, path, fm)
	context.doc.AddField(
		document.NewVectorFieldWithIndexingOptions(name, indexes, vec,
			fm.Dims, fm.Similarity, fm.VectorIndexOptimizedFor, fm.Options()))

	// "_all" composite field is not applicable for vector_base64 field
	context.excludedFromAll = append(context.excludedFromAll, name)
}
// -----------------------------------------------------------------------------
// document validation functions
// validateFieldMapping validates a single field mapping. Fields of type
// "vector" and "vector_base64" go through validateVectorFieldAlias; every
// other field only has its type checked by validateFieldType.
func validateFieldMapping(field *FieldMapping, parentName string,
	fieldAliasCtx map[string]*FieldMapping) error {
	if field.Type == "vector" || field.Type == "vector_base64" {
		return validateVectorFieldAlias(field, parentName, fieldAliasCtx)
	}
	// non-vector field
	return validateFieldType(field)
}
// validateVectorFieldAlias normalizes and validates a vector field mapping.
//
// The mapping is modified in place: a missing Name defaults to parentName, a
// missing Similarity and a missing or unsupported VectorIndexOptimizedFor
// fall back to the index defaults, and the options that do not apply to
// vectors are forced to fixed values.
//
// If fieldAliasCtx already holds a mapping with the same name, this mapping
// is an alias of it and only has to agree on dimensions and similarity.
// Otherwise the dimensions and similarity metric are validated and the
// mapping is recorded in fieldAliasCtx (when the map is non-nil).
func validateVectorFieldAlias(field *FieldMapping, parentName string,
	fieldAliasCtx map[string]*FieldMapping) error {
	if field.Name == "" {
		field.Name = parentName
	}
	if field.Similarity == "" {
		field.Similarity = index.DefaultVectorSimilarityMetric
	}
	// if no config or an unsupported config is provided, override to default
	if _, supported := index.SupportedVectorIndexOptimizations[field.VectorIndexOptimizedFor]; field.VectorIndexOptimizedFor == "" || !supported {
		field.VectorIndexOptimizedFor = index.DefaultIndexOptimization
	}

	// following fields are not applicable for vector
	// thus, we set them to default values
	field.IncludeInAll = false
	field.IncludeTermVectors = false
	field.Store = false
	field.DocValues = false
	field.SkipFreqNorm = true

	// # If alias is present, validate the field options as per the alias
	// note: reading from a nil map is safe
	if alias, isAlias := fieldAliasCtx[field.Name]; isAlias {
		switch {
		case field.Dims != alias.Dims:
			return fmt.Errorf("field: '%s', invalid alias (different dimensions %d and %d)",
				alias.Name, field.Dims, alias.Dims)
		case field.Similarity != alias.Similarity:
			return fmt.Errorf("field: '%s', invalid alias (different similarity values %s and %s)",
				alias.Name, field.Similarity, alias.Similarity)
		}
		return nil
	}

	// # Validate field options
	if field.Dims < MinVectorDims || field.Dims > MaxVectorDims {
		return fmt.Errorf("field: '%s', invalid vector dimension: %d, value should be in range (%d, %d)",
			field.Name, field.Dims, MinVectorDims, MaxVectorDims)
	}
	if _, supported := index.SupportedVectorSimilarityMetrics[field.Similarity]; !supported {
		return fmt.Errorf("field: '%s', invalid similarity metric: '%s', valid metrics are: %+v",
			field.Name, field.Similarity,
			reflect.ValueOf(index.SupportedVectorSimilarityMetrics).MapKeys())
	}

	if fieldAliasCtx != nil { // writing to a nil map is unsafe
		fieldAliasCtx[field.Name] = field
	}
	return nil
}
// NormalizeVector returns a normalized copy of vec. The input slice is left
// untouched: the values are copied first and faiss normalizes that copy in
// place.
func NormalizeVector(vec []float32) []float32 {
	normalized := make([]float32, len(vec))
	copy(normalized, vec)
	return faiss.NormalizeVector(normalized)
}

View File

@@ -0,0 +1,92 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"reflect"
"strings"
)
// lookupPropertyPath follows the dot-separated path through data, one path
// element at a time, and returns the value found at the end. nil is returned
// as soon as any element cannot be resolved.
func lookupPropertyPath(data interface{}, path string) interface{} {
	current := data
	for _, part := range decodePath(path) {
		if current = lookupPropertyPathPart(current, part); current == nil {
			break
		}
	}
	return current
}
// lookupPropertyPathPart resolves a single path element against data using
// reflection.
//
//   - Map whose key type has kind string: part is used as the key and the
//     matching entry is returned.
//   - Struct: part is treated as a field name; the field value is returned
//     when the field exists and can be interfaced.
//   - Pointer: the pointer is dereferenced and the lookup is retried on the
//     pointee.
//
// A nil data value, a nil pointer, a missing key or field, a field that
// cannot be interfaced, or any other kind yields nil.
func lookupPropertyPathPart(data interface{}, part string) interface{} {
	val := reflect.ValueOf(data)
	if !val.IsValid() {
		return nil
	}
	typ := val.Type()
	switch typ.Kind() {
	case reflect.Map:
		// FIXME can add support for other map keys in the future
		if keyType := typ.Key(); keyType.Kind() == reflect.String {
			// Convert to the map's exact key type. MapIndex panics when the
			// key is not assignable to it, and a plain string is not
			// assignable to a defined string type (e.g. map[MyKey]T) even
			// though the Kind check above passes.
			key := reflect.ValueOf(part).Convert(keyType)
			entry := val.MapIndex(key)
			if entry.IsValid() {
				return entry.Interface()
			}
		}
	case reflect.Struct:
		field := val.FieldByName(part)
		// CanInterface is false for unexported fields; Interface would panic.
		if field.IsValid() && field.CanInterface() {
			return field.Interface()
		}
	case reflect.Ptr:
		// Elem of a nil pointer is the zero Value, which fails IsValid.
		ptrElem := val.Elem()
		if ptrElem.IsValid() && ptrElem.CanInterface() {
			return lookupPropertyPathPart(ptrElem.Interface(), part)
		}
	}
	return nil
}
// pathSeparator is the delimiter between the elements of a property path; it
// is what decodePath splits on and what encodePath joins with.
const pathSeparator = "."
// decodePath breaks path into its elements by splitting on pathSeparator.
func decodePath(path string) []string {
	elements := strings.Split(path, pathSeparator)
	return elements
}
// encodePath joins pathElements into a single path string, placing
// pathSeparator between consecutive elements.
func encodePath(pathElements []string) string {
	joined := strings.Join(pathElements, pathSeparator)
	return joined
}
// mustString reports whether data holds a value of type string and, if so,
// returns that string. For nil or any other type it returns "" and false.
func mustString(data interface{}) (string, bool) {
	// The comma-ok assertion already yields ("", false) for a nil interface,
	// so no separate nil check is needed.
	text, isString := data.(string)
	if !isString {
		return "", false
	}
	return text, true
}
// parseTagName returns the portion of a struct tag value that precedes the
// first comma (the field name), or the whole tag when it contains no comma.
func parseTagName(tag string) string {
	comma := strings.IndexByte(tag, ',')
	if comma < 0 {
		// No options follow the name.
		return tag
	}
	return tag[:comma]
}

View File

@@ -0,0 +1,71 @@
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// SynonymSource pairs a synonym collection name with an analyzer name. The
// two fields are serialized to JSON under the keys "collection" and
// "analyzer".
type SynonymSource struct {
// CollectionName is the value returned by Collection and set by SetCollection.
CollectionName string `json:"collection"`
// AnalyzerName is the value returned by Analyzer and set by SetAnalyzer.
AnalyzerName string `json:"analyzer"`
}
// NewSynonymSource allocates a SynonymSource whose CollectionName and
// AnalyzerName are set to collection and analyzer respectively. The arguments
// are stored as given; no validation is performed here.
func NewSynonymSource(collection, analyzer string) *SynonymSource {
	src := new(SynonymSource)
	src.CollectionName = collection
	src.AnalyzerName = analyzer
	return src
}
// Collection returns the CollectionName field.
func (s *SynonymSource) Collection() string {
return s.CollectionName
}
// Analyzer returns the AnalyzerName field.
func (s *SynonymSource) Analyzer() string {
return s.AnalyzerName
}
// SetCollection overwrites the CollectionName field with c.
func (s *SynonymSource) SetCollection(c string) {
s.CollectionName = c
}
// SetAnalyzer overwrites the AnalyzerName field with a.
func (s *SynonymSource) SetAnalyzer(a string) {
s.AnalyzerName = a
}
// SynonymSourceConstructor builds a SynonymSource from a generic config map.
//
// config must hold string values under the keys "collection" and "analyzer";
// a missing key or a non-string value is rejected. The analyzer name is then
// looked up through cache.AnalyzerNamed, and the source is created only when
// that lookup succeeds.
//
// It returns an error when either key is absent or not a string, or when the
// cache cannot supply the named analyzer. In the latter case the cache's own
// error is wrapped, so callers can inspect it with errors.Is / errors.As
// instead of seeing only a fixed "not found" message.
func SynonymSourceConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.SynonymSource, error) {
	collection, ok := config["collection"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify collection")
	}
	analyzer, ok := config["analyzer"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify analyzer")
	}
	// Only the success of the lookup matters here; the analyzer value itself
	// is not used.
	if _, err := cache.AnalyzerNamed(analyzer); err != nil {
		// Keep the original message prefix and chain the underlying cause
		// rather than discarding it.
		return nil, fmt.Errorf("analyzer named '%s' not found: %w", analyzer, err)
	}
	return NewSynonymSource(collection, analyzer), nil
}
// init registers SynonymSourceConstructor with the registry under
// analysis.SynonymSourceType. A registration error causes a panic.
func init() {
	if err := registry.RegisterSynonymSource(analysis.SynonymSourceType, SynonymSourceConstructor); err != nil {
		panic(err)
	}
}