Files
CHORUS/vendor/github.com/blevesearch/geo/s2/region_term_indexer.go
anthonyrawlins 9bdcbe0447 Integrate BACKBEAT SDK and resolve KACHING license validation
Major integrations and fixes:
- Added BACKBEAT SDK integration for P2P operation timing
- Implemented beat-aware status tracking for distributed operations
- Added Docker secrets support for secure license management
- Resolved KACHING license validation via HTTPS/TLS
- Updated docker-compose configuration for clean stack deployment
- Disabled rollback policies to prevent deployment failures
- Added license credential storage (CHORUS-DEV-MULTI-001)

Technical improvements:
- BACKBEAT P2P operation tracking with phase management
- Enhanced configuration system with file-based secrets
- Improved error handling for license validation
- Clean separation of KACHING and CHORUS deployment stacks

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-06 07:56:26 +10:00

442 lines
15 KiB
Go

// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Indexing Strategy
// -----------------
//
// Given a query region, we want to find all of the document regions that
// intersect it. The first step is to represent all the regions as S2Cell
// coverings (see S2RegionCoverer). We then split the problem into two parts,
// namely finding the document regions that are "smaller" than the query
// region and those that are "larger" than the query region.
//
// We do this by defining two terms for each S2CellId: a "covering term" and
// an "ancestor term". (In the implementation below, covering terms are
// distinguished by prefixing a '$' to them.) For each document region, we
// insert a covering term for every cell in the region's covering, and we
// insert an ancestor term for these cells *and* all of their ancestors.
//
// Then given a query region, we can look up all the document regions that
// intersect its covering by querying the union of the following terms:
//
// 1. An "ancestor term" for each cell in the query region. These terms
// ensure that we find all document regions that are "smaller" than the
// query region, i.e. where the query region contains a cell that is either
// a cell of a document region or one of its ancestors.
//
// 2. A "covering term" for every ancestor of the cells in the query region.
// These terms ensure that we find all the document regions that are
// "larger" than the query region, i.e. where the document region contains
// a cell that is a (proper) ancestor of a cell in the query region.
//
// Together, these terms find all of the document regions that intersect the
// query region. Furthermore, the number of terms to be indexed and queried
// are both fairly small, and can be bounded in terms of max_cells() and the
// number of cell levels used.
//
// Optimizations
// -------------
//
// + Cells at the maximum level being indexed (max_level()) have the special
// property that they will never be an ancestor of a cell in the query
// region. Therefore we can safely skip generating "covering terms" for
// these cells (see query step 2 above).
//
// + If the index will contain only points (rather than general regions), then
// we can skip all the covering terms mentioned above because there will
// never be any document regions larger than the query region. This can
// significantly reduce the size of queries.
//
// + If it is more important to optimize index size rather than query speed,
// the number of index terms can be reduced by creating ancestor terms only
// for the *proper* ancestors of the cells in a document region, and
// compensating for this by including covering terms for all cells in the
// query region (in addition to their ancestors).
//
// Effectively, when the query region and a document region contain exactly
// the same cell, we have a choice about whether to treat this match as a
// "covering term" or an "ancestor term". One choice minimizes query size
// while the other minimizes index size.
package s2
import (
"strings"
"github.com/blevesearch/geo/s1"
)
// TermType identifies which of the two kinds of term (ancestor or covering)
// is being generated for a cell.
type TermType int

const (
	// ANCESTOR selects an "ancestor term": the cell token with no marker.
	ANCESTOR TermType = iota + 1
	// COVERING selects a "covering term": the cell token preceded by marker.
	COVERING
)

var (
	// marker is inserted in front of the cell token of every covering term
	// and is what FilterOutCoveringTerms looks for.
	marker = "$"

	// defaultMaxCells matches the maxCells value that NewRegionTermIndexer
	// uses for its default options.
	defaultMaxCells = 8
)
// Options holds the tuning parameters used by RegionTermIndexer when it
// generates index and query terms. All fields are unexported; use the
// accessor methods below to read or change them.
type Options struct {
	///////////////// Options Inherited From S2RegionCoverer ////////////////

	// maxCells controls the maximum number of cells when approximating
	// each region. This parameter value may be changed as often as desired,
	// e.g. to approximate some regions more accurately than others.
	//
	// Increasing this value during indexing will make indexes more accurate
	// but larger. Increasing this value for queries will make queries more
	// accurate but slower. (See regioncoverer.go for details on how this
	// parameter affects accuracy.) For example, if you don't mind large
	// indexes but want fast serving, it might be reasonable to set
	// maxCells to 100 during indexing and to 8 for queries.
	//
	// DEFAULT: 8 (coarse approximations)
	maxCells int

	// minLevel and maxLevel control the minimum and maximum size of the
	// S2Cells used to approximate regions. Setting these parameters
	// appropriately can reduce the size of the index and speed up queries by
	// reducing the number of terms needed. For example, if you know that
	// your query regions will rarely be less than 100 meters in width, then
	// you could set maxLevel to the cell level whose average width is
	// closest to 100 meters (a level number, not the literal value 100).
	//
	// This restricts the index to S2Cells that are approximately 100 meters
	// across or larger. Similarly, if you know that query regions will
	// rarely be larger than 1000km across, then you could set minLevel to
	// the level whose average cell width is closest to 1000km.
	//
	// If minLevel is set too high, then large regions may generate too
	// many query terms. If maxLevel is set too low, then small query
	// regions will not be able to discriminate which regions they intersect
	// very precisely and may return many more candidates than necessary.
	//
	// If you have no idea about the scale of the regions being queried,
	// it is perfectly fine to set minLevel to 0 and maxLevel to 30.
	// The only drawback is that this may result in a larger index and
	// slower queries.
	//
	// The default parameter values are suitable for query regions ranging
	// from about 100 meters to 3000 km across.
	//
	// DEFAULT: 4 (average cell width == 600km)
	minLevel int

	// DEFAULT: 16 (average cell width == 150m)
	maxLevel int

	// Setting levelMod to a value greater than 1 increases the effective
	// branching factor of the S2Cell hierarchy by skipping some levels. For
	// example, if levelMod is set to 2 then every second level is skipped
	// (which increases the effective branching factor to 16). You might want
	// to consider doing this if your query regions are typically very small
	// (e.g., single points) and you don't mind increasing the index size
	// (since skipping levels will reduce the accuracy of cell coverings for a
	// given maxCells limit).
	//
	// DEFAULT: 1 (don't skip any cell levels)
	levelMod int

	///////////////// Options Specific To The Term Indexer ////////////////

	// If your index will only contain points (rather than regions), be sure
	// to set this flag. This will generate smaller and faster queries that
	// are specialized for the points-only case.
	//
	// With the default quality settings, this flag reduces the number of
	// query terms by about a factor of two. (The improvement gets smaller
	// as maxCells is increased, but there is really no reason not to use
	// this flag if your index consists entirely of points.)
	//
	// DEFAULT: false
	pointsOnly bool

	// If true, the index will be optimized for space rather than for query
	// time. With the default quality settings, this flag reduces the number
	// of index terms and increases the number of query terms by the same
	// factor (approximately 1.3). The factor increases up to a limiting
	// ratio of 2.0 as maxCells is increased.
	//
	// CAVEAT: This option has no effect if the index contains only points.
	//
	// DEFAULT: false
	optimizeSpace bool
}

// MaxCells returns the configured maximum number of cells per covering.
func (o *Options) MaxCells() int {
	return o.maxCells
}

// SetMaxCells sets the maximum number of cells per covering.
func (o *Options) SetMaxCells(mc int) {
	o.maxCells = mc
}

// MinLevel returns the configured minimum cell level.
func (o *Options) MinLevel() int {
	return o.minLevel
}

// SetMinLevel sets the minimum cell level.
func (o *Options) SetMinLevel(ml int) {
	o.minLevel = ml
}

// MaxLevel returns the configured maximum cell level.
func (o *Options) MaxLevel() int {
	return o.maxLevel
}

// SetMaxLevel sets the maximum cell level.
func (o *Options) SetMaxLevel(ml int) {
	o.maxLevel = ml
}

// LevelMod returns the configured level step. The value is returned exactly
// as stored; no validation is applied.
func (o *Options) LevelMod() int {
	return o.levelMod
}

// SetLevelMod sets the level step. The value is stored as given.
func (o *Options) SetLevelMod(lm int) {
	o.levelMod = lm
}

// SetPointsOnly declares whether the index contains only points.
func (o *Options) SetPointsOnly(v bool) {
	o.pointsOnly = v
}

// SetOptimizeSpace selects whether to favor index size over query size.
func (o *Options) SetOptimizeSpace(v bool) {
	o.optimizeSpace = v
}

// trueMaxLevel returns the deepest level that can be reached from minLevel
// in steps of levelMod without exceeding maxLevel. When no levels are
// skipped this is simply maxLevel.
func (o *Options) trueMaxLevel() int {
	// A levelMod of 1 skips no levels. Values below 1 (including the zero
	// value of Options) are treated the same way; without this guard a
	// levelMod of 0 would reach the modulo below and panic with an integer
	// division by zero.
	if o.levelMod <= 1 {
		return o.maxLevel
	}
	return o.maxLevel - (o.maxLevel-o.minLevel)%o.levelMod
}
// RegionTermIndexer is a helper struct for adding spatial data to an
// information retrieval system. Such systems work by converting documents
// into a collection of "index terms" (e.g., representing words or phrases),
// and then building an "inverted index" that maps each term to a list of
// documents (and document positions) where that term occurs.
//
// This type deals with the problem of converting spatial data into index
// terms, which can then be indexed along with the other document information.
//
// Spatial data is represented using the S2Region type. Useful S2Region
// subtypes include (names as used by the C++ S2 library):
//
// S2Cap
// - a disc-shaped region
//
// S2LatLngRect
// - a rectangle in latitude-longitude coordinates
//
// S2Polyline
// - a polyline
//
// S2Polygon
// - a polygon, possibly with multiple holes and/or shells
//
// S2CellUnion
// - a region approximated as a collection of S2CellIds
//
// S2ShapeIndexRegion
// - an arbitrary collection of points, polylines, and polygons
//
// S2ShapeIndexBufferedRegion
// - like the above, but expanded by a given radius
//
// S2RegionUnion, S2RegionIntersection
// - the union or intersection of arbitrary other regions
//
// So for example, if you want to query documents that are within 500 meters
// of a polyline, you could use an S2ShapeIndexBufferedRegion containing the
// polyline with a radius of 500 meters.
//
// For example usage, refer to:
// https://github.com/google/s2geometry/blob/ad1489e898f369ca09e2099353ccd55bd0fd7a26/src/s2/s2region_term_indexer.h#L58
//
// NOTE(review): GetIndexTermsForRegion and GetQueryTermsForRegion write to
// the regionCoverer field on every call, so sharing one indexer between
// goroutines presumably needs external synchronization - confirm.
type RegionTermIndexer struct {
// options holds the parameters read by the term-generation methods.
options Options
// regionCoverer computes coverings for the *ForRegion methods; its
// LevelMod, MaxLevel, MinLevel and MaxCells fields are overwritten from
// options before each use.
regionCoverer RegionCoverer
}
// NewRegionTermIndexer returns a RegionTermIndexer configured with the
// default options: maxCells 8, minLevel 4, maxLevel 16 and levelMod 1, with
// pointsOnly and optimizeSpace left false.
func NewRegionTermIndexer() *RegionTermIndexer {
	defaults := Options{
		levelMod: 1,
		minLevel: 4,
		maxLevel: 16,
		maxCells: 8,
	}
	return &RegionTermIndexer{options: defaults}
}
// NewRegionTermIndexerWithOptions returns a RegionTermIndexer that uses a
// copy of the supplied options instead of the defaults.
//
// A levelMod below 1 (for example from a zero-value Options) is raised to 1,
// meaning "skip no levels". Left as-is, such a value would make the
// level-stepping loops in the term-generation methods never advance, and a
// levelMod of 0 would make trueMaxLevel divide by zero.
func NewRegionTermIndexerWithOptions(option Options) *RegionTermIndexer {
	// option is passed by value, so normalizing it here does not affect the
	// caller's copy.
	if option.levelMod < 1 {
		option.levelMod = 1
	}
	return &RegionTermIndexer{options: option}
}
// GetTerm builds the term string for a cell. An ANCESTOR term is prefix
// followed by the cell's token; any other term type yields prefix, then the
// covering marker, then the token.
func (rti *RegionTermIndexer) GetTerm(termTyp TermType, id CellID,
	prefix string) string {
	token := id.ToToken()
	if termTyp != ANCESTOR {
		return prefix + marker + token
	}
	return prefix + token
}
// GetIndexTermsForPoint returns the index terms for a single point: one
// ancestor term for the point's cell at every level from minLevel up to
// maxLevel, advancing by levelMod. Each term starts with prefix.
//
// The indexing strategy is described at the top of this file. The deepest
// cell produced here is effectively the covering of the point, yet it is
// still indexed as an ancestor term only; that is sufficient because a query
// region never contains a descendant of such a cell. This remains true even
// when max_level() != true_max_level() (see S2RegionCoverer::Options).
func (rti *RegionTermIndexer) GetIndexTermsForPoint(p Point, prefix string) []string {
	opts := &rti.options
	leaf := cellIDFromPoint(p)

	var terms []string
	for level := opts.minLevel; level <= opts.maxLevel; level += opts.levelMod {
		ancestor := leaf.Parent(level)
		terms = append(terms, rti.GetTerm(ANCESTOR, ancestor, prefix))
	}
	return terms
}
// GetIndexTermsForRegion returns the index terms for an arbitrary region.
// It copies the covering parameters from the indexer's options into its
// region coverer, computes a covering of region, and passes that covering
// to GetIndexTermsForCanonicalCovering together with prefix.
func (rti *RegionTermIndexer) GetIndexTermsForRegion(region Region,
	prefix string) []string {
	coverer := &rti.regionCoverer
	coverer.MinLevel = rti.options.minLevel
	coverer.MaxLevel = rti.options.maxLevel
	coverer.LevelMod = rti.options.levelMod
	coverer.MaxCells = rti.options.maxCells

	return rti.GetIndexTermsForCanonicalCovering(coverer.Covering(region), prefix)
}
// GetIndexTermsForCanonicalCovering returns the index terms for a covering
// that was produced with this indexer's level settings. Each term starts
// with prefix.
//
// The indexing strategy is described at the top of this file. In short:
//   - a cell above the true maximum level is indexed as a covering term;
//   - the cell itself is also indexed as an ancestor term, unless the index
//     is optimized for space (cells at the true maximum level always get
//     the ancestor term, and never the covering term, because query regions
//     never contain a descendant of those cells);
//   - every ancestor of the cell down to minLevel gets an ancestor term.
func (rti *RegionTermIndexer) GetIndexTermsForCanonicalCovering(
	covering CellUnion, prefix string) []string {
	var (
		terms   []string
		last    CellID // previously processed cell; zero before the first one
		opts    = &rti.options
		trueMax = opts.trueMaxLevel()
	)
	for _, cell := range covering {
		cellLevel := cell.Level()
		if cellLevel < trueMax {
			terms = append(terms, rti.GetTerm(COVERING, cell, prefix))
		}
		if cellLevel == trueMax || !opts.optimizeSpace {
			terms = append(terms, rti.GetTerm(ANCESTOR, cell.Parent(cellLevel), prefix))
		}
		// Walk towards the root one levelMod step at a time.
		for lvl := cellLevel - opts.levelMod; lvl >= opts.minLevel; lvl -= opts.levelMod {
			ancestor := cell.Parent(lvl)
			// Stop as soon as the previous cell shares this ancestor: its
			// pass already emitted this term and all coarser ones.
			if last != 0 && last.Level() > lvl && last.Parent(lvl) == ancestor {
				break
			}
			terms = append(terms, rti.GetTerm(ANCESTOR, ancestor, prefix))
		}
		last = cell
	}
	return terms
}
// GetQueryTermsForPoint returns the query terms for a single point. The
// first term is always the ancestor term of the point's cell at the true
// maximum level. Unless the index is points-only, it is followed by one
// covering term per level, from the true maximum level down to minLevel in
// steps of levelMod. Each term starts with prefix.
func (rti *RegionTermIndexer) GetQueryTermsForPoint(p Point, prefix string) []string {
	opts := &rti.options
	leaf := cellIDFromPoint(p)
	top := opts.trueMaxLevel()

	terms := []string{rti.GetTerm(ANCESTOR, leaf.Parent(top), prefix)}
	if !opts.pointsOnly {
		// Covering terms are only needed when the index may hold regions
		// larger than the query point.
		for lvl := top; lvl >= opts.minLevel; lvl -= opts.levelMod {
			terms = append(terms, rti.GetTerm(COVERING, leaf.Parent(lvl), prefix))
		}
	}
	return terms
}
// GetQueryTermsForRegion returns the query terms for an arbitrary region.
// It copies the covering parameters from the indexer's options into its
// region coverer, computes a covering of region, and passes that covering
// to GetQueryTermsForCanonicalCovering together with prefix.
func (rti *RegionTermIndexer) GetQueryTermsForRegion(region Region,
	prefix string) []string {
	coverer := &rti.regionCoverer
	coverer.MaxCells = rti.options.maxCells
	coverer.MinLevel = rti.options.minLevel
	coverer.MaxLevel = rti.options.maxLevel
	coverer.LevelMod = rti.options.levelMod

	queryCovering := coverer.Covering(region)
	return rti.GetQueryTermsForCanonicalCovering(queryCovering, prefix)
}
// GetQueryTermsForCanonicalCovering returns the query terms for a covering
// that was produced with this indexer's level settings. Each term starts
// with prefix.
//
// Every cell contributes its ancestor term. Unless the index is
// points-only, a cell also contributes:
//   - its own covering term, when the index is optimized for space and the
//     cell is above the true maximum level;
//   - a covering term for each of its ancestors down to minLevel.
func (rti *RegionTermIndexer) GetQueryTermsForCanonicalCovering(
	covering CellUnion, prefix string) []string {
	var (
		terms   []string
		last    CellID // previously processed cell; zero before the first one
		opts    = &rti.options
		trueMax = opts.trueMaxLevel()
	)
	for _, cell := range covering {
		cellLevel := cell.Level()
		terms = append(terms, rti.GetTerm(ANCESTOR, cell, prefix))
		if opts.pointsOnly {
			// No document region can be larger than the query, so no
			// covering terms are needed.
			continue
		}
		if opts.optimizeSpace && cellLevel < trueMax {
			terms = append(terms, rti.GetTerm(COVERING, cell, prefix))
		}
		// Walk towards the root one levelMod step at a time.
		for lvl := cellLevel - opts.levelMod; lvl >= opts.minLevel; lvl -= opts.levelMod {
			ancestor := cell.Parent(lvl)
			// Stop as soon as the previous cell shares this ancestor: its
			// pass already emitted this term and all coarser ones.
			if last != 0 && last.Level() > lvl && last.Parent(lvl) == ancestor {
				break
			}
			terms = append(terms, rti.GetTerm(COVERING, ancestor, prefix))
		}
		last = cell
	}
	return terms
}
// CapFromCenterAndRadius builds a Cap centered on the given latitude and
// longitude, both in degrees. The cap's angular radius is dist divided by
// 1000 and then by 6378.
//
// NOTE(review): this looks like dist is in meters and 6378 is the Earth's
// radius in kilometers, giving an angle in radians - confirm against callers.
func CapFromCenterAndRadius(centerLat, centerLon, dist float64) Cap {
	const (
		metersPerKm   = 1000
		earthRadiusKm = 6378
	)
	center := PointFromLatLng(LatLngFromDegrees(centerLat, centerLon))
	radius := s1.Angle((dist / metersPerKm) / earthRadiusKm)
	return CapFromCenterAngle(center, radius)
}
// FilterOutCoveringTerms returns a new slice holding only those terms that
// do not begin with the covering marker, preserving their order. The input
// slice is not modified. Dropping covering terms reduces the number of
// search terms when searching in a one dimensional space (the points-only
// indexing use case).
//
// Because the check looks at the very start of each term, covering terms
// that were generated with a non-empty prefix are not recognized and are
// kept.
func FilterOutCoveringTerms(terms []string) []string {
	kept := make([]string, 0, len(terms))
	for i := range terms {
		if !strings.HasPrefix(terms[i], marker) {
			kept = append(kept, terms[i])
		}
	}
	return kept
}