// Copyright 2023 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

package s2

import (
	"bytes"
	"fmt"
	"math"
)

// This library provides code to compute vertex alignments between Polylines.
//
// A vertex "alignment" or "warp" between two polylines is a matching between
// pairs of their vertices. Users can imagine pairing each vertex from
// Polyline `a` with at least one other vertex in Polyline `b`. The "cost"
// of an arbitrary alignment is defined as the summed value of the squared
// chordal distance between each pair of points in the warp path. An "optimal
// alignment" for a pair of polylines is defined as the alignment with least
// cost. Note: optimal alignments are not necessarily unique. The standard way
// of computing an optimal alignment between two sequences is the use of the
// `Dynamic Timewarp` algorithm.
//
// We provide three methods for computing (via Dynamic Timewarp) the optimal
// alignment between two Polylines. These methods are performance-sensitive,
// and have been reasonably optimized for space- and time- usage. On modern
// hardware, it is possible to compute exact alignments between 4096x4096
// element polylines in ~70ms, and approximate alignments much more quickly.
//
// The results of an alignment operation are captured in a VertexAlignment
// object. In particular, a VertexAlignment keeps track of the total cost of
// alignment, as well as the warp path (a sequence of pairs of indices into each
// polyline whose vertices are linked together in the optimal alignment)
//
// For a worked example, consider the polylines
//
// a = [(1, 0), (5, 0), (6, 0), (9, 0)] and
// b = [(2, 0), (7, 0), (8, 0)].
//
// The "cost matrix" between these two polylines (using chordal
// distance, .Norm(), as our distance function) looks like this:
//
//        (2, 0)  (7, 0)  (8, 0)
// (1, 0)     1       6       7
// (5, 0)     3       2       3
// (6, 0)     4       1       2
// (9, 0)     7       2       1
//
// The Dynamic Timewarp DP table for this cost matrix has cells defined by
//
// table[i][j] = cost(i,j) + min(table[i-1][j-1], table[i][j-1], table[i-1, j])
//
//        (2, 0)  (7, 0)  (8, 0)
// (1, 0)     1       7      14
// (5, 0)     4       3       7
// (6, 0)     8       4       6
// (9, 0)    15       6       5
//
// Starting at the bottom right corner of the DP table, we can work our way
// backwards to the upper left corner  to recover the reverse of the warp path:
// (3, 2) -> (2, 1) -> (1, 1) -> (0, 0). The VertexAlignment produced containing
// this has alignment_cost = 7 and warp_path = {(0, 0), (1, 1), (2, 1), (3, 2)}.
//
// We also provide methods for performing alignment of multiple sequences. These
// methods return a single, representative polyline from a non-empty collection
// of polylines, for various definitions of "representative."
//
// GetMedoidPolyline() returns a new polyline (point-for-point-equal to some
// existing polyline from the collection) that minimizes the summed vertex
// alignment cost to all other polylines in the collection.
//
// GetConsensusPolyline() returns a new polyline (unlikely to be present in the
// input collection) that represents a "weighted consensus" polyline. This
// polyline is constructed iteratively using the Dynamic Timewarp Barycenter
// Averaging algorithm of F. Petitjean, A. Ketterlin, and P. Gancarski, which
// can be found here:
// https://pdfs.semanticscholar.org/a596/8ca9488199291ffe5473643142862293d69d.pdf

// A columnStride is a [start, end) range of columns in a search window.
// It enables us to lazily fill up our costTable structures by providing bounds
// checked access for reads. We also use them to keep track of structured,
// sparse window matrices by tracking start and end columns for each row.
type columnStride struct {
	start int
	end   int
}

// InRange reports if the given index is in range of this stride.
func (c columnStride) InRange(index int) bool {
	return c.start <= index && index < c.end
}

// allColumnStride returns a columnStride where inRange evaluates to `true` for all
// non-negative inputs less than math.MaxInt.
func allColumnStride() columnStride {
	return columnStride{-1, math.MaxInt}
}

// A Window is a sparse binary matrix with specific structural constraints
// on allowed element configurations. It is used in this library to represent
// "search windows" for windowed dynamic timewarping.
//
// Valid Windows require the following structural conditions to hold:
//  1. All rows must consist of a single contiguous stride of `true` values.
//  2. All strides are greater than zero length (i.e. no empty rows).
//  3. The index of the first `true` column in a row must be at least as
//     large as the index of the first `true` column in the previous row.
//  4. The index of the last `true` column in a row must be at least as large
//     as the index of the last `true` column in the previous row.
//  5. strides[0].start = 0 (the first cell is always filled).
//  6. strides[n_rows-1].end = n_cols (the last cell is filled).
//
// Example valid strided_masks (* = filled, . = unfilled)
//
//	  0 1 2 3 4 5
//	0 * * * . . .
//	1 . * * * . .
//	2 . * * * . .
//	3 . . * * * *
//	4 . . * * * *
//
//	  0 1 2 3 4 5
//	0 * * * * . .
//	1 . * * * * .
//	2 . . * * * .
//	3 . . . . * *
//	4 . . . . . *
//
//	  0 1 2 3 4 5
//	0 * * . . . .
//	1 . * . . . .
//	2 . . * * * .
//	3 . . . . . *
//	4 . . . . . *
//
// Example invalid strided_masks:
//
//	0 1 2 3 4 5
//
// 0 * * * . * * <-- more than one continuous run
// 1 . * * * . .
// 2 . * * * . .
// 3 . . * * * *
// 4 . . * * * *
//
//	0 1 2 3 4 5
//
// 0 * * * . . .
// 1 . * * * . .
// 2 . * * * . .
// 3 * * * * * * <-- start index not monotonically increasing
// 4 . . * * * *
//
//	0 1 2 3 4 5
//
// 0 * * * . . .
// 1 . * * * * .
// 2 . * * * . . <-- end index not monotonically increasing
// 3 . . * * * *
// 4 . . * * * *
//
//	0 1 2 3 4 5
//
// 0 . * . . . . <-- does not fill upper left corner
// 1 . * . . . .
// 2 . * . . . .
// 3 . * * * . .
// 4 . . * * * *
type window struct {
	rows    int
	cols    int
	strides []columnStride
}

// windowFromStrides creates a window from the given columnStrides.
func windowFromStrides(strides []columnStride) *window {
	return &window{
		rows:    len(strides),
		cols:    strides[len(strides)-1].end,
		strides: strides,
	}
}

// TODO(rsned): Add windowFromWarpPath

// isValid reports if this windows data represents a valid window.
//
// Valid Windows require the following structural conditions to hold:
//  1. All rows must consist of a single contiguous stride of `true` values.
//  2. All strides are greater than zero length (i.e. no empty rows).
//  3. The index of the first `true` column in a row must be at least as
//     large as the index of the first `true` column in the previous row.
//  4. The index of the last `true` column in a row must be at least as large
//     as the index of the last `true` column in the previous row.
//  5. strides[0].start = 0 (the first cell is always filled).
//  6. strides[n_rows-1].end = n_cols (the last cell is filled).
func (w *window) isValid() bool {
	if w.rows <= 0 || w.cols <= 0 || len(w.strides) == 0 ||
		w.strides[0].start != 0 || w.strides[len(w.strides)-1].end != w.cols {
		return false
	}

	var prev = columnStride{-1, -1}
	for _, curr := range w.strides {
		if curr.end <= curr.start || curr.start < prev.start ||
			curr.end < prev.end {
			return false
		}
		prev = curr
	}
	return true

}

func (w *window) columnStride(row int) columnStride {
	return w.strides[row]
}

func (w *window) checkedColumnStride(row int) columnStride {
	if row < 0 {
		return allColumnStride()
	}
	return w.strides[row]
}

// upsample returns a new, larger window that is an upscaled version of this window.
//
// Used by ApproximateAlignment window expansion step.
func (w *window) upsample(newRows, newCols int) *window {
	// TODO(rsned): What to do if the upsample is actually a downsample.
	// C++ has this as a debug CHECK.
	rowScale := float64(newRows) / float64(w.rows)
	colScale := float64(newCols) / float64(w.cols)
	newStrides := make([]columnStride, newRows)
	var fromStride columnStride
	for row := 0; row < newRows; row++ {
		fromStride = w.strides[int((float64(row)+0.5)/rowScale)]
		newStrides[row] = columnStride{
			start: int(colScale*float64(fromStride.start) + 0.5),
			end:   int(colScale*float64(fromStride.end) + 0.5),
		}
	}
	return windowFromStrides(newStrides)
}

// dilate returns a new, equal-size window by dilating this window with a square
// structuring element with half-length `radius`. Radius = 1 corresponds to
// a 3x3 square morphological dilation.
//
// Used by ApproximateAlignment window expansion step.
func (w *window) dilate(radius int) *window {
	// This code takes advantage of the fact that the dilation window is square to
	// ensure that we can compute the stride for each output row in constant time.
	// TODO (mrdmnd): a potential optimization might be to combine this method and
	// the Upsample method into a single "Expand" method. For the sake of
	// testing, I haven't done that here, but I think it would be fairly
	// straightforward to do so. This method generally isn't very expensive so it
	// feels unnecessary to combine them.

	newStrides := make([]columnStride, w.rows)
	for row := 0; row < w.rows; row++ {
		prevRow := maxInt(0, row-radius)
		nextRow := minInt(row+radius, w.rows-1)
		newStrides[row] = columnStride{
			start: maxInt(0, w.strides[prevRow].start-radius),
			end:   minInt(w.strides[nextRow].end+radius, w.cols),
		}
	}

	return windowFromStrides(newStrides)
}

// debugString returns a string representation of this window.
func (w *window) debugString() string {
	var buf bytes.Buffer
	for _, row := range w.strides {
		for col := 0; col < w.cols; col++ {
			if row.InRange(col) {
				buf.WriteString(" *")
			} else {
				buf.WriteString(" .")
			}
		}
		buf.WriteString("\n")
	}
	return buf.String()
}

// halfResolution reduces the number of vertices of polyline p by selecting every other
// vertex for inclusion in a new polyline. Specifically, we take even-index
// vertices [0, 2, 4,...]. For an even-length polyline, the last vertex is not
// selected. For an odd-length polyline, the last vertex is selected.
// Constructs and returns a new Polyline in linear time.
func halfResolution(p *Polyline) *Polyline {
	var p2 Polyline
	for i := 0; i < len(*p); i += 2 {
		p2 = append(p2, (*p)[i])
	}

	return &p2
}

// warpPath represents a pairing between vertex
// a.vertex(i) and vertex b.vertex(j) in the optimal alignment.
// The warpPath is defined in forward order, such that the result of
// aligning polylines `a` and `b` is always a warpPath with warpPath[0] = {0,0}
// and warp_path[n] = {len(a) - 1, len(b)- 1}
//
// Note that this DOES NOT define an alignment from a point sequence to an
// edge sequence. That functionality may come at a later date.
type warpPath []warpPair
type warpPair struct{ a, b int }

type vertexAlignment struct {
	// alignmentCost represents the sum of the squared chordal distances
	// between each pair of vertices in the warp path. Specifically,
	// cost = sum_{(i, j) \in path} (a.vertex(i) - b.vertex(j)).Norm();
	// This means that the units of alignment_cost are distance. This is
	// an optimization to avoid the (expensive) atan computation of the true
	// spherical angular distance between the points. All we need to compute
	// vertex alignment is a metric that satisfies the triangle inequality, and
	// chordal distance works as well as spherical s1.Angle distance for
	// this purpose.
	alignmentCost float64
	warpPath      warpPath
}

type costTable [][]float64

func newCostTable(rows, cols int) costTable {
	c := make([][]float64, rows)
	for i := 0; i < rows; i++ {
		c[i] = make([]float64, cols)
	}
	return c
}

func (c costTable) String() string {
	var buf bytes.Buffer
	for i, row := range c {
		buf.WriteString(fmt.Sprintf("%2d: [", i))
		for _, col := range row {
			buf.WriteString(fmt.Sprintf("%0.3f, ", col))
		}
		buf.WriteString("]\n")
	}
	return buf.String()
}

func (c costTable) boundsCheckedTableCost(row, col int, stride columnStride) float64 {
	if row < 0 && col < 0 {
		return 0.0
	} else if row < 0 || col < 0 || !stride.InRange(col) {
		return math.MaxFloat64
	} else {
		return c[row][col]
	}
}

func (c costTable) cost() float64 {
	r := len(c) - 1
	return c[r][len(c[r])-1]
}

// ExactVertexAlignmentCost takes two non-empty polylines as input, and
// returns the *cost* of their optimal alignment. A standard, traditional
// dynamic timewarp algorithm can output both a warp path and a cost, but
// requires quadratic space to reconstruct the path by walking back through the
// Dynamic Programming cost table. If all you really need is the warp cost (i.e.
// you're inducing a similarity metric between Polylines, or something
// equivalent), you can overwrite the DP table and use constant space -
// O(max(A,B)). This method provides that space-efficiency optimization.
func ExactVertexAlignmentCost(a, b *Polyline) float64 {
	aN := len(*a)
	bN := len(*b)
	cost := make([]float64, bN)
	for i := 0; i < bN; i++ {
		cost[i] = math.MaxFloat64
	}
	leftDiagMinCost := 0.0
	for row := 0; row < aN; row++ {
		for col := 0; col < bN; col++ {
			upCost := cost[col]
			cost[col] = math.Min(leftDiagMinCost, upCost) +
				(*a)[row].Sub((*b)[col].Vector).Norm()
			leftDiagMinCost = math.Min(cost[col], upCost)
		}
		leftDiagMinCost = math.MaxFloat64
	}
	return cost[len(cost)-1]
}

// ExactVertexAlignment takes two non-empty polylines as input, and returns
// the VertexAlignment corresponding to the optimal alignment between them. This
// method is quadratic O(A*B) in both space and time complexity.
func ExactVertexAlignment(a, b *Polyline) *vertexAlignment {
	aN := len(*a)
	bN := len(*b)
	strides := make([]columnStride, aN)
	for i := 0; i < aN; i++ {
		strides[i] = columnStride{start: 0, end: bN}
	}
	w := windowFromStrides(strides)

	return dynamicTimewarp(a, b, w)
}

// Perform dynamic timewarping by filling in the DP table on cells that are
// inside our search window. For an exact (all-squares) evaluation, this
// incurs bounds checking overhead - we don't need to ensure that we're inside
// the appropriate cells in the window, because it's guaranteed. Structuring
// the program to reuse code for both the EXACT and WINDOWED cases by
// abstracting EXACT as a window with full-covering strides is done for
// maintainability reasons. One potential optimization here might be to overload
// this function to skip bounds checking when the window is full.
//
// As a note of general interest, the Dynamic Timewarp algorithm as stated here
// prefers shorter warp paths, when two warp paths might be equally costly. This
// is because it favors progressing in the sequences simultaneously due to the
// equal weighting of a diagonal step in the cost table with a horizontal or
// vertical step. This may be counterintuitive, but represents the standard
// implementation of this algorithm. TODO(user) - future implementations could
// allow weights on the lookup costs to mitigate this.
//
// This is the hottest routine in the whole package, please be careful to
// profile any future changes made here.
//
// This method takes time proportional to the number of cells in the window,
// which can range from O(max(a, b)) cells (best) to O(a*b) cells (worst)
func dynamicTimewarp(a, b *Polyline, w *window) *vertexAlignment {
	rows := len(*a)
	cols := len(*b)
	costs := newCostTable(rows, cols)

	var curr columnStride
	prev := allColumnStride()

	for row := 0; row < rows; row++ {
		curr = w.columnStride(row)
		for col := curr.start; col < curr.end; col++ {
			// The total cost up to (row,col) is the minimum of the cost up, down,
			// left and the distance between the points in row and col. We use
			// the distance between the points, as we are trying to minimize the
			// distance between the two polylines.
			dCost := costs.boundsCheckedTableCost(row-1, col-1, prev)
			uCost := costs.boundsCheckedTableCost(row-1, col-0, prev)
			lCost := costs.boundsCheckedTableCost(row-0, col-1, curr)

			costs[row][col] = minFloat64(dCost, uCost, lCost) +
				(*a)[row].Sub((*b)[col].Vector).Norm()
		}
		prev = curr
	}

	// Now we walk back through the cost table and build up the warp path.
	// Somewhat surprisingly, it is faster to recover the path this way than it
	// is to save the comparisons from the computation we *already did* to get the
	// direction we came from. The author speculates that this behavior is
	// assignment-cost-related: to persist direction, we have to do extra
	// stores/loads of "directional" information, and the extra assignment cost
	// this incurs is larger than the cost to simply redo the comparisons.
	// It's probably worth revisiting this assumption in the future.
	// As it turns out, the following code ends up effectively free.
	warpPath := make([]warpPair, 0, maxInt(rows, cols))
	row := rows - 1
	col := cols - 1
	curr = w.checkedColumnStride(row)
	prev = w.checkedColumnStride(row - 1)
	for row >= 0 && col >= 0 {
		warpPath = append(warpPath, warpPair{row, col})
		dCost := costs.boundsCheckedTableCost(row-1, col-1, prev)
		uCost := costs.boundsCheckedTableCost(row-1, col-0, prev)
		lCost := costs.boundsCheckedTableCost(row-0, col-1, curr)

		if dCost <= uCost && dCost <= lCost {
			row -= 1
			col -= 1
			curr = w.checkedColumnStride(row)
			prev = w.checkedColumnStride(row - 1)
		} else if uCost <= lCost {
			row -= 1
			curr = w.checkedColumnStride(row)
			prev = w.checkedColumnStride(row - 1)
		} else {
			col -= 1
		}
	}

	// TODO(rsned): warpPath.reverse
	return &vertexAlignment{alignmentCost: costs.cost(), warpPath: warpPath}
}

// TODO(rsned): Differences from C++
// ApproxVertexAlignment/Cost
// MedoidPolyline / Options
// ConsensusPolyline / Options