WIP: Save agent roles integration work before CHORUS rebrand
- Agent roles and coordination features - Chat API integration testing - New configuration and workspace management 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
21
vendor/lukechampine.com/blake3/LICENSE
generated
vendored
Normal file
21
vendor/lukechampine.com/blake3/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2020 Luke Champine
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
66
vendor/lukechampine.com/blake3/README.md
generated
vendored
Normal file
66
vendor/lukechampine.com/blake3/README.md
generated
vendored
Normal file
@@ -0,0 +1,66 @@
|
||||
blake3
|
||||
------
|
||||
|
||||
[![GoDoc](https://godoc.org/lukechampine.com/blake3?status.svg)](https://godoc.org/lukechampine.com/blake3)
|
||||
[![Go Report Card](https://goreportcard.com/badge/lukechampine.com/blake3)](https://goreportcard.com/report/lukechampine.com/blake3)
|
||||
|
||||
```
|
||||
go get lukechampine.com/blake3
|
||||
```
|
||||
|
||||
`blake3` implements the [BLAKE3 cryptographic hash function](https://github.com/BLAKE3-team/BLAKE3).
|
||||
This implementation aims to be performant without sacrificing (too much)
|
||||
readability, in the hopes of eventually landing in `x/crypto`.
|
||||
|
||||
In addition to the pure-Go implementation, this package also contains AVX-512
|
||||
and AVX2 routines (generated by [`avo`](https://github.com/mmcloughlin/avo))
|
||||
that greatly increase performance for large inputs and outputs.
|
||||
|
||||
Contributions are greatly appreciated.
|
||||
[All contributors are eligible to receive an Urbit planet.](https://twitter.com/lukechampine/status/1274797924522885134)
|
||||
|
||||
|
||||
## Benchmarks
|
||||
|
||||
Tested on a 2020 MacBook Air (i5-7600K @ 3.80GHz). Benchmarks will improve as
|
||||
soon as I get access to a beefier AVX-512 machine. :wink:
|
||||
|
||||
### AVX-512
|
||||
|
||||
```
|
||||
BenchmarkSum256/64 120 ns/op 533.00 MB/s
|
||||
BenchmarkSum256/1024 2229 ns/op 459.36 MB/s
|
||||
BenchmarkSum256/65536 16245 ns/op 4034.11 MB/s
|
||||
BenchmarkWrite 245 ns/op 4177.38 MB/s
|
||||
BenchmarkXOF 246 ns/op 4159.30 MB/s
|
||||
```
|
||||
|
||||
### AVX2
|
||||
|
||||
```
|
||||
BenchmarkSum256/64 120 ns/op 533.00 MB/s
|
||||
BenchmarkSum256/1024 2229 ns/op 459.36 MB/s
|
||||
BenchmarkSum256/65536 31137 ns/op 2104.76 MB/s
|
||||
BenchmarkWrite 487 ns/op 2103.12 MB/s
|
||||
BenchmarkXOF 329 ns/op 3111.27 MB/s
|
||||
```
|
||||
|
||||
### Pure Go
|
||||
|
||||
```
|
||||
BenchmarkSum256/64 120 ns/op 533.00 MB/s
|
||||
BenchmarkSum256/1024 2229 ns/op 459.36 MB/s
|
||||
BenchmarkSum256/65536 133505 ns/op 490.89 MB/s
|
||||
BenchmarkWrite 2022 ns/op 506.36 MB/s
|
||||
BenchmarkXOF 1914 ns/op 534.98 MB/s
|
||||
```
|
||||
|
||||
## Shortcomings
|
||||
|
||||
There is no assembly routine for single-block compressions. This is most
|
||||
noticeable for ~1KB inputs.
|
||||
|
||||
Each assembly routine inlines all 7 rounds, causing thousands of lines of
|
||||
duplicated code. Ideally the routines could be merged such that only a single
|
||||
routine is generated for AVX-512 and AVX2, without sacrificing too much
|
||||
performance.
|
||||
151
vendor/lukechampine.com/blake3/bao.go
generated
vendored
Normal file
151
vendor/lukechampine.com/blake3/bao.go
generated
vendored
Normal file
@@ -0,0 +1,151 @@
|
||||
package blake3
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
// BaoEncodedSize returns the size of a Bao encoding for the provided quantity
|
||||
// of data.
|
||||
func BaoEncodedSize(dataLen int, outboard bool) int {
|
||||
size := 8
|
||||
if dataLen > 0 {
|
||||
chunks := (dataLen + chunkSize - 1) / chunkSize
|
||||
cvs := 2*chunks - 2 // no I will not elaborate
|
||||
size += cvs * 32
|
||||
}
|
||||
if !outboard {
|
||||
size += dataLen
|
||||
}
|
||||
return size
|
||||
}
|
||||
|
||||
// BaoEncode computes the intermediate BLAKE3 tree hashes of data and writes
|
||||
// them to dst. If outboard is false, the contents of data are also written to
|
||||
// dst, interleaved with the tree hashes. It also returns the tree root, i.e.
|
||||
// the 256-bit BLAKE3 hash.
|
||||
//
|
||||
// Note that dst is not written sequentially, and therefore must be initialized
|
||||
// with sufficient capacity to hold the encoding; see BaoEncodedSize.
|
||||
func BaoEncode(dst io.WriterAt, data io.Reader, dataLen int64, outboard bool) ([32]byte, error) {
|
||||
var counter uint64
|
||||
var chunkBuf [chunkSize]byte
|
||||
var err error
|
||||
read := func(p []byte) []byte {
|
||||
if err == nil {
|
||||
_, err = io.ReadFull(data, p)
|
||||
}
|
||||
return p
|
||||
}
|
||||
write := func(p []byte, off uint64) {
|
||||
if err == nil {
|
||||
_, err = dst.WriteAt(p, int64(off))
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: unlike the reference implementation, we write directly in
|
||||
// pre-order, rather than writing in post-order and then flipping. This cuts
|
||||
// the I/O required in half, but also makes hashing multiple chunks in SIMD
|
||||
// a lot trickier. I'll save that optimization for a rainy day.
|
||||
var rec func(bufLen uint64, flags uint32, off uint64) (uint64, [8]uint32)
|
||||
rec = func(bufLen uint64, flags uint32, off uint64) (uint64, [8]uint32) {
|
||||
if err != nil {
|
||||
return 0, [8]uint32{}
|
||||
} else if bufLen <= chunkSize {
|
||||
cv := chainingValue(compressChunk(read(chunkBuf[:bufLen]), &iv, counter, flags))
|
||||
counter++
|
||||
if !outboard {
|
||||
write(chunkBuf[:bufLen], off)
|
||||
}
|
||||
return 0, cv
|
||||
}
|
||||
mid := uint64(1) << (bits.Len64(bufLen-1) - 1)
|
||||
lchildren, l := rec(mid, 0, off+64)
|
||||
llen := lchildren * 32
|
||||
if !outboard {
|
||||
llen += (mid / chunkSize) * chunkSize
|
||||
}
|
||||
rchildren, r := rec(bufLen-mid, 0, off+64+llen)
|
||||
write(cvToBytes(&l)[:], off)
|
||||
write(cvToBytes(&r)[:], off+32)
|
||||
return 2 + lchildren + rchildren, chainingValue(parentNode(l, r, iv, flags))
|
||||
}
|
||||
|
||||
binary.LittleEndian.PutUint64(chunkBuf[:8], uint64(dataLen))
|
||||
write(chunkBuf[:8], 0)
|
||||
_, root := rec(uint64(dataLen), flagRoot, 8)
|
||||
return *cvToBytes(&root), err
|
||||
}
|
||||
|
||||
// BaoDecode reads content and tree data from the provided reader(s), and
|
||||
// streams the verified content to dst. It returns false if verification fails.
|
||||
// If the content and tree data are interleaved, outboard should be nil.
|
||||
func BaoDecode(dst io.Writer, data, outboard io.Reader, root [32]byte) (bool, error) {
|
||||
if outboard == nil {
|
||||
outboard = data
|
||||
}
|
||||
var counter uint64
|
||||
var buf [chunkSize]byte
|
||||
var err error
|
||||
read := func(r io.Reader, p []byte) []byte {
|
||||
if err == nil {
|
||||
_, err = io.ReadFull(r, p)
|
||||
}
|
||||
return p
|
||||
}
|
||||
readParent := func() (l, r [8]uint32) {
|
||||
read(outboard, buf[:64])
|
||||
return bytesToCV(buf[:32]), bytesToCV(buf[32:])
|
||||
}
|
||||
|
||||
var rec func(cv [8]uint32, bufLen uint64, flags uint32) bool
|
||||
rec = func(cv [8]uint32, bufLen uint64, flags uint32) bool {
|
||||
if err != nil {
|
||||
return false
|
||||
} else if bufLen <= chunkSize {
|
||||
n := compressChunk(read(data, buf[:bufLen]), &iv, counter, flags)
|
||||
counter++
|
||||
return cv == chainingValue(n)
|
||||
}
|
||||
l, r := readParent()
|
||||
n := parentNode(l, r, iv, flags)
|
||||
mid := uint64(1) << (bits.Len64(bufLen-1) - 1)
|
||||
return chainingValue(n) == cv && rec(l, mid, 0) && rec(r, bufLen-mid, 0)
|
||||
}
|
||||
|
||||
read(outboard, buf[:8])
|
||||
dataLen := binary.LittleEndian.Uint64(buf[:8])
|
||||
ok := rec(bytesToCV(root[:]), dataLen, flagRoot)
|
||||
return ok, err
|
||||
}
|
||||
|
||||
// bufferAt exposes a pre-sized byte slice through the io.WriterAt method set.
type bufferAt struct {
	buf []byte
}

// WriteAt copies p into the buffer starting at off. The buffer never grows:
// if p does not fit entirely, WriteAt panics. The returned error is always nil.
func (b *bufferAt) WriteAt(p []byte, off int64) (int, error) {
	written := copy(b.buf[off:], p)
	if written < len(p) {
		panic("bad buffer size")
	}
	return written, nil
}
|
||||
|
||||
// BaoEncodeBuf returns the Bao encoding and root (i.e. BLAKE3 hash) for data.
|
||||
func BaoEncodeBuf(data []byte, outboard bool) ([]byte, [32]byte) {
|
||||
buf := bufferAt{buf: make([]byte, BaoEncodedSize(len(data), outboard))}
|
||||
root, _ := BaoEncode(&buf, bytes.NewReader(data), int64(len(data)), outboard)
|
||||
return buf.buf, root
|
||||
}
|
||||
|
||||
// BaoVerifyBuf verifies the Bao encoding and root (i.e. BLAKE3 hash) for data.
|
||||
// If the content and tree data are interleaved, outboard should be nil.
|
||||
func BaoVerifyBuf(data, outboard []byte, root [32]byte) bool {
|
||||
var or io.Reader = bytes.NewReader(outboard)
|
||||
if outboard == nil {
|
||||
or = nil
|
||||
}
|
||||
ok, _ := BaoDecode(io.Discard, bytes.NewReader(data), or, root)
|
||||
return ok
|
||||
}
|
||||
296
vendor/lukechampine.com/blake3/blake3.go
generated
vendored
Normal file
296
vendor/lukechampine.com/blake3/blake3.go
generated
vendored
Normal file
@@ -0,0 +1,296 @@
|
||||
// Package blake3 implements the BLAKE3 cryptographic hash function.
|
||||
package blake3 // import "lukechampine.com/blake3"
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"hash"
|
||||
"io"
|
||||
"math"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
const (
	// Bit flags OR'ed into node.flags. Each takes the next bit via iota.
	flagChunkStart        = 1 << iota // set on the first block of a chunk (see compressChunk)
	flagChunkEnd                      // set on the last block of a chunk (see compressChunk)
	flagParent                        // set on parent nodes (see parentNode)
	flagRoot                          // set on the node that produces the final output
	flagKeyedHash                     // set by New when a key is supplied
	flagDeriveKeyContext              // used by DeriveKey while hashing the context string
	flagDeriveKeyMaterial             // used by DeriveKey while hashing the source key

	blockSize = 64   // bytes per compression block (16 uint32 words)
	chunkSize = 1024 // bytes per chunk, i.e. per leaf handled by compressChunk

	maxSIMD = 16 // AVX-512 vectors can store 16 words
)
|
||||
|
||||
// iv holds the eight initialization words. It is the key for unkeyed hashing
// (see New) and also supplies the constants mixed into the state in round 1 of
// compressNodeGeneric.
var iv = [8]uint32{
	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
}
|
||||
|
||||
// A node represents a chunk or parent in the BLAKE3 Merkle tree. It carries
// every input of one compression: compressNodeGeneric reads all five fields.
type node struct {
	cv       [8]uint32  // chaining value from previous node
	block    [16]uint32 // message words to compress
	counter  uint64     // chunk index for chunks, output block index for XOF reads, 0 for parents
	blockLen uint32     // number of meaningful bytes in block
	flags    uint32     // OR of the flag* constants
}
|
||||
|
||||
// parentNode returns a node that incorporates the chaining values of two child
|
||||
// nodes.
|
||||
func parentNode(left, right [8]uint32, key [8]uint32, flags uint32) node {
|
||||
n := node{
|
||||
cv: key,
|
||||
counter: 0, // counter is reset for parents
|
||||
blockLen: blockSize, // block is full
|
||||
flags: flags | flagParent,
|
||||
}
|
||||
copy(n.block[:8], left[:])
|
||||
copy(n.block[8:], right[:])
|
||||
return n
|
||||
}
|
||||
|
||||
// Hasher implements hash.Hash. Input is collected in buf; each time buf fills
// up it is compressed into one subtree root and pushed onto stack.
type Hasher struct {
	key   [8]uint32 // iv, or the caller's key words (see New)
	flags uint32    // flags applied to every node this Hasher creates
	size  int       // output size, for Sum

	// log(n) set of Merkle subtree roots, at most one per height.
	stack   [50][8]uint32 // 2^50 * maxSIMD * chunkSize = 2^64
	counter uint64        // number of buffers hashed; also serves as a bit vector indicating which stack elems are occupied

	buf    [maxSIMD * chunkSize]byte // input not yet compressed
	buflen int                       // number of valid bytes in buf
}
|
||||
|
||||
func (h *Hasher) hasSubtreeAtHeight(i int) bool {
|
||||
return h.counter&(1<<i) != 0
|
||||
}
|
||||
|
||||
func (h *Hasher) pushSubtree(cv [8]uint32) {
|
||||
// seek to first open stack slot, merging subtrees as we go
|
||||
i := 0
|
||||
for h.hasSubtreeAtHeight(i) {
|
||||
cv = chainingValue(parentNode(h.stack[i], cv, h.key, h.flags))
|
||||
i++
|
||||
}
|
||||
h.stack[i] = cv
|
||||
h.counter++
|
||||
}
|
||||
|
||||
// rootNode computes the root of the Merkle tree. It does not modify the
|
||||
// stack.
|
||||
func (h *Hasher) rootNode() node {
|
||||
n := compressBuffer(&h.buf, h.buflen, &h.key, h.counter*maxSIMD, h.flags)
|
||||
for i := bits.TrailingZeros64(h.counter); i < bits.Len64(h.counter); i++ {
|
||||
if h.hasSubtreeAtHeight(i) {
|
||||
n = parentNode(h.stack[i], chainingValue(n), h.key, h.flags)
|
||||
}
|
||||
}
|
||||
n.flags |= flagRoot
|
||||
return n
|
||||
}
|
||||
|
||||
// Write implements hash.Hash.
|
||||
func (h *Hasher) Write(p []byte) (int, error) {
|
||||
lenp := len(p)
|
||||
for len(p) > 0 {
|
||||
if h.buflen == len(h.buf) {
|
||||
n := compressBuffer(&h.buf, h.buflen, &h.key, h.counter*maxSIMD, h.flags)
|
||||
h.pushSubtree(chainingValue(n))
|
||||
h.buflen = 0
|
||||
}
|
||||
n := copy(h.buf[h.buflen:], p)
|
||||
h.buflen += n
|
||||
p = p[n:]
|
||||
}
|
||||
return lenp, nil
|
||||
}
|
||||
|
||||
// Sum implements hash.Hash.
|
||||
func (h *Hasher) Sum(b []byte) (sum []byte) {
|
||||
// We need to append h.Size() bytes to b. Reuse b's capacity if possible;
|
||||
// otherwise, allocate a new slice.
|
||||
if total := len(b) + h.Size(); cap(b) >= total {
|
||||
sum = b[:total]
|
||||
} else {
|
||||
sum = make([]byte, total)
|
||||
copy(sum, b)
|
||||
}
|
||||
// Read into the appended portion of sum. Use a low-latency-low-throughput
|
||||
// path for small digests (requiring a single compression), and a
|
||||
// high-latency-high-throughput path for large digests.
|
||||
if dst := sum[len(b):]; len(dst) <= 64 {
|
||||
var out [64]byte
|
||||
wordsToBytes(compressNode(h.rootNode()), &out)
|
||||
copy(dst, out[:])
|
||||
} else {
|
||||
h.XOF().Read(dst)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Reset implements hash.Hash.
|
||||
func (h *Hasher) Reset() {
|
||||
h.counter = 0
|
||||
h.buflen = 0
|
||||
}
|
||||
|
||||
// BlockSize implements hash.Hash.
|
||||
func (h *Hasher) BlockSize() int { return 64 }
|
||||
|
||||
// Size implements hash.Hash.
|
||||
func (h *Hasher) Size() int { return h.size }
|
||||
|
||||
// XOF returns an OutputReader initialized with the current hash state.
|
||||
func (h *Hasher) XOF() *OutputReader {
|
||||
return &OutputReader{
|
||||
n: h.rootNode(),
|
||||
}
|
||||
}
|
||||
|
||||
func newHasher(key [8]uint32, flags uint32, size int) *Hasher {
|
||||
return &Hasher{
|
||||
key: key,
|
||||
flags: flags,
|
||||
size: size,
|
||||
}
|
||||
}
|
||||
|
||||
// New returns a Hasher for the specified digest size and key. If key is nil,
|
||||
// the hash is unkeyed. Otherwise, len(key) must be 32.
|
||||
func New(size int, key []byte) *Hasher {
|
||||
if key == nil {
|
||||
return newHasher(iv, 0, size)
|
||||
}
|
||||
var keyWords [8]uint32
|
||||
for i := range keyWords {
|
||||
keyWords[i] = binary.LittleEndian.Uint32(key[i*4:])
|
||||
}
|
||||
return newHasher(keyWords, flagKeyedHash, size)
|
||||
}
|
||||
|
||||
// Sum256 and Sum512 always use the same hasher state, so we can save some time
// when hashing small inputs by constructing the hasher ahead of time.
// Sum512 copies this value before writing to it, so the shared instance is
// never mutated there.
var defaultHasher = New(64, nil)
|
||||
|
||||
// Sum256 returns the unkeyed BLAKE3 hash of b, truncated to 256 bits.
|
||||
func Sum256(b []byte) (out [32]byte) {
|
||||
out512 := Sum512(b)
|
||||
copy(out[:], out512[:])
|
||||
return
|
||||
}
|
||||
|
||||
// Sum512 returns the unkeyed BLAKE3 hash of b, truncated to 512 bits.
|
||||
func Sum512(b []byte) (out [64]byte) {
|
||||
var n node
|
||||
if len(b) <= blockSize {
|
||||
hashBlock(&out, b)
|
||||
return
|
||||
} else if len(b) <= chunkSize {
|
||||
n = compressChunk(b, &iv, 0, 0)
|
||||
n.flags |= flagRoot
|
||||
} else {
|
||||
h := *defaultHasher
|
||||
h.Write(b)
|
||||
n = h.rootNode()
|
||||
}
|
||||
wordsToBytes(compressNode(n), &out)
|
||||
return
|
||||
}
|
||||
|
||||
// DeriveKey derives a subkey from ctx and srcKey. ctx should be hardcoded,
|
||||
// globally unique, and application-specific. A good format for ctx strings is:
|
||||
//
|
||||
// [application] [commit timestamp] [purpose]
|
||||
//
|
||||
// e.g.:
|
||||
//
|
||||
// example.com 2019-12-25 16:18:03 session tokens v1
|
||||
//
|
||||
// The purpose of these requirements is to ensure that an attacker cannot trick
|
||||
// two different applications into using the same context string.
|
||||
func DeriveKey(subKey []byte, ctx string, srcKey []byte) {
|
||||
// construct the derivation Hasher
|
||||
const derivationIVLen = 32
|
||||
h := newHasher(iv, flagDeriveKeyContext, 32)
|
||||
h.Write([]byte(ctx))
|
||||
derivationIV := h.Sum(make([]byte, 0, derivationIVLen))
|
||||
var ivWords [8]uint32
|
||||
for i := range ivWords {
|
||||
ivWords[i] = binary.LittleEndian.Uint32(derivationIV[i*4:])
|
||||
}
|
||||
h = newHasher(ivWords, flagDeriveKeyMaterial, 0)
|
||||
// derive the subKey
|
||||
h.Write(srcKey)
|
||||
h.XOF().Read(subKey)
|
||||
}
|
||||
|
||||
// An OutputReader produces a seekable stream of 2^64 - 1 pseudorandom output
// bytes.
type OutputReader struct {
	n   node                      // root node; its counter selects which output blocks are generated
	buf [maxSIMD * blockSize]byte // most recently generated run of maxSIMD output blocks
	off uint64                    // current position in the output stream
}
|
||||
|
||||
// Read implements io.Reader. Callers may assume that Read returns len(p), nil
|
||||
// unless the read would extend beyond the end of the stream.
|
||||
func (or *OutputReader) Read(p []byte) (int, error) {
|
||||
if or.off == math.MaxUint64 {
|
||||
return 0, io.EOF
|
||||
} else if rem := math.MaxUint64 - or.off; uint64(len(p)) > rem {
|
||||
p = p[:rem]
|
||||
}
|
||||
lenp := len(p)
|
||||
for len(p) > 0 {
|
||||
if or.off%(maxSIMD*blockSize) == 0 {
|
||||
or.n.counter = or.off / blockSize
|
||||
compressBlocks(&or.buf, or.n)
|
||||
}
|
||||
n := copy(p, or.buf[or.off%(maxSIMD*blockSize):])
|
||||
p = p[n:]
|
||||
or.off += uint64(n)
|
||||
}
|
||||
return lenp, nil
|
||||
}
|
||||
|
||||
// Seek implements io.Seeker.
|
||||
func (or *OutputReader) Seek(offset int64, whence int) (int64, error) {
|
||||
off := or.off
|
||||
switch whence {
|
||||
case io.SeekStart:
|
||||
if offset < 0 {
|
||||
return 0, errors.New("seek position cannot be negative")
|
||||
}
|
||||
off = uint64(offset)
|
||||
case io.SeekCurrent:
|
||||
if offset < 0 {
|
||||
if uint64(-offset) > off {
|
||||
return 0, errors.New("seek position cannot be negative")
|
||||
}
|
||||
off -= uint64(-offset)
|
||||
} else {
|
||||
off += uint64(offset)
|
||||
}
|
||||
case io.SeekEnd:
|
||||
off = uint64(offset) - 1
|
||||
default:
|
||||
panic("invalid whence")
|
||||
}
|
||||
or.off = off
|
||||
or.n.counter = uint64(off) / blockSize
|
||||
if or.off%(maxSIMD*blockSize) != 0 {
|
||||
compressBlocks(&or.buf, or.n)
|
||||
}
|
||||
// NOTE: or.off >= 2^63 will result in a negative return value.
|
||||
// Nothing we can do about this.
|
||||
return int64(or.off), nil
|
||||
}
|
||||
|
||||
// ensure that Hasher implements hash.Hash
|
||||
var _ hash.Hash = (*Hasher)(nil)
|
||||
5564
vendor/lukechampine.com/blake3/blake3_amd64.s
generated
vendored
Normal file
5564
vendor/lukechampine.com/blake3/blake3_amd64.s
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
144
vendor/lukechampine.com/blake3/compress_amd64.go
generated
vendored
Normal file
144
vendor/lukechampine.com/blake3/compress_amd64.go
generated
vendored
Normal file
@@ -0,0 +1,144 @@
|
||||
package blake3
|
||||
|
||||
import "unsafe"
|
||||
|
||||
//go:generate go run avo/gen.go -out blake3_amd64.s
|
||||
|
||||
// compressChunksAVX512 is declared without a Go body; the go:generate
// directive in this file writes the assembly to blake3_amd64.s. Judging by its
// caller compressBufferAVX512, it hashes the 16 chunks in buf, numbering them
// from counter, and stores one chaining value per chunk in cvs.
//
//go:noescape
func compressChunksAVX512(cvs *[16][8]uint32, buf *[16 * chunkSize]byte, key *[8]uint32, counter uint64, flags uint32)

// compressChunksAVX2 is the 8-chunk counterpart of compressChunksAVX512
// (declared without a Go body). compressBufferAVX2 calls it once per half of
// the buffer.
//
//go:noescape
func compressChunksAVX2(cvs *[8][8]uint32, buf *[8 * chunkSize]byte, key *[8]uint32, counter uint64, flags uint32)

// compressBlocksAVX512 is declared without a Go body. As used by
// compressBlocks, it fills out with 16 consecutive 64-byte output blocks for
// the node described by block, cv, blockLen and flags, starting at counter.
//
//go:noescape
func compressBlocksAVX512(out *[1024]byte, block *[16]uint32, cv *[8]uint32, counter uint64, blockLen uint32, flags uint32)

// compressBlocksAVX2 is the 8-block counterpart of compressBlocksAVX512
// (declared without a Go body). compressBlocks calls it twice, the second time
// with counter+8.
//
//go:noescape
func compressBlocksAVX2(out *[512]byte, msgs *[16]uint32, cv *[8]uint32, counter uint64, blockLen uint32, flags uint32)

// compressParentsAVX2 is declared without a Go body. As used by mergeSubtrees,
// it combines adjacent pairs of chaining values from cvs into parent chaining
// values written to parents. Callers pass the same memory for both arguments.
//
//go:noescape
func compressParentsAVX2(parents *[8][8]uint32, cvs *[16][8]uint32, key *[8]uint32, flags uint32)
|
||||
|
||||
func compressNode(n node) (out [16]uint32) {
|
||||
compressNodeGeneric(&out, n)
|
||||
return
|
||||
}
|
||||
|
||||
func compressBufferAVX512(buf *[maxSIMD * chunkSize]byte, buflen int, key *[8]uint32, counter uint64, flags uint32) node {
|
||||
var cvs [maxSIMD][8]uint32
|
||||
compressChunksAVX512(&cvs, buf, key, counter, flags)
|
||||
numChunks := uint64(buflen / chunkSize)
|
||||
if buflen%chunkSize != 0 {
|
||||
// use non-asm for remainder
|
||||
partialChunk := buf[buflen-buflen%chunkSize : buflen]
|
||||
cvs[numChunks] = chainingValue(compressChunk(partialChunk, key, counter+numChunks, flags))
|
||||
numChunks++
|
||||
}
|
||||
return mergeSubtrees(&cvs, numChunks, key, flags)
|
||||
}
|
||||
|
||||
func compressBufferAVX2(buf *[maxSIMD * chunkSize]byte, buflen int, key *[8]uint32, counter uint64, flags uint32) node {
|
||||
var cvs [maxSIMD][8]uint32
|
||||
cvHalves := (*[2][8][8]uint32)(unsafe.Pointer(&cvs))
|
||||
bufHalves := (*[2][8 * chunkSize]byte)(unsafe.Pointer(buf))
|
||||
compressChunksAVX2(&cvHalves[0], &bufHalves[0], key, counter, flags)
|
||||
numChunks := uint64(buflen / chunkSize)
|
||||
if numChunks > 8 {
|
||||
compressChunksAVX2(&cvHalves[1], &bufHalves[1], key, counter+8, flags)
|
||||
}
|
||||
if buflen%chunkSize != 0 {
|
||||
// use non-asm for remainder
|
||||
partialChunk := buf[buflen-buflen%chunkSize : buflen]
|
||||
cvs[numChunks] = chainingValue(compressChunk(partialChunk, key, counter+numChunks, flags))
|
||||
numChunks++
|
||||
}
|
||||
return mergeSubtrees(&cvs, numChunks, key, flags)
|
||||
}
|
||||
|
||||
func compressBuffer(buf *[maxSIMD * chunkSize]byte, buflen int, key *[8]uint32, counter uint64, flags uint32) node {
|
||||
switch {
|
||||
case haveAVX512 && buflen >= chunkSize*2:
|
||||
return compressBufferAVX512(buf, buflen, key, counter, flags)
|
||||
case haveAVX2 && buflen >= chunkSize*2:
|
||||
return compressBufferAVX2(buf, buflen, key, counter, flags)
|
||||
default:
|
||||
return compressBufferGeneric(buf, buflen, key, counter, flags)
|
||||
}
|
||||
}
|
||||
|
||||
func compressChunk(chunk []byte, key *[8]uint32, counter uint64, flags uint32) node {
|
||||
n := node{
|
||||
cv: *key,
|
||||
counter: counter,
|
||||
blockLen: blockSize,
|
||||
flags: flags | flagChunkStart,
|
||||
}
|
||||
blockBytes := (*[64]byte)(unsafe.Pointer(&n.block))[:]
|
||||
for len(chunk) > blockSize {
|
||||
copy(blockBytes, chunk)
|
||||
chunk = chunk[blockSize:]
|
||||
n.cv = chainingValue(n)
|
||||
n.flags &^= flagChunkStart
|
||||
}
|
||||
// pad last block with zeros
|
||||
n.block = [16]uint32{}
|
||||
copy(blockBytes, chunk)
|
||||
n.blockLen = uint32(len(chunk))
|
||||
n.flags |= flagChunkEnd
|
||||
return n
|
||||
}
|
||||
|
||||
func hashBlock(out *[64]byte, buf []byte) {
|
||||
var block [16]uint32
|
||||
copy((*[64]byte)(unsafe.Pointer(&block))[:], buf)
|
||||
compressNodeGeneric((*[16]uint32)(unsafe.Pointer(out)), node{
|
||||
cv: iv,
|
||||
block: block,
|
||||
blockLen: uint32(len(buf)),
|
||||
flags: flagChunkStart | flagChunkEnd | flagRoot,
|
||||
})
|
||||
}
|
||||
|
||||
func compressBlocks(out *[maxSIMD * blockSize]byte, n node) {
|
||||
switch {
|
||||
case haveAVX512:
|
||||
compressBlocksAVX512(out, &n.block, &n.cv, n.counter, n.blockLen, n.flags)
|
||||
case haveAVX2:
|
||||
outs := (*[2][512]byte)(unsafe.Pointer(out))
|
||||
compressBlocksAVX2(&outs[0], &n.block, &n.cv, n.counter, n.blockLen, n.flags)
|
||||
compressBlocksAVX2(&outs[1], &n.block, &n.cv, n.counter+8, n.blockLen, n.flags)
|
||||
default:
|
||||
outs := (*[maxSIMD][64]byte)(unsafe.Pointer(out))
|
||||
compressBlocksGeneric(outs, n)
|
||||
}
|
||||
}
|
||||
|
||||
func mergeSubtrees(cvs *[maxSIMD][8]uint32, numCVs uint64, key *[8]uint32, flags uint32) node {
|
||||
if !haveAVX2 {
|
||||
return mergeSubtreesGeneric(cvs, numCVs, key, flags)
|
||||
}
|
||||
for numCVs > 2 {
|
||||
if numCVs%2 == 0 {
|
||||
compressParentsAVX2((*[8][8]uint32)(unsafe.Pointer(cvs)), cvs, key, flags)
|
||||
} else {
|
||||
keep := cvs[numCVs-1]
|
||||
compressParentsAVX2((*[8][8]uint32)(unsafe.Pointer(cvs)), cvs, key, flags)
|
||||
cvs[numCVs/2] = keep
|
||||
numCVs++
|
||||
}
|
||||
numCVs /= 2
|
||||
}
|
||||
return parentNode(cvs[0], cvs[1], *key, flags)
|
||||
}
|
||||
|
||||
// wordsToBytes copies the in-memory representation of words into block.
func wordsToBytes(words [16]uint32, block *[64]byte) {
	raw := (*[64]byte)(unsafe.Pointer(&words))
	copy(block[:], raw[:])
}
|
||||
|
||||
// bytesToCV reinterprets the 32 bytes starting at b[0] as eight words in their
// in-memory representation. b must not be empty; the caller is responsible for
// supplying at least 32 bytes.
func bytesToCV(b []byte) [8]uint32 {
	words := (*[8]uint32)(unsafe.Pointer(&b[0]))
	return *words
}
|
||||
|
||||
// cvToBytes returns a byte view of cv. The result aliases cv's memory; no copy
// is made.
func cvToBytes(cv *[8]uint32) *[32]byte {
	raw := unsafe.Pointer(cv)
	return (*[32]byte)(raw)
}
|
||||
143
vendor/lukechampine.com/blake3/compress_generic.go
generated
vendored
Normal file
143
vendor/lukechampine.com/blake3/compress_generic.go
generated
vendored
Normal file
@@ -0,0 +1,143 @@
|
||||
package blake3
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
// compressNodeGeneric is the pure-Go compression function. It mixes the
// node's chaining value, counter, block length, flags and 16 message words
// through seven rounds and stores the 16 resulting words in out.
func compressNodeGeneric(out *[16]uint32, n node) {
	// g is the quarter-round: it mixes four state words with two message
	// words. The negative counts passed to RotateLeft32 make these right
	// rotations by 16, 12, 8 and 7 bits.
	g := func(a, b, c, d, mx, my uint32) (uint32, uint32, uint32, uint32) {
		a += b + mx
		d = bits.RotateLeft32(d^a, -16)
		c += d
		b = bits.RotateLeft32(b^c, -12)
		a += b + my
		d = bits.RotateLeft32(d^a, -8)
		c += d
		b = bits.RotateLeft32(b^c, -7)
		return a, b, c, d
	}

	// NOTE: we unroll all of the rounds, as well as the permutations that occur
	// between rounds.

	// round 1 (also initializes state)
	// The state starts as: s0..s7 = cv, s8..s11 = iv[0..3], s12/s13 = low/high
	// counter halves, s14 = blockLen, s15 = flags.
	// columns
	s0, s4, s8, s12 := g(n.cv[0], n.cv[4], iv[0], uint32(n.counter), n.block[0], n.block[1])
	s1, s5, s9, s13 := g(n.cv[1], n.cv[5], iv[1], uint32(n.counter>>32), n.block[2], n.block[3])
	s2, s6, s10, s14 := g(n.cv[2], n.cv[6], iv[2], n.blockLen, n.block[4], n.block[5])
	s3, s7, s11, s15 := g(n.cv[3], n.cv[7], iv[3], n.flags, n.block[6], n.block[7])
	// diagonals
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[8], n.block[9])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[10], n.block[11])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[12], n.block[13])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[14], n.block[15])

	// round 2
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[2], n.block[6])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[3], n.block[10])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[7], n.block[0])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[4], n.block[13])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[1], n.block[11])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[12], n.block[5])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[9], n.block[14])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[15], n.block[8])

	// round 3
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[3], n.block[4])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[10], n.block[12])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[13], n.block[2])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[7], n.block[14])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[6], n.block[5])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[9], n.block[0])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[11], n.block[15])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[8], n.block[1])

	// round 4
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[10], n.block[7])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[12], n.block[9])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[14], n.block[3])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[13], n.block[15])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[4], n.block[0])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[11], n.block[2])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[5], n.block[8])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[1], n.block[6])

	// round 5
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[12], n.block[13])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[9], n.block[11])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[15], n.block[10])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[14], n.block[8])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[7], n.block[2])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[5], n.block[3])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[0], n.block[1])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[6], n.block[4])

	// round 6
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[9], n.block[14])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[11], n.block[5])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[8], n.block[12])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[15], n.block[1])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[13], n.block[3])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[0], n.block[10])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[2], n.block[6])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[4], n.block[7])

	// round 7
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[11], n.block[15])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[5], n.block[0])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[1], n.block[9])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[8], n.block[6])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[14], n.block[10])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[2], n.block[12])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[3], n.block[4])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[7], n.block[13])

	// finalization
	// The first eight output words XOR the lower and upper state halves; the
	// last eight XOR the upper half with the input chaining value.
	*out = [16]uint32{
		s0 ^ s8, s1 ^ s9, s2 ^ s10, s3 ^ s11,
		s4 ^ s12, s5 ^ s13, s6 ^ s14, s7 ^ s15,
		s8 ^ n.cv[0], s9 ^ n.cv[1], s10 ^ n.cv[2], s11 ^ n.cv[3],
		s12 ^ n.cv[4], s13 ^ n.cv[5], s14 ^ n.cv[6], s15 ^ n.cv[7],
	}
}
|
||||
|
||||
func chainingValue(n node) (cv [8]uint32) {
|
||||
full := compressNode(n)
|
||||
copy(cv[:], full[:])
|
||||
return
|
||||
}
|
||||
|
||||
func compressBufferGeneric(buf *[maxSIMD * chunkSize]byte, buflen int, key *[8]uint32, counter uint64, flags uint32) (n node) {
|
||||
if buflen <= chunkSize {
|
||||
return compressChunk(buf[:buflen], key, counter, flags)
|
||||
}
|
||||
var cvs [maxSIMD][8]uint32
|
||||
var numCVs uint64
|
||||
for bb := bytes.NewBuffer(buf[:buflen]); bb.Len() > 0; numCVs++ {
|
||||
cvs[numCVs] = chainingValue(compressChunk(bb.Next(chunkSize), key, counter+numCVs, flags))
|
||||
}
|
||||
return mergeSubtrees(&cvs, numCVs, key, flags)
|
||||
}
|
||||
|
||||
func compressBlocksGeneric(outs *[maxSIMD][64]byte, n node) {
|
||||
for i := range outs {
|
||||
wordsToBytes(compressNode(n), &outs[i])
|
||||
n.counter++
|
||||
}
|
||||
}
|
||||
|
||||
func mergeSubtreesGeneric(cvs *[maxSIMD][8]uint32, numCVs uint64, key *[8]uint32, flags uint32) node {
|
||||
for numCVs > 2 {
|
||||
rem := numCVs / 2
|
||||
for i := range cvs[:rem] {
|
||||
cvs[i] = chainingValue(parentNode(cvs[i*2], cvs[i*2+1], *key, flags))
|
||||
}
|
||||
if numCVs%2 != 0 {
|
||||
cvs[rem] = cvs[rem*2]
|
||||
rem++
|
||||
}
|
||||
numCVs = rem
|
||||
}
|
||||
return parentNode(cvs[0], cvs[1], *key, flags)
|
||||
}
|
||||
93
vendor/lukechampine.com/blake3/compress_noasm.go
generated
vendored
Normal file
93
vendor/lukechampine.com/blake3/compress_noasm.go
generated
vendored
Normal file
@@ -0,0 +1,93 @@
|
||||
//go:build !amd64
|
||||
// +build !amd64
|
||||
|
||||
package blake3
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
func compressNode(n node) (out [16]uint32) {
|
||||
compressNodeGeneric(&out, n)
|
||||
return
|
||||
}
|
||||
|
||||
func compressBuffer(buf *[maxSIMD * chunkSize]byte, buflen int, key *[8]uint32, counter uint64, flags uint32) node {
|
||||
return compressBufferGeneric(buf, buflen, key, counter, flags)
|
||||
}
|
||||
|
||||
func compressChunk(chunk []byte, key *[8]uint32, counter uint64, flags uint32) node {
|
||||
n := node{
|
||||
cv: *key,
|
||||
counter: counter,
|
||||
blockLen: blockSize,
|
||||
flags: flags | flagChunkStart,
|
||||
}
|
||||
var block [blockSize]byte
|
||||
for len(chunk) > blockSize {
|
||||
copy(block[:], chunk)
|
||||
chunk = chunk[blockSize:]
|
||||
bytesToWords(block, &n.block)
|
||||
n.cv = chainingValue(n)
|
||||
n.flags &^= flagChunkStart
|
||||
}
|
||||
// pad last block with zeros
|
||||
block = [blockSize]byte{}
|
||||
n.blockLen = uint32(len(chunk))
|
||||
copy(block[:], chunk)
|
||||
bytesToWords(block, &n.block)
|
||||
n.flags |= flagChunkEnd
|
||||
return n
|
||||
}
|
||||
|
||||
func hashBlock(out *[64]byte, buf []byte) {
|
||||
var block [64]byte
|
||||
var words [16]uint32
|
||||
copy(block[:], buf)
|
||||
bytesToWords(block, &words)
|
||||
compressNodeGeneric(&words, node{
|
||||
cv: iv,
|
||||
block: words,
|
||||
blockLen: uint32(len(buf)),
|
||||
flags: flagChunkStart | flagChunkEnd | flagRoot,
|
||||
})
|
||||
wordsToBytes(words, out)
|
||||
}
|
||||
|
||||
func compressBlocks(out *[maxSIMD * blockSize]byte, n node) {
|
||||
var outs [maxSIMD][64]byte
|
||||
compressBlocksGeneric(&outs, n)
|
||||
for i := range outs {
|
||||
copy(out[i*64:], outs[i][:])
|
||||
}
|
||||
}
|
||||
|
||||
func mergeSubtrees(cvs *[maxSIMD][8]uint32, numCVs uint64, key *[8]uint32, flags uint32) node {
|
||||
return mergeSubtreesGeneric(cvs, numCVs, key, flags)
|
||||
}
|
||||
|
||||
// bytesToWords decodes bytes as 16 little-endian 32-bit words into words.
func bytesToWords(bytes [64]byte, words *[16]uint32) {
	for i, off := 0, 0; i < len(words); i, off = i+1, off+4 {
		words[i] = binary.LittleEndian.Uint32(bytes[off:])
	}
}
|
||||
|
||||
// wordsToBytes encodes words into block as 16 little-endian 32-bit values.
func wordsToBytes(words [16]uint32, block *[64]byte) {
	for i := 0; i < len(words); i++ {
		binary.LittleEndian.PutUint32(block[4*i:], words[i])
	}
}
|
||||
|
||||
// bytesToCV decodes the first 32 bytes of b as eight little-endian 32-bit
// words. It panics if b is shorter than 32 bytes.
func bytesToCV(b []byte) [8]uint32 {
	var cv [8]uint32
	for i, off := 0, 0; i < len(cv); i, off = i+1, off+4 {
		cv[i] = binary.LittleEndian.Uint32(b[off:])
	}
	return cv
}
|
||||
|
||||
// cvToBytes encodes cv as 32 little-endian bytes. The result is a fresh array;
// it does not alias cv.
func cvToBytes(cv *[8]uint32) *[32]byte {
	encoded := new([32]byte)
	for i := 0; i < len(cv); i++ {
		binary.LittleEndian.PutUint32(encoded[4*i:], cv[i])
	}
	return encoded
}
|
||||
10
vendor/lukechampine.com/blake3/cpu.go
generated
vendored
Normal file
10
vendor/lukechampine.com/blake3/cpu.go
generated
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
// +build !darwin
|
||||
|
||||
package blake3
|
||||
|
||||
import "github.com/klauspost/cpuid/v2"
|
||||
|
||||
// CPU feature flags, queried once at package initialization through the cpuid
// package. The amd64 code paths consult them to choose between the assembly
// and generic routines.
var (
	haveAVX2   = cpuid.CPU.Supports(cpuid.AVX2)
	haveAVX512 = cpuid.CPU.Supports(cpuid.AVX512F)
)
|
||||
22
vendor/lukechampine.com/blake3/cpu_darwin.go
generated
vendored
Normal file
22
vendor/lukechampine.com/blake3/cpu_darwin.go
generated
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
package blake3
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
|
||||
"github.com/klauspost/cpuid/v2"
|
||||
)
|
||||
|
||||
// CPU feature flags. On darwin they are populated by init rather than by
// initializer expressions, because AVX-512 detection needs a sysctl fallback.
var (
	haveAVX2   bool
	haveAVX512 bool
)
|
||||
|
||||
func init() {
|
||||
haveAVX2 = cpuid.CPU.Supports(cpuid.AVX2)
|
||||
haveAVX512 = cpuid.CPU.Supports(cpuid.AVX512F)
|
||||
if !haveAVX512 {
|
||||
// On some Macs, AVX512 detection is buggy, so fallback to sysctl
|
||||
b, _ := syscall.Sysctl("hw.optional.avx512f")
|
||||
haveAVX512 = len(b) > 0 && b[0] == 1
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user