 9bdcbe0447
			
		
	
	9bdcbe0447
	
	
	
		
			
			Major integrations and fixes: - Added BACKBEAT SDK integration for P2P operation timing - Implemented beat-aware status tracking for distributed operations - Added Docker secrets support for secure license management - Resolved KACHING license validation via HTTPS/TLS - Updated docker-compose configuration for clean stack deployment - Disabled rollback policies to prevent deployment failures - Added license credential storage (CHORUS-DEV-MULTI-001) Technical improvements: - BACKBEAT P2P operation tracking with phase management - Enhanced configuration system with file-based secrets - Improved error handling for license validation - Clean separation of KACHING and CHORUS deployment stacks 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
		
			
				
	
	
		
			444 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			PPC64 Go Assembly
		
	
	
	
	
	
			
		
		
	
	
			444 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			PPC64 Go Assembly
		
	
	
	
	
	
| // Copyright 2019 The Go Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| // Based on CRYPTOGAMS code with the following comment:
 | |
| // # ====================================================================
 | |
| // # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 | |
| // # project. The module is, however, dual licensed under OpenSSL and
 | |
| // # CRYPTOGAMS licenses depending on where you obtain it. For further
 | |
| // # details see http://www.openssl.org/~appro/cryptogams/.
 | |
| // # ====================================================================
 | |
| 
 | |
| // Code for the perl script that generates the ppc64 assembler
 | |
| // can be found in the cryptogams repository at the link below. It is based on
 | |
| // the original from openssl.
 | |
| 
 | |
| // https://github.com/dot-asm/cryptogams/commit/a60f5b50ed908e91
 | |
| 
 | |
| // The differences in this and the original implementation are
 | |
| // due to the calling conventions and initialization of constants.
 | |
| 
 | |
| //go:build gc && !purego
 | |
| 
 | |
| #include "textflag.h"
 | |
| 
 | |
| #define OUT  R3
 | |
| #define INP  R4
 | |
| #define LEN  R5
 | |
| #define KEY  R6
 | |
| #define CNT  R7
 | |
| #define TMP  R15
 | |
| 
 | |
| #define CONSTBASE  R16
 | |
| #define BLOCKS R17
 | |
| 
 | |
| // for VPERMXOR
 | |
| #define MASK  R18
 | |
| 
 | |
| DATA consts<>+0x00(SB)/8, $0x3320646e61707865
 | |
| DATA consts<>+0x08(SB)/8, $0x6b20657479622d32
 | |
| DATA consts<>+0x10(SB)/8, $0x0000000000000001
 | |
| DATA consts<>+0x18(SB)/8, $0x0000000000000000
 | |
| DATA consts<>+0x20(SB)/8, $0x0000000000000004
 | |
| DATA consts<>+0x28(SB)/8, $0x0000000000000000
 | |
| DATA consts<>+0x30(SB)/8, $0x0a0b08090e0f0c0d
 | |
| DATA consts<>+0x38(SB)/8, $0x0203000106070405
 | |
| DATA consts<>+0x40(SB)/8, $0x090a0b080d0e0f0c
 | |
| DATA consts<>+0x48(SB)/8, $0x0102030005060704
 | |
| DATA consts<>+0x50(SB)/8, $0x6170786561707865
 | |
| DATA consts<>+0x58(SB)/8, $0x6170786561707865
 | |
| DATA consts<>+0x60(SB)/8, $0x3320646e3320646e
 | |
| DATA consts<>+0x68(SB)/8, $0x3320646e3320646e
 | |
| DATA consts<>+0x70(SB)/8, $0x79622d3279622d32
 | |
| DATA consts<>+0x78(SB)/8, $0x79622d3279622d32
 | |
| DATA consts<>+0x80(SB)/8, $0x6b2065746b206574
 | |
| DATA consts<>+0x88(SB)/8, $0x6b2065746b206574
 | |
| DATA consts<>+0x90(SB)/8, $0x0000000100000000
 | |
| DATA consts<>+0x98(SB)/8, $0x0000000300000002
 | |
| DATA consts<>+0xa0(SB)/8, $0x5566774411223300
 | |
| DATA consts<>+0xa8(SB)/8, $0xddeeffcc99aabb88
 | |
| DATA consts<>+0xb0(SB)/8, $0x6677445522330011
 | |
| DATA consts<>+0xb8(SB)/8, $0xeeffccddaabb8899
 | |
| GLOBL consts<>(SB), RODATA, $0xc0
 | |
| 
 | |
| //func chaCha20_ctr32_vsx(out, inp *byte, len int, key *[8]uint32, counter *uint32)
 | |
| TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40
 | |
| 	MOVD out+0(FP), OUT
 | |
| 	MOVD inp+8(FP), INP
 | |
| 	MOVD len+16(FP), LEN
 | |
| 	MOVD key+24(FP), KEY
 | |
| 	MOVD counter+32(FP), CNT
 | |
| 
 | |
| 	// Addressing for constants
 | |
| 	MOVD $consts<>+0x00(SB), CONSTBASE
 | |
| 	MOVD $16, R8
 | |
| 	MOVD $32, R9
 | |
| 	MOVD $48, R10
 | |
| 	MOVD $64, R11
 | |
| 	SRD $6, LEN, BLOCKS
 | |
| 	// for VPERMXOR
 | |
| 	MOVD $consts<>+0xa0(SB), MASK
 | |
| 	MOVD $16, R20
 | |
| 	// V16
 | |
| 	LXVW4X (CONSTBASE)(R0), VS48
 | |
| 	ADD $80,CONSTBASE
 | |
| 
 | |
| 	// Load key into V17,V18
 | |
| 	LXVW4X (KEY)(R0), VS49
 | |
| 	LXVW4X (KEY)(R8), VS50
 | |
| 
 | |
| 	// Load CNT, NONCE into V19
 | |
| 	LXVW4X (CNT)(R0), VS51
 | |
| 
 | |
| 	// Clear V27
 | |
| 	VXOR V27, V27, V27
 | |
| 
 | |
| 	// V28
 | |
| 	LXVW4X (CONSTBASE)(R11), VS60
 | |
| 
 | |
| 	// Load mask constants for VPERMXOR
 | |
| 	LXVW4X (MASK)(R0), V20
 | |
| 	LXVW4X (MASK)(R20), V21
 | |
| 
 | |
| 	// splat slot from V19 -> V26
 | |
| 	VSPLTW $0, V19, V26
 | |
| 
 | |
| 	VSLDOI $4, V19, V27, V19
 | |
| 	VSLDOI $12, V27, V19, V19
 | |
| 
 | |
| 	VADDUWM V26, V28, V26
 | |
| 
 | |
| 	MOVD $10, R14
 | |
| 	MOVD R14, CTR
 | |
| 	PCALIGN $16
 | |
| loop_outer_vsx:
 | |
| 	// V0, V1, V2, V3
 | |
| 	LXVW4X (R0)(CONSTBASE), VS32
 | |
| 	LXVW4X (R8)(CONSTBASE), VS33
 | |
| 	LXVW4X (R9)(CONSTBASE), VS34
 | |
| 	LXVW4X (R10)(CONSTBASE), VS35
 | |
| 
 | |
| 	// splat values from V17, V18 into V4-V11
 | |
| 	VSPLTW $0, V17, V4
 | |
| 	VSPLTW $1, V17, V5
 | |
| 	VSPLTW $2, V17, V6
 | |
| 	VSPLTW $3, V17, V7
 | |
| 	VSPLTW $0, V18, V8
 | |
| 	VSPLTW $1, V18, V9
 | |
| 	VSPLTW $2, V18, V10
 | |
| 	VSPLTW $3, V18, V11
 | |
| 
 | |
| 	// VOR
 | |
| 	VOR V26, V26, V12
 | |
| 
 | |
| 	// splat values from V19 -> V13, V14, V15
 | |
| 	VSPLTW $1, V19, V13
 | |
| 	VSPLTW $2, V19, V14
 | |
| 	VSPLTW $3, V19, V15
 | |
| 
 | |
| 	// splat   const values
 | |
| 	VSPLTISW $-16, V27
 | |
| 	VSPLTISW $12, V28
 | |
| 	VSPLTISW $8, V29
 | |
| 	VSPLTISW $7, V30
 | |
| 	PCALIGN $16
 | |
| loop_vsx:
 | |
| 	VADDUWM V0, V4, V0
 | |
| 	VADDUWM V1, V5, V1
 | |
| 	VADDUWM V2, V6, V2
 | |
| 	VADDUWM V3, V7, V3
 | |
| 
 | |
| 	VPERMXOR V12, V0, V21, V12
 | |
| 	VPERMXOR V13, V1, V21, V13
 | |
| 	VPERMXOR V14, V2, V21, V14
 | |
| 	VPERMXOR V15, V3, V21, V15
 | |
| 
 | |
| 	VADDUWM V8, V12, V8
 | |
| 	VADDUWM V9, V13, V9
 | |
| 	VADDUWM V10, V14, V10
 | |
| 	VADDUWM V11, V15, V11
 | |
| 
 | |
| 	VXOR V4, V8, V4
 | |
| 	VXOR V5, V9, V5
 | |
| 	VXOR V6, V10, V6
 | |
| 	VXOR V7, V11, V7
 | |
| 
 | |
| 	VRLW V4, V28, V4
 | |
| 	VRLW V5, V28, V5
 | |
| 	VRLW V6, V28, V6
 | |
| 	VRLW V7, V28, V7
 | |
| 
 | |
| 	VADDUWM V0, V4, V0
 | |
| 	VADDUWM V1, V5, V1
 | |
| 	VADDUWM V2, V6, V2
 | |
| 	VADDUWM V3, V7, V3
 | |
| 
 | |
| 	VPERMXOR V12, V0, V20, V12
 | |
| 	VPERMXOR V13, V1, V20, V13
 | |
| 	VPERMXOR V14, V2, V20, V14
 | |
| 	VPERMXOR V15, V3, V20, V15
 | |
| 
 | |
| 	VADDUWM V8, V12, V8
 | |
| 	VADDUWM V9, V13, V9
 | |
| 	VADDUWM V10, V14, V10
 | |
| 	VADDUWM V11, V15, V11
 | |
| 
 | |
| 	VXOR V4, V8, V4
 | |
| 	VXOR V5, V9, V5
 | |
| 	VXOR V6, V10, V6
 | |
| 	VXOR V7, V11, V7
 | |
| 
 | |
| 	VRLW V4, V30, V4
 | |
| 	VRLW V5, V30, V5
 | |
| 	VRLW V6, V30, V6
 | |
| 	VRLW V7, V30, V7
 | |
| 
 | |
| 	VADDUWM V0, V5, V0
 | |
| 	VADDUWM V1, V6, V1
 | |
| 	VADDUWM V2, V7, V2
 | |
| 	VADDUWM V3, V4, V3
 | |
| 
 | |
| 	VPERMXOR V15, V0, V21, V15
 | |
| 	VPERMXOR V12, V1, V21, V12
 | |
| 	VPERMXOR V13, V2, V21, V13
 | |
| 	VPERMXOR V14, V3, V21, V14
 | |
| 
 | |
| 	VADDUWM V10, V15, V10
 | |
| 	VADDUWM V11, V12, V11
 | |
| 	VADDUWM V8, V13, V8
 | |
| 	VADDUWM V9, V14, V9
 | |
| 
 | |
| 	VXOR V5, V10, V5
 | |
| 	VXOR V6, V11, V6
 | |
| 	VXOR V7, V8, V7
 | |
| 	VXOR V4, V9, V4
 | |
| 
 | |
| 	VRLW V5, V28, V5
 | |
| 	VRLW V6, V28, V6
 | |
| 	VRLW V7, V28, V7
 | |
| 	VRLW V4, V28, V4
 | |
| 
 | |
| 	VADDUWM V0, V5, V0
 | |
| 	VADDUWM V1, V6, V1
 | |
| 	VADDUWM V2, V7, V2
 | |
| 	VADDUWM V3, V4, V3
 | |
| 
 | |
| 	VPERMXOR V15, V0, V20, V15
 | |
| 	VPERMXOR V12, V1, V20, V12
 | |
| 	VPERMXOR V13, V2, V20, V13
 | |
| 	VPERMXOR V14, V3, V20, V14
 | |
| 
 | |
| 	VADDUWM V10, V15, V10
 | |
| 	VADDUWM V11, V12, V11
 | |
| 	VADDUWM V8, V13, V8
 | |
| 	VADDUWM V9, V14, V9
 | |
| 
 | |
| 	VXOR V5, V10, V5
 | |
| 	VXOR V6, V11, V6
 | |
| 	VXOR V7, V8, V7
 | |
| 	VXOR V4, V9, V4
 | |
| 
 | |
| 	VRLW V5, V30, V5
 | |
| 	VRLW V6, V30, V6
 | |
| 	VRLW V7, V30, V7
 | |
| 	VRLW V4, V30, V4
 | |
| 	BDNZ   loop_vsx
 | |
| 
 | |
| 	VADDUWM V12, V26, V12
 | |
| 
 | |
| 	VMRGEW V0, V1, V27
 | |
| 	VMRGEW V2, V3, V28
 | |
| 
 | |
| 	VMRGOW V0, V1, V0
 | |
| 	VMRGOW V2, V3, V2
 | |
| 
 | |
| 	VMRGEW V4, V5, V29
 | |
| 	VMRGEW V6, V7, V30
 | |
| 
 | |
| 	XXPERMDI VS32, VS34, $0, VS33
 | |
| 	XXPERMDI VS32, VS34, $3, VS35
 | |
| 	XXPERMDI VS59, VS60, $0, VS32
 | |
| 	XXPERMDI VS59, VS60, $3, VS34
 | |
| 
 | |
| 	VMRGOW V4, V5, V4
 | |
| 	VMRGOW V6, V7, V6
 | |
| 
 | |
| 	VMRGEW V8, V9, V27
 | |
| 	VMRGEW V10, V11, V28
 | |
| 
 | |
| 	XXPERMDI VS36, VS38, $0, VS37
 | |
| 	XXPERMDI VS36, VS38, $3, VS39
 | |
| 	XXPERMDI VS61, VS62, $0, VS36
 | |
| 	XXPERMDI VS61, VS62, $3, VS38
 | |
| 
 | |
| 	VMRGOW V8, V9, V8
 | |
| 	VMRGOW V10, V11, V10
 | |
| 
 | |
| 	VMRGEW V12, V13, V29
 | |
| 	VMRGEW V14, V15, V30
 | |
| 
 | |
| 	XXPERMDI VS40, VS42, $0, VS41
 | |
| 	XXPERMDI VS40, VS42, $3, VS43
 | |
| 	XXPERMDI VS59, VS60, $0, VS40
 | |
| 	XXPERMDI VS59, VS60, $3, VS42
 | |
| 
 | |
| 	VMRGOW V12, V13, V12
 | |
| 	VMRGOW V14, V15, V14
 | |
| 
 | |
| 	VSPLTISW $4, V27
 | |
| 	VADDUWM V26, V27, V26
 | |
| 
 | |
| 	XXPERMDI VS44, VS46, $0, VS45
 | |
| 	XXPERMDI VS44, VS46, $3, VS47
 | |
| 	XXPERMDI VS61, VS62, $0, VS44
 | |
| 	XXPERMDI VS61, VS62, $3, VS46
 | |
| 
 | |
| 	VADDUWM V0, V16, V0
 | |
| 	VADDUWM V4, V17, V4
 | |
| 	VADDUWM V8, V18, V8
 | |
| 	VADDUWM V12, V19, V12
 | |
| 
 | |
| 	CMPU LEN, $64
 | |
| 	BLT tail_vsx
 | |
| 
 | |
| 	// Bottom of loop
 | |
| 	LXVW4X (INP)(R0), VS59
 | |
| 	LXVW4X (INP)(R8), VS60
 | |
| 	LXVW4X (INP)(R9), VS61
 | |
| 	LXVW4X (INP)(R10), VS62
 | |
| 
 | |
| 	VXOR V27, V0, V27
 | |
| 	VXOR V28, V4, V28
 | |
| 	VXOR V29, V8, V29
 | |
| 	VXOR V30, V12, V30
 | |
| 
 | |
| 	STXVW4X VS59, (OUT)(R0)
 | |
| 	STXVW4X VS60, (OUT)(R8)
 | |
| 	ADD     $64, INP
 | |
| 	STXVW4X VS61, (OUT)(R9)
 | |
| 	ADD     $-64, LEN
 | |
| 	STXVW4X VS62, (OUT)(R10)
 | |
| 	ADD     $64, OUT
 | |
| 	BEQ     done_vsx
 | |
| 
 | |
| 	VADDUWM V1, V16, V0
 | |
| 	VADDUWM V5, V17, V4
 | |
| 	VADDUWM V9, V18, V8
 | |
| 	VADDUWM V13, V19, V12
 | |
| 
 | |
| 	CMPU  LEN, $64
 | |
| 	BLT   tail_vsx
 | |
| 
 | |
| 	LXVW4X (INP)(R0), VS59
 | |
| 	LXVW4X (INP)(R8), VS60
 | |
| 	LXVW4X (INP)(R9), VS61
 | |
| 	LXVW4X (INP)(R10), VS62
 | |
| 	VXOR   V27, V0, V27
 | |
| 
 | |
| 	VXOR V28, V4, V28
 | |
| 	VXOR V29, V8, V29
 | |
| 	VXOR V30, V12, V30
 | |
| 
 | |
| 	STXVW4X VS59, (OUT)(R0)
 | |
| 	STXVW4X VS60, (OUT)(R8)
 | |
| 	ADD     $64, INP
 | |
| 	STXVW4X VS61, (OUT)(R9)
 | |
| 	ADD     $-64, LEN
 | |
| 	STXVW4X VS62, (OUT)(V10)
 | |
| 	ADD     $64, OUT
 | |
| 	BEQ     done_vsx
 | |
| 
 | |
| 	VADDUWM V2, V16, V0
 | |
| 	VADDUWM V6, V17, V4
 | |
| 	VADDUWM V10, V18, V8
 | |
| 	VADDUWM V14, V19, V12
 | |
| 
 | |
| 	CMPU LEN, $64
 | |
| 	BLT  tail_vsx
 | |
| 
 | |
| 	LXVW4X (INP)(R0), VS59
 | |
| 	LXVW4X (INP)(R8), VS60
 | |
| 	LXVW4X (INP)(R9), VS61
 | |
| 	LXVW4X (INP)(R10), VS62
 | |
| 
 | |
| 	VXOR V27, V0, V27
 | |
| 	VXOR V28, V4, V28
 | |
| 	VXOR V29, V8, V29
 | |
| 	VXOR V30, V12, V30
 | |
| 
 | |
| 	STXVW4X VS59, (OUT)(R0)
 | |
| 	STXVW4X VS60, (OUT)(R8)
 | |
| 	ADD     $64, INP
 | |
| 	STXVW4X VS61, (OUT)(R9)
 | |
| 	ADD     $-64, LEN
 | |
| 	STXVW4X VS62, (OUT)(R10)
 | |
| 	ADD     $64, OUT
 | |
| 	BEQ     done_vsx
 | |
| 
 | |
| 	VADDUWM V3, V16, V0
 | |
| 	VADDUWM V7, V17, V4
 | |
| 	VADDUWM V11, V18, V8
 | |
| 	VADDUWM V15, V19, V12
 | |
| 
 | |
| 	CMPU  LEN, $64
 | |
| 	BLT   tail_vsx
 | |
| 
 | |
| 	LXVW4X (INP)(R0), VS59
 | |
| 	LXVW4X (INP)(R8), VS60
 | |
| 	LXVW4X (INP)(R9), VS61
 | |
| 	LXVW4X (INP)(R10), VS62
 | |
| 
 | |
| 	VXOR V27, V0, V27
 | |
| 	VXOR V28, V4, V28
 | |
| 	VXOR V29, V8, V29
 | |
| 	VXOR V30, V12, V30
 | |
| 
 | |
| 	STXVW4X VS59, (OUT)(R0)
 | |
| 	STXVW4X VS60, (OUT)(R8)
 | |
| 	ADD     $64, INP
 | |
| 	STXVW4X VS61, (OUT)(R9)
 | |
| 	ADD     $-64, LEN
 | |
| 	STXVW4X VS62, (OUT)(R10)
 | |
| 	ADD     $64, OUT
 | |
| 
 | |
| 	MOVD $10, R14
 | |
| 	MOVD R14, CTR
 | |
| 	BNE  loop_outer_vsx
 | |
| 
 | |
| done_vsx:
 | |
| 	// Increment counter by number of 64 byte blocks
 | |
| 	MOVD (CNT), R14
 | |
| 	ADD  BLOCKS, R14
 | |
| 	MOVD R14, (CNT)
 | |
| 	RET
 | |
| 
 | |
| tail_vsx:
 | |
| 	ADD  $32, R1, R11
 | |
| 	MOVD LEN, CTR
 | |
| 
 | |
| 	// Save values on stack to copy from
 | |
| 	STXVW4X VS32, (R11)(R0)
 | |
| 	STXVW4X VS36, (R11)(R8)
 | |
| 	STXVW4X VS40, (R11)(R9)
 | |
| 	STXVW4X VS44, (R11)(R10)
 | |
| 	ADD $-1, R11, R12
 | |
| 	ADD $-1, INP
 | |
| 	ADD $-1, OUT
 | |
| 	PCALIGN $16
 | |
| looptail_vsx:
 | |
| 	// Copying the result to OUT
 | |
| 	// in bytes.
 | |
| 	MOVBZU 1(R12), KEY
 | |
| 	MOVBZU 1(INP), TMP
 | |
| 	XOR    KEY, TMP, KEY
 | |
| 	MOVBU  KEY, 1(OUT)
 | |
| 	BDNZ   looptail_vsx
 | |
| 
 | |
| 	// Clear the stack values
 | |
| 	STXVW4X VS48, (R11)(R0)
 | |
| 	STXVW4X VS48, (R11)(R8)
 | |
| 	STXVW4X VS48, (R11)(R9)
 | |
| 	STXVW4X VS48, (R11)(R10)
 | |
| 	BR      done_vsx
 |