dcrd/txscript/script.go
Dave Collins f93c1de13f
txscript: Implement efficient opcode data removal.
This introduces a new function named removeOpcodeByDataRaw which accepts
the raw scripts and data to remove versus requiring the parsed opcodes
to both significantly optimize it as well as make it more flexible for
working with raw scripts.

There are several places in the rest of the code that currently only
have access to the parsed opcodes, so this only introduces the function
for use in the future and deprecates the existing one.

Note that, in practice, the script will never actually contain the data
that is intended to be removed since the function is only used during
signature verification to remove the signature itself which would
require some incredibly non-standard code to create.

Thus, as an optimization, it avoids allocating a new script unless there
is actually a match that needs to be removed.

Finally, it updates the tests to use the new function.
2019-03-26 14:55:35 -05:00

561 lines
19 KiB
Go

// Copyright (c) 2013-2017 The btcsuite developers
// Copyright (c) 2015-2019 The Decred developers
// Use of this source code is governed by an ISC
// license that can be found in the LICENSE file.
package txscript
import (
"bytes"
"fmt"
"strings"
)
// These are the constants specified for maximums in individual scripts.
const (
MaxOpsPerScript = 255 // Max number of non-push operations.
MaxPubKeysPerMultiSig = 20 // Multisig can't have more sigs than this.
MaxScriptElementSize = 2048 // Max bytes pushable to the stack.
)
// isSmallInt returns whether or not the opcode is considered a small integer,
// which is an OP_0, or OP_1 through OP_16.
//
// NOTE: This function is only valid for version 0 opcodes. Since the function
// does not accept a script version, the results are undefined for other script
// versions.
func isSmallInt(op byte) bool {
return op == OP_0 || (op >= OP_1 && op <= OP_16)
}
// IsPayToScriptHash returns true if the script is in the standard
// pay-to-script-hash (P2SH) format, false otherwise.
//
// WARNING: This function always treats the passed script as version 0. Great
// care must be taken if introducing a new script version because it is used in
// consensus which, unfortunately as of the time of this writing, does not check
// script versions before determining if the script is a P2SH which means nodes
// on existing rules will analyze new version scripts as if they were version 0.
func IsPayToScriptHash(script []byte) bool {
return isScriptHashScript(script)
}
// IsPushOnlyScript returns whether or not the passed script only pushes data
// according to the consensus definition of pushing data.
//
// WARNING: This function always treats the passed script as version 0. Great
// care must be taken if introducing a new script version because it is used in
// consensus which, unfortunately as of the time of this writing, does not check
// script versions before checking if it is a push only script which means nodes
// on existing rules will treat new version scripts as if they were version 0.
func IsPushOnlyScript(script []byte) bool {
const scriptVersion = 0
tokenizer := MakeScriptTokenizer(scriptVersion, script)
for tokenizer.Next() {
// All opcodes up to OP_16 are data push instructions.
// NOTE: This does consider OP_RESERVED to be a data push instruction,
// but execution of OP_RESERVED will fail anyway and matches the
// behavior required by consensus.
if tokenizer.Opcode() > OP_16 {
return false
}
}
return tokenizer.Err() == nil
}
// isStakeOpcode returns whether or not the opcode is one of the stake tagging
// opcodes.
func isStakeOpcode(op byte) bool {
return op >= OP_SSTX && op <= OP_SSTXCHANGE
}
// extractScriptHash extracts the script hash from the passed script if it is a
// standard pay-to-script-hash script. It will return nil otherwise.
//
// NOTE: This function is only valid for version 0 opcodes. Since the function
// does not accept a script version, the results are undefined for other script
// versions.
func extractScriptHash(script []byte) []byte {
// A pay-to-script-hash script is of the form:
// OP_HASH160 <20-byte scripthash> OP_EQUAL
if len(script) == 23 &&
script[0] == OP_HASH160 &&
script[1] == OP_DATA_20 &&
script[22] == OP_EQUAL {
return script[2:22]
}
return nil
}
// isScriptHashScript returns whether or not the passed script is a standard
// pay-to-script-hash script.
func isScriptHashScript(script []byte) bool {
return extractScriptHash(script) != nil
}
// isStakeScriptHashScript returns whether or not the passed script is a
// stake-tagged pay-to-script-hash script.
func isStakeScriptHashScript(script []byte) bool {
return len(script) == 24 &&
isStakeOpcode(script[0]) &&
script[1] == OP_HASH160 &&
script[2] == OP_DATA_20 &&
script[23] == OP_EQUAL
}
// isAnyKindOfScriptHash returns whether or not the passed script is either a
// regular pay-to-script-hash script or a stake-tagged pay-to-script-hash
// script.
func isAnyKindOfScriptHash(script []byte) bool {
return isScriptHashScript(script) || isStakeScriptHashScript(script)
}
// HasP2SHScriptSigStakeOpCodes returns an error is the p2sh script has either
// stake opcodes or if the pkscript cannot be retrieved.
//
// DEPRECATED. This will be removed in the next major version bump.
func HasP2SHScriptSigStakeOpCodes(version uint16, scriptSig, scriptPubKey []byte) error {
class := GetScriptClass(version, scriptPubKey)
if IsStakeOutput(scriptPubKey) {
class, _ = GetStakeOutSubclass(scriptPubKey)
}
if class == ScriptHashTy {
// Obtain the embedded pkScript from the scriptSig of the
// current transaction. Then, ensure that it does not use
// any stake tagging OP codes.
pData, err := PushedData(scriptSig)
if err != nil {
return err
}
if len(pData) == 0 {
str := "script has no pushed data"
return scriptError(ErrNotPushOnly, str)
}
// The pay-to-hash-script is the final data push of the
// signature script.
shScript := pData[len(pData)-1]
hasStakeOpCodes, err := ContainsStakeOpCodes(shScript)
if err != nil {
return err
}
if hasStakeOpCodes {
str := "stake opcodes were found in a p2sh script"
return scriptError(ErrP2SHStakeOpCodes, str)
}
}
return nil
}
// parseScriptTemplate is the same as parseScript but allows the passing of the
// template list for testing purposes. When there are parse errors, it returns
// the list of parsed opcodes up to the point of failure along with the error.
func parseScriptTemplate(script []byte, opcodes *[256]opcode) ([]parsedOpcode, error) {
retScript := make([]parsedOpcode, 0, len(script))
for i := 0; i < len(script); {
instr := script[i]
op := &opcodes[instr]
pop := parsedOpcode{opcode: op}
// Parse data out of instruction.
switch {
// No additional data. Note that some of the opcodes, notably
// OP_1NEGATE, OP_0, and OP_[1-16] represent the data
// themselves.
case op.length == 1:
i++
// Data pushes of specific lengths -- OP_DATA_[1-75].
case op.length > 1:
if len(script[i:]) < op.length {
str := fmt.Sprintf("opcode %s requires %d "+
"bytes, but script only has %d remaining",
op.name, op.length, len(script[i:]))
return retScript, scriptError(ErrMalformedPush,
str)
}
// Slice out the data.
pop.data = script[i+1 : i+op.length]
i += op.length
// Data pushes with parsed lengths -- OP_PUSHDATAP{1,2,4}.
case op.length < 0:
var l uint
off := i + 1
if len(script[off:]) < -op.length {
str := fmt.Sprintf("opcode %s requires %d "+
"bytes, but script only has %d remaining",
op.name, -op.length, len(script[off:]))
return retScript, scriptError(ErrMalformedPush,
str)
}
// Next -length bytes are little endian length of data.
switch op.length {
case -1:
l = uint(script[off])
case -2:
l = ((uint(script[off+1]) << 8) |
uint(script[off]))
case -4:
l = ((uint(script[off+3]) << 24) |
(uint(script[off+2]) << 16) |
(uint(script[off+1]) << 8) |
uint(script[off]))
default:
str := fmt.Sprintf("invalid opcode length %d",
op.length)
return retScript, scriptError(ErrMalformedPush,
str)
}
// Move offset to beginning of the data.
off += -op.length
// Disallow entries that do not fit script or were
// sign extended.
if int(l) > len(script[off:]) || int(l) < 0 {
str := fmt.Sprintf("opcode %s pushes %d bytes, "+
"but script only has %d remaining",
op.name, int(l), len(script[off:]))
return retScript, scriptError(ErrMalformedPush,
str)
}
pop.data = script[off : off+int(l)]
i += 1 - op.length + int(l)
}
retScript = append(retScript, pop)
}
return retScript, nil
}
// parseScript preparses the script in bytes into a list of parsedOpcodes while
// applying a number of sanity checks.
func parseScript(script []byte) ([]parsedOpcode, error) {
return parseScriptTemplate(script, &opcodeArray)
}
// unparseScript reversed the action of parseScript and returns the
// parsedOpcodes as a list of bytes
func unparseScript(pops []parsedOpcode) ([]byte, error) {
script := make([]byte, 0, len(pops))
for _, pop := range pops {
b, err := pop.bytes()
if err != nil {
return nil, err
}
script = append(script, b...)
}
return script, nil
}
// DisasmString formats a disassembled script for one line printing. When the
// script fails to parse, the returned string will contain the disassembled
// script up to the point the failure occurred along with the string '[error]'
// appended. In addition, the reason the script failed to parse is returned
// if the caller wants more information about the failure.
//
// NOTE: This function is only valid for version 0 scripts. Since the function
// does not accept a script version, the results are undefined for other script
// versions.
func DisasmString(script []byte) (string, error) {
const scriptVersion = 0
var disbuf strings.Builder
tokenizer := MakeScriptTokenizer(scriptVersion, script)
if tokenizer.Next() {
disasmOpcode(&disbuf, tokenizer.op, tokenizer.Data(), true)
}
for tokenizer.Next() {
disbuf.WriteByte(' ')
disasmOpcode(&disbuf, tokenizer.op, tokenizer.Data(), true)
}
if tokenizer.Err() != nil {
if tokenizer.ByteIndex() != 0 {
disbuf.WriteByte(' ')
}
disbuf.WriteString("[error]")
}
return disbuf.String(), tokenizer.Err()
}
// isCanonicalPush returns true if the opcode is either not a push instruction
// or the data associated with the push instruction uses the smallest
// instruction to do the job. False otherwise.
//
// For example, it is possible to push a value of 1 to the stack as "OP_1",
// "OP_DATA_1 0x01", "OP_PUSHDATA1 0x01 0x01", and others, however, the first
// only takes a single byte, while the rest take more. Only the first is
// considered canonical.
func isCanonicalPush(opcode byte, data []byte) bool {
dataLen := len(data)
if opcode > OP_16 {
return true
}
if opcode < OP_PUSHDATA1 && opcode > OP_0 && (dataLen == 1 && data[0] <= 16) {
return false
}
if opcode == OP_PUSHDATA1 && dataLen < OP_PUSHDATA1 {
return false
}
if opcode == OP_PUSHDATA2 && dataLen <= 0xff {
return false
}
if opcode == OP_PUSHDATA4 && dataLen <= 0xffff {
return false
}
return true
}
// removeOpcodeByData will return the script minus any opcodes that would push
// the passed data to the stack.
//
// DEPRECATED. Use removeOpcodeByDataRaw instead.
func removeOpcodeByData(pkscript []parsedOpcode, data []byte) []parsedOpcode {
retScript := make([]parsedOpcode, 0, len(pkscript))
for _, pop := range pkscript {
if !isCanonicalPush(pop.opcode.value, pop.data) ||
!bytes.Contains(pop.data, data) {
retScript = append(retScript, pop)
}
}
return retScript
}
// removeOpcodeByDataRaw will return the script minus any opcodes that perform a
// canonical push of data that contains the passed data to remove. This
// function assumes it is provided a version 0 script as any future version of
// script should avoid this functionality since it is unncessary due to the
// signature scripts not being part of the witness-free transaction hash.
//
// WARNING: This will return the passed script unmodified unless a modification
// is necessary in which case the modified script is returned. This implies
// callers may NOT rely on being able to safely mutate either the passed or
// returned script without potentially modifying the same data.
//
// NOTE: This function is only valid for version 0 scripts. Since the function
// does not accept a script version, the results are undefined for other script
// versions.
func removeOpcodeByDataRaw(script []byte, dataToRemove []byte) []byte {
// Avoid work when possible.
if len(script) == 0 || len(dataToRemove) == 0 {
return script
}
// Parse through the script looking for a canonical data push that contains
// the data to remove.
const scriptVersion = 0
var result []byte
var prevOffset int32
tokenizer := MakeScriptTokenizer(scriptVersion, script)
for tokenizer.Next() {
// In practice, the script will basically never actually contain the
// data since this function is only used during signature verification
// to remove the signature itself which would require some incredibly
// non-standard code to create.
//
// Thus, as an optimization, avoid allocating a new script unless there
// is actually a match that needs to be removed.
op, data := tokenizer.Opcode(), tokenizer.Data()
if isCanonicalPush(op, data) && bytes.Contains(data, dataToRemove) {
if result == nil {
fullPushLen := tokenizer.ByteIndex() - prevOffset
result = make([]byte, 0, int32(len(script))-fullPushLen)
result = append(result, script[0:prevOffset]...)
}
} else if result != nil {
result = append(result, script[prevOffset:tokenizer.ByteIndex()]...)
}
prevOffset = tokenizer.ByteIndex()
}
if result == nil {
result = script
}
return result
}
// asSmallInt returns the passed opcode, which must be true according to
// isSmallInt(), as an integer.
func asSmallInt(op byte) int {
if op == OP_0 {
return 0
}
return int(op - (OP_1 - 1))
}
// countSigOpsV0 returns the number of signature operations in the provided
// script up to the point of the first parse failure or the entire script when
// there are no parse failures. The precise flag attempts to accurately count
// the number of operations for a multisig operation versus using the maximum
// allowed.
//
// WARNING: This function always treats the passed script as version 0. Great
// care must be taken if introducing a new script version because it is used in
// consensus which, unfortunately as of the time of this writing, does not check
// script versions before counting their signature operations which means nodes
// on existing rules will count new version scripts as if they were version 0.
func countSigOpsV0(script []byte, precise bool) int {
const scriptVersion = 0
numSigOps := 0
tokenizer := MakeScriptTokenizer(scriptVersion, script)
prevOp := byte(OP_INVALIDOPCODE)
for tokenizer.Next() {
switch tokenizer.Opcode() {
case OP_CHECKSIG, OP_CHECKSIGVERIFY, OP_CHECKSIGALT,
OP_CHECKSIGALTVERIFY:
numSigOps++
case OP_CHECKMULTISIG, OP_CHECKMULTISIGVERIFY:
// Note that OP_0 is treated as the max number of sigops here in
// precise mode despite it being a valid small integer in order to
// highly discourage multisigs with zero pubkeys.
//
// Also, even though this is referred to as "precise" counting, it's
// not really precise at all due to the small int opcodes only
// covering 1 through 16 pubkeys, which means this will count any
// more than that value (e.g. 17, 18 19) as the maximum number of
// allowed pubkeys. This was inherited from bitcoin and is,
// unfortunately, now part of the consensus rules. This could be
// made more correct with a new script version, however, ideally all
// multisignaure operations in new script versions should move to
// aggregated schemes such as Schnorr instead.
if precise && prevOp >= OP_1 && prevOp <= OP_16 {
numSigOps += asSmallInt(prevOp)
} else {
numSigOps += MaxPubKeysPerMultiSig
}
default:
// Not a sigop.
}
prevOp = tokenizer.Opcode()
}
return numSigOps
}
// GetSigOpCount provides a quick count of the number of signature operations
// in a script. a CHECKSIG operations counts for 1, and a CHECK_MULTISIG for 20.
// If the script fails to parse, then the count up to the point of failure is
// returned.
//
// WARNING: This function always treats the passed script as version 0. Great
// care must be taken if introducing a new script version because it is used in
// consensus which, unfortunately as of the time of this writing, does not check
// script versions before counting their signature operations which means nodes
// on existing rules will count new version scripts as if they were version 0.
func GetSigOpCount(script []byte) int {
return countSigOpsV0(script, false)
}
// finalOpcodeData returns the data associated with the final opcode in the
// script. It will return nil if the script fails to parse.
func finalOpcodeData(scriptVersion uint16, script []byte) []byte {
// Avoid unnecessary work.
if len(script) == 0 {
return nil
}
var data []byte
tokenizer := MakeScriptTokenizer(scriptVersion, script)
for tokenizer.Next() {
data = tokenizer.Data()
}
if tokenizer.Err() != nil {
return nil
}
return data
}
// GetPreciseSigOpCount returns the number of signature operations in
// scriptPubKey. If bip16 is true then scriptSig may be searched for the
// Pay-To-Script-Hash script in order to find the precise number of signature
// operations in the transaction. If the script fails to parse, then the count
// up to the point of failure is returned.
//
// WARNING: This function always treats the passed script as version 0. Great
// care must be taken if introducing a new script version because it is used in
// consensus which, unfortunately as of the time of this writing, does not check
// script versions before counting their signature operations which means nodes
// on existing rules will count new version scripts as if they were version 0.
//
// The third parameter is DEPRECATED and is unused.
func GetPreciseSigOpCount(scriptSig, scriptPubKey []byte, _ bool) int {
const scriptVersion = 0
// Treat non P2SH transactions as normal. Note that signature operation
// counting includes all operations up to the first parse failure.
if !isScriptHashScript(scriptPubKey) {
return countSigOpsV0(scriptPubKey, true)
}
// The signature script must only push data to the stack for P2SH to be
// a valid pair, so the signature operation count is 0 when that is not
// the case.
if len(scriptSig) == 0 || !IsPushOnlyScript(scriptSig) {
return 0
}
// The P2SH script is the last item the signature script pushes to the
// stack. When the script is empty, there are no signature operations.
//
// Notice that signature scripts that fail to fully parse count as 0
// signature operations unlike public key and redeem scripts.
redeemScript := finalOpcodeData(scriptVersion, scriptSig)
if len(redeemScript) == 0 {
return 0
}
// Return the more precise sigops count for the redeem script. Note that
// signature operation counting includes all operations up to the first
// parse failure.
return countSigOpsV0(redeemScript, true)
}
// checkScriptParses returns an error if the provided script fails to parse.
func checkScriptParses(scriptVersion uint16, script []byte) error {
tokenizer := MakeScriptTokenizer(scriptVersion, script)
for tokenizer.Next() {
// Nothing to do.
}
return tokenizer.Err()
}
// IsUnspendable returns whether the passed public key script is unspendable, or
// guaranteed to fail at execution. This allows inputs to be pruned instantly
// when entering the UTXO set. In Decred, all zero value outputs are unspendable.
//
// NOTE: This function is only valid for version 0 scripts. Since the function
// does not accept a script version, the results are undefined for other script
// versions.
func IsUnspendable(amount int64, pkScript []byte) bool {
// The script is unspendable if starts with OP_RETURN or is guaranteed to
// fail at execution due to being larger than the max allowed script size.
if amount == 0 || len(pkScript) > MaxScriptSize || len(pkScript) > 0 &&
pkScript[0] == OP_RETURN {
return true
}
// The script is unspendable if it is guaranteed to fail at execution.
const scriptVersion = 0
return checkScriptParses(scriptVersion, pkScript) != nil
}