Skip to content

Commit 55e913b

Browse files
diegommm and antonmedv authored
VM performance improvements in function calls (#832)
* optimize vm allocation of function arguments
* make estimation faster by using a table
* add benchmarks
* avoid all allocations if no arguments are needed
* simplify code and gain 2% speed
* add safety limit on preallocation

---------

Co-authored-by: Anton Medvedev <anton@medv.io>
1 parent d652fbe commit 55e913b

File tree

2 files changed

+189
-27
lines changed

2 files changed

+189
-27
lines changed

vm/vm.go

Lines changed: 107 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ import (
1717
"github.com/expr-lang/expr/vm/runtime"
1818
)
1919

20+
// maxFnArgsBuf caps the number of argument slots that getArgsForFunc
// preallocates from its up-front estimate, so that a program with very many
// call opcodes does not trigger an excessive allocation. A single call that
// needs more slots than this still gets a buffer large enough for that call.
const maxFnArgsBuf = 256
21+
2022
func Run(program *Program, env any) (any, error) {
2123
if program == nil {
2224
return nil, fmt.Errorf("program is nil")
@@ -83,6 +85,8 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) {
8385
vm.memory = 0
8486
vm.ip = 0
8587

88+
var fnArgsBuf []any
89+
8690
for vm.ip < len(program.Bytecode) {
8791
if debug && vm.debug {
8892
<-vm.step
@@ -399,62 +403,53 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) {
399403
vm.push(out)
400404

401405
case OpCall1:
402-
a := vm.pop()
403-
out, err := program.functions[arg](a)
406+
var args []any
407+
args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, 1)
408+
out, err := program.functions[arg](args...)
404409
if err != nil {
405410
panic(err)
406411
}
407412
vm.push(out)
408413

409414
case OpCall2:
410-
b := vm.pop()
411-
a := vm.pop()
412-
out, err := program.functions[arg](a, b)
415+
var args []any
416+
args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, 2)
417+
out, err := program.functions[arg](args...)
413418
if err != nil {
414419
panic(err)
415420
}
416421
vm.push(out)
417422

418423
case OpCall3:
419-
c := vm.pop()
420-
b := vm.pop()
421-
a := vm.pop()
422-
out, err := program.functions[arg](a, b, c)
424+
var args []any
425+
args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, 3)
426+
out, err := program.functions[arg](args...)
423427
if err != nil {
424428
panic(err)
425429
}
426430
vm.push(out)
427431

428432
case OpCallN:
429433
fn := vm.pop().(Function)
430-
size := arg
431-
in := make([]any, size)
432-
for i := int(size) - 1; i >= 0; i-- {
433-
in[i] = vm.pop()
434-
}
435-
out, err := fn(in...)
434+
var args []any
435+
args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, arg)
436+
out, err := fn(args...)
436437
if err != nil {
437438
panic(err)
438439
}
439440
vm.push(out)
440441

441442
case OpCallFast:
442443
fn := vm.pop().(func(...any) any)
443-
size := arg
444-
in := make([]any, size)
445-
for i := int(size) - 1; i >= 0; i-- {
446-
in[i] = vm.pop()
447-
}
448-
vm.push(fn(in...))
444+
var args []any
445+
args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, arg)
446+
vm.push(fn(args...))
449447

450448
case OpCallSafe:
451449
fn := vm.pop().(SafeFunction)
452-
size := arg
453-
in := make([]any, size)
454-
for i := int(size) - 1; i >= 0; i-- {
455-
in[i] = vm.pop()
456-
}
457-
out, mem, err := fn(in...)
450+
var args []any
451+
args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, arg)
452+
out, mem, err := fn(args...)
458453
if err != nil {
459454
panic(err)
460455
}
@@ -661,6 +656,64 @@ func (vm *VM) scope() *Scope {
661656
return vm.Scopes[len(vm.Scopes)-1]
662657
}
663658

659+
// getArgsForFunc lazily initializes the buffer the first time it is called for
660+
// a given program (thus, it also needs "program" to run). It will
661+
// take "needed" elements from the buffer and populate them with vm.pop() in
662+
// reverse order. Because the estimation can fall short, this function can
663+
// occasionally make a new allocation.
664+
func (vm *VM) getArgsForFunc(argsBuf []any, program *Program, needed int) (args []any, argsBufOut []any) {
665+
if needed == 0 || program == nil {
666+
return nil, argsBuf
667+
}
668+
669+
// Step 1: fix estimations and preallocate
670+
if argsBuf == nil {
671+
estimatedFnArgsCount := estimateFnArgsCount(program)
672+
if estimatedFnArgsCount > maxFnArgsBuf {
673+
// put a practical limit to avoid excessive preallocation
674+
estimatedFnArgsCount = maxFnArgsBuf
675+
}
676+
if estimatedFnArgsCount < needed {
677+
// in the case that the first call is for example OpCallN with a large
678+
// number of arguments, then make sure we will be able to serve them at
679+
// least.
680+
estimatedFnArgsCount = needed
681+
}
682+
683+
// in the case that we are preparing the arguments for the first
684+
// function call of the program, then argsBuf will be nil, so we
685+
// initialize it. We delay this initial allocation here because a
686+
// program could have many function calls but exit earlier than the
687+
// first call, so in that case we avoid allocating unnecessarily
688+
argsBuf = make([]any, estimatedFnArgsCount)
689+
}
690+
691+
// Step 2: get the final slice that will be returned
692+
var buf []any
693+
if len(argsBuf) >= needed {
694+
// in this case, we are successfully using the single preallocation. We
695+
// use the full slice expression [low : high : max] because in that way
696+
// a function that receives this slice as variadic arguments will not be
697+
// able to make modifications to contiguous elements with append(). If
698+
// they call append on their variadic arguments they will make a new
699+
// allocation.
700+
buf = (argsBuf)[:needed:needed]
701+
argsBuf = (argsBuf)[needed:] // advance the buffer
702+
} else {
703+
// if we have been making calls to something like OpCallN with many more
704+
// arguments than what we estimated, then we will need to allocate
705+
// separately
706+
buf = make([]any, needed)
707+
}
708+
709+
// Step 3: populate the final slice bulk copying from the stack. This is the
710+
// exact order and copy() is a highly optimized operation
711+
copy(buf, vm.Stack[len(vm.Stack)-needed:])
712+
vm.Stack = vm.Stack[:len(vm.Stack)-needed]
713+
714+
return buf, argsBuf
715+
}
716+
664717
// Step sends one signal on vm.step. Run's main loop receives from that channel
// at the start of each iteration when debugging is enabled, so a call to Step
// releases one such receive.
// NOTE(review): whether this send blocks without a waiting Run depends on how
// vm.step is created, which is not visible here — confirm.
func (vm *VM) Step() {
	vm.step <- struct{}{}
}
@@ -675,3 +728,30 @@ func clearSlice[S ~[]E, E any](s S) {
675728
s[i] = zero // clear mem, optimized by the compiler, in Go 1.21 the "clear" builtin can be used
676729
}
677730
}
731+
732+
// estimateFnArgsCount inspects a *Program and estimates how many function
733+
// arguments will be required to run it.
734+
func estimateFnArgsCount(program *Program) int {
735+
// Implementation note: a program will not necessarily go through all
736+
// operations, but this is just an estimation
737+
var count int
738+
for _, op := range program.Bytecode {
739+
if int(op) < len(opArgLenEstimation) {
740+
count += opArgLenEstimation[op]
741+
}
742+
}
743+
return count
744+
}
745+
746+
var opArgLenEstimation = [...]int{
747+
OpCall1: 1,
748+
OpCall2: 2,
749+
OpCall3: 3,
750+
// we don't know exactly but we know at least 4, so be conservative as this
751+
// is only an optimization and we also want to avoid excessive preallocation
752+
OpCallN: 4,
753+
// here we don't know either, but we can guess it could be common to receive
754+
// up to 3 arguments in a function
755+
OpCallFast: 3,
756+
OpCallSafe: 3,
757+
}

vm/vm_bench_test.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
package vm_test
2+
3+
import (
4+
"runtime"
5+
"testing"
6+
7+
"github.com/expr-lang/expr"
8+
"github.com/expr-lang/expr/checker"
9+
"github.com/expr-lang/expr/compiler"
10+
"github.com/expr-lang/expr/conf"
11+
"github.com/expr-lang/expr/vm"
12+
)
13+
14+
func BenchmarkVM(b *testing.B) {
15+
cases := []struct {
16+
name, input string
17+
}{
18+
{"function calls", `
19+
func(
20+
func(
21+
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
22+
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
23+
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
24+
),
25+
func(
26+
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
27+
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
28+
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
29+
),
30+
func(
31+
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
32+
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
33+
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
34+
)
35+
)
36+
`},
37+
}
38+
39+
a := new(recursive)
40+
for i, b := 0, a; i < 40*4; i++ {
41+
b.Inner = new(recursive)
42+
b = b.Inner
43+
}
44+
45+
f := func(params ...any) (any, error) { return nil, nil }
46+
env := map[string]any{
47+
"a": a,
48+
"b": true,
49+
"func": f,
50+
}
51+
config := conf.New(env)
52+
expr.Function("func", f, f)(config)
53+
config.Check()
54+
55+
for _, c := range cases {
56+
tree, err := checker.ParseCheck(c.input, config)
57+
if err != nil {
58+
b.Fatal(c.input, "parse and check", err)
59+
}
60+
prog, err := compiler.Compile(tree, config)
61+
if err != nil {
62+
b.Fatal(c.input, "compile", err)
63+
}
64+
//b.Logf("disassembled:\n%s", prog.Disassemble())
65+
//b.FailNow()
66+
runtime.GC()
67+
68+
var vm vm.VM
69+
b.Run("name="+c.name, func(b *testing.B) {
70+
for i := 0; i < b.N; i++ {
71+
_, err = vm.Run(prog, env)
72+
}
73+
})
74+
if err != nil {
75+
b.Fatal(err)
76+
}
77+
}
78+
}
79+
80+
// recursive is a self-referential fixture type used to build a chain of nested
// values for the benchmark environment. Its Inner field carries the struct tag
// `expr:"a"`.
type recursive struct {
	Inner *recursive `expr:"a"`
}

0 commit comments

Comments
 (0)