mirror of
https://github.com/trholding/llama2.c.git
synced 2026-02-06 11:26:53 +00:00
Update runq.c
runq - Undo #pragma omp parallel sections for matmuls for now, as there is no real benefit with a low number of cores
This commit is contained in:
parent
725faaa608
commit
16e223fbca
9
runq.c
9
runq.c
@@ -636,18 +636,9 @@ float* forward(Transformer* transformer, int token, int pos) {
|
||||
|
||||
// qkv matmuls for this position
|
||||
quantize(&s->xq, s->xb, dim);
|
||||
|
||||
// L2E Addition
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
matmul(s->q, &s->xq, w->wq + l, dim, dim);
|
||||
#pragma omp section
|
||||
matmul(s->k, &s->xq, w->wk + l, dim, kv_dim);
|
||||
#pragma omp section
|
||||
matmul(s->v, &s->xq, w->wv + l, dim, kv_dim);
|
||||
}
|
||||
// END L2E Addition
|
||||
|
||||
// RoPE relative positional encoding: complex-valued rotate q and k in each head
|
||||
for (int i = 0; i < dim; i+=2) {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user