mirror of
https://github.com/trholding/llama2.c.git
synced 2026-02-06 11:26:53 +00:00
203 lines
6.7 KiB
Makefile
203 lines
6.7 KiB
Makefile
# External libraries

# BLIS locations, all derived from a single install prefix:
#   BLIS_INC - header directory
#   BLIS_LIB - path of the static archive
BLIS_PREFIX := /usr/local
BLIS_INC     = $(BLIS_PREFIX)/include/blis
BLIS_LIB     = $(BLIS_PREFIX)/lib/libblis.a
|
|
|
|
# Files embedded by the "incbin", "strlit" and "zipos" targets:
# the model checkpoint and the tokenizer.
MOD_PATH := out/model.bin
TOK_PATH := tokenizer.bin
|
|
|
|
# -L${MKLROOT}/lib/intel64 -lmkl_rt -Wl,--no-as-needed -lpthread -lm -ldl
|
|
# -m64 -I"${MKLROOT}/include"
|
|
|
|
|
|
# Compiler used by the $(CC)-based targets (gcc, clang, ...).
# Override on the command line, e.g.:  make run CC=clang
CC := gcc
|
|
|
|
# Baseline build: plain -O3 with only libm linked. The simplest variant and
# the one most likely to work on most systems.
.PHONY: run
run: run.c
	$(CC) -O3 -o $@ $< -lm
|
|
|
|
# Debug build: compiles with -g and no optimization flags so the binary can be
# analyzed with tools such as valgrind, for example:
#   $ valgrind --leak-check=full ./run out/model.bin -n 3
# Declared phony like the other build variants: the recipe writes ./run, never
# a file called "rundebug", so a stray file of that name must not mask the rule.
.PHONY: rundebug
rundebug: run.c
	$(CC) -g -o run run.c -lm
|
|
|
|
# Aggressively optimized build using -Ofast.
#
# References:
#   https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
#   https://simonbyrne.github.io/notes/fastmath/
#
# What -Ofast does:
#   * enables every -O3 optimization;
#   * disregards strict standards compliance, enabling optimizations that are
#     not valid for all standard-compliant programs;
#   * turns on -ffast-math, -fallow-store-data-races and the Fortran-specific
#     -fstack-arrays (unless -fmax-stack-var-size is specified) and
#     -fno-protect-parens;
#   * turns off -fsemantic-interposition.
#
# In our specific application this is *probably* okay to use.
.PHONY: runfast
runfast: run.c
	$(CC) -Ofast -o run $< -lm
|
|
|
|
# OpenMP build, allowing multithreaded runs. Remember to request multiple
# threads when running as well, e.g.:
#   OMP_NUM_THREADS=4 ./run out/model.bin
.PHONY: run_cc_openmp
run_cc_openmp: run.c
	$(CC) -D OPENMP -Ofast -fopenmp -foffload-options="-Ofast -lm" -march=native $< -lm -o run
|
|
|
|
# OpenACC build: same shape as the OpenMP variant, but defines OPENACC and
# compiles with -fopenacc.
.PHONY: run_cc_openacc
run_cc_openacc: run.c
	$(CC) -D OPENACC -Ofast -fopenacc -foffload-options="-Ofast -lm" -march=native $< -lm -o run
|
|
|
|
# Windows x86_64 build using the MinGW-w64 GCC cross compiler; win.c is
# compiled together with run.c and the output is run.exe.
.PHONY: win64
win64: run.c
	x86_64-w64-mingw32-gcc -Ofast -D_WIN32 -o run.exe -I. $< win.c
|
|
|
|
# Compiles with the gnu11 standard flag (-std=gnu11) for amazon linux, coreos,
# etc. compatibility.
# run.c is listed as a prerequisite, matching the other build targets, so a
# missing source file is reported by make itself.
.PHONY: rungnu
rungnu: run.c
	$(CC) -Ofast -std=gnu11 -o run run.c -lm
|
|
|
|
# OpenMP build using the gnu11 standard flag (-std=gnu11).
# run.c is listed as a prerequisite, matching the other build targets, so a
# missing source file is reported by make itself.
.PHONY: runompgnu
runompgnu: run.c
	$(CC) -Ofast -fopenmp -std=gnu11 run.c -lm -o run
|
|
|
|
# CLBlast build: defines CLBLAST and links against libclblast.
.PHONY: run_cc_clblast
run_cc_clblast: run.c
	$(CC) -D CLBLAST -Ofast -fopenmp -march=native $< -lm -lclblast -o run
|
|
|
|
# OpenBLAS build: defines OPENBLAS and links against libopenblas.
.PHONY: run_cc_openblas
run_cc_openblas: run.c
	$(CC) -D OPENBLAS -Ofast -fopenmp -march=native $< -lm -lopenblas -o run
|
|
|
|
# Generic CBLAS build: defines CBLAS and links against libcblas.
.PHONY: run_cc_cblas
run_cc_cblas: run.c
	$(CC) -D CBLAS -Ofast -fopenmp -march=native $< -lm -lcblas -o run
|
|
|
|
# BLIS build: defines BLIS, takes headers from $(BLIS_INC) and links libblis.
# The library search path is derived from the same BLIS_PREFIX as the include
# path, so `make run_cc_blis BLIS_PREFIX=/opt/blis` resolves both the headers
# and the library from that installation.
.PHONY: run_cc_blis
run_cc_blis: run.c
	$(CC) -D BLIS -Ofast -fopenmp -march=native -I$(BLIS_INC) run.c -L$(BLIS_PREFIX)/lib -lm -lblis -o run
|
|
|
|
.PHONY: run_cc_armpl
|
|
run_cc_armpl: run.c
|
|
$(CC) -D ARMPL -Ofast -fopenmp -march=native run.c -lm -larmpl_lp64_mp -o run
|
|
|
|
# amd64 (x86_64) / intel mac (WIP) Do not use!
# Intel MKL build: defines MKL and links the MKL single dynamic library
# (mkl_rt) instead of BLIS. Requires MKLROOT to point at the MKL installation
# (make imports it from the environment).
# NOTE(review): flags follow the MKL link line noted near the top of this
# file; still untested, hence the WIP marker.
.PHONY: run_cc_mkl
run_cc_mkl: run.c
	$(CC) -D MKL -Ofast -fopenmp -march=native -m64 -I"$(MKLROOT)/include" run.c -L"$(MKLROOT)/lib/intel64" -lmkl_rt -Wl,--no-as-needed -lpthread -lm -ldl -o run
|
|
|
|
# macOS Accelerate framework build: defines AAF and links with
# -framework Accelerate.
# The rule name now matches its .PHONY declaration (run_cc_mac_accel);
# "runaccel" is kept as a phony alias so existing invocations keep working.
.PHONY: run_cc_mac_accel runaccel
run_cc_mac_accel: run.c
	$(CC) -D AAF -Ofast -fopenmp -march=native run.c -lm -framework Accelerate -o run

runaccel: run_cc_mac_accel
|
|
|
|
|
|
# Cosmocc + embedded model & tokenizer
|
|
|
|
# cosmocc build with COSMO_ZIP: compile run.com, then add the model and the
# tokenizer to run.com with zip.
.PHONY: run_cosmocc_zipos
run_cosmocc_zipos: run.c
	cosmocc -Ofast -D COSMO_BLINK -D COSMO_METAL -D COSMO_ZIP $< -lm -o run.com
	zip run.com $(MOD_PATH)
	zip run.com $(TOK_PATH)
|
|
|
|
# cosmocc build with INC_BIN: the model and tokenizer paths are handed to the
# compiler through the MODPATH / TOKPATH defines.
# run.c is listed as a prerequisite, matching the other cosmocc targets, so a
# missing source file is reported by make itself.
.PHONY: run_cosmocc_incbin
run_cosmocc_incbin: run.c
	cosmocc -Ofast -D COSMO_BLINK -D COSMO_METAL -D INC_BIN -D MODPATH=$(MOD_PATH) -D TOKPATH=$(TOK_PATH) -D LLOOP run.c -lm -o run.com
|
|
|
|
# cosmocc build with STRLIT. Steps:
#   1. build the strlit helper from strliteral.c with gcc;
#   2. generate model.h and tokenizer.h from the model / tokenizer files;
#   3. compile run.com with cosmocc.
.PHONY: run_cosmocc_strlit
run_cosmocc_strlit: run.c
	# Uses https://github.com/mortie/strliteral to embed files
	gcc -Ofast strliteral.c -o strlit
	./strlit -i emb_Model_data $(MOD_PATH) model.h
	./strlit -i emb_Tokenizer_data $(TOK_PATH) tokenizer.h
	cosmocc -Ofast -D COSMO_BLINK -D COSMO_METAL -D STRLIT -D LLOOP $< -lm -o run.com
|
|
|
|
|
|
# GCC OpenMP + embedded model & tokenizer
|
|
|
|
# gcc + OpenMP build with INC_BIN: the model and tokenizer paths are handed to
# the compiler through the MODPATH / TOKPATH defines.
.PHONY: run_gcc_openmp_incbin
run_gcc_openmp_incbin: run.c
	gcc -D OPENMP -Ofast -fopenmp -foffload-options="-Ofast -lm" -march=native -D INC_BIN -D MODPATH=$(MOD_PATH) -D TOKPATH=$(TOK_PATH) -D LLOOP $< -lm -o run
|
|
|
|
# gcc + OpenMP build with STRLIT. Steps:
#   1. build the strlit helper from strliteral.c;
#   2. generate model.h and tokenizer.h from the model / tokenizer files;
#   3. compile run with OpenMP enabled.
.PHONY: run_gcc_openmp_strlit
run_gcc_openmp_strlit: run.c
	# Uses https://github.com/mortie/strliteral to embed files
	gcc -Ofast strliteral.c -o strlit
	./strlit -i emb_Model_data $(MOD_PATH) model.h
	./strlit -i emb_Tokenizer_data $(TOK_PATH) tokenizer.h
	gcc -D OPENMP -Ofast -fopenmp -foffload-options="-Ofast -lm" -march=native -D STRLIT -D LLOOP $< -lm -o run
|
|
|
|
|
|
# Clang OpenMP + embedded model & tokenizer
|
|
|
|
# clang + OpenMP build with INC_BIN: the model and tokenizer paths are handed
# to the compiler through the MODPATH / TOKPATH defines.
.PHONY: run_clang_openmp_incbin
run_clang_openmp_incbin: run.c
	clang -D OPENMP -Ofast -fopenmp -march=native -D INC_BIN -D MODPATH=$(MOD_PATH) -D TOKPATH=$(TOK_PATH) -D LLOOP $< -lm -o run
|
|
|
|
# clang + OpenMP build with STRLIT. Steps:
#   1. build the strlit helper from strliteral.c;
#   2. generate model.h and tokenizer.h from the model / tokenizer files;
#   3. compile run with OpenMP enabled.
.PHONY: run_clang_openmp_strlit
run_clang_openmp_strlit: run.c
	# Uses https://github.com/mortie/strliteral to embed files
	clang -Ofast strliteral.c -o strlit
	./strlit -i emb_Model_data $(MOD_PATH) model.h
	./strlit -i emb_Tokenizer_data $(TOK_PATH) tokenizer.h
	clang -D OPENMP -Ofast -fopenmp -march=native -D STRLIT -D LLOOP $< -lm -o run
|
|
|
|
# GCC static + embedded model & tokenizer
|
|
|
|
# Statically linked gcc build with INC_BIN: the model and tokenizer paths are
# handed to the compiler through the MODPATH / TOKPATH defines.
.PHONY: run_gcc_static_incbin
run_gcc_static_incbin: run.c
	gcc -Ofast -static -march=native -D INC_BIN -D MODPATH=$(MOD_PATH) -D TOKPATH=$(TOK_PATH) -D LLOOP $< -lm -o run
|
|
|
|
# Statically linked gcc build with STRLIT. Steps:
#   1. build the strlit helper from strliteral.c;
#   2. generate model.h and tokenizer.h from the model / tokenizer files;
#   3. compile run with -static.
.PHONY: run_gcc_static_strlit
run_gcc_static_strlit: run.c
	# Uses https://github.com/mortie/strliteral to embed files
	gcc -Ofast strliteral.c -o strlit
	./strlit -i emb_Model_data $(MOD_PATH) model.h
	./strlit -i emb_Tokenizer_data $(TOK_PATH) tokenizer.h
	gcc -Ofast -static -march=native -D STRLIT -D LLOOP $< -lm -o run
|
|
|
|
# Clang static + embedded model & tokenizer
|
|
# Statically linked clang build with INC_BIN: the model and tokenizer paths are
# handed to the compiler through the MODPATH / TOKPATH defines.
.PHONY: run_clang_static_incbin
run_clang_static_incbin: run.c
	clang -Ofast -static -march=native -D INC_BIN -D MODPATH=$(MOD_PATH) -D TOKPATH=$(TOK_PATH) -D LLOOP $< -lm -o run
|
|
|
|
# Statically linked clang build with STRLIT. Steps:
#   1. build the strlit helper from strliteral.c;
#   2. generate model.h and tokenizer.h from the model / tokenizer files;
#   3. compile run with -static.
.PHONY: run_clang_static_strlit
run_clang_static_strlit: run.c
	# Uses https://github.com/mortie/strliteral to embed files
	clang -Ofast strliteral.c -o strlit
	./strlit -i emb_Model_data $(MOD_PATH) model.h
	./strlit -i emb_Tokenizer_data $(TOK_PATH) tokenizer.h
	clang -Ofast -static -march=native -D STRLIT -D LLOOP $< -lm -o run
|
|
|
|
# Unikraft Unikernel build
# Clones the unikraft and lib-musl sources into UNIK/ when they are not there
# yet, then delegates the build to Makefile.unikernel.
#
# The directory checks are written as `[ -d X ] || cmd` rather than
# `[ ! -d X ] && cmd`: the latter exits non-zero once the directory exists,
# which make treats as a failed recipe line and aborts every run after the
# first. The sub-make is invoked through $(MAKE) so -j, -n and other flags
# propagate to it.
.PHONY: run_unik_qemu_x86_64
run_unik_qemu_x86_64: run.c
	[ -d "UNIK" ] || echo "Cloning unikraft and musl sources..."
	[ -d "UNIK/unikraft" ] || git clone https://github.com/unikraft/unikraft UNIK/unikraft
	[ -d "UNIK/libs/musl" ] || git clone https://github.com/unikraft/lib-musl UNIK/libs/musl
	$(MAKE) -f Makefile.unikernel
|
|
|
|
# Run the complete test suite through pytest.
.PHONY: test
test:
	pytest
|
|
|
|
# Run only the tests selected by `-k runc`, i.e. those for the run.c C
# implementation (a bit faster when only C code changed).
.PHONY: testc
testc:
	pytest -k runc
|
|
|
|
# Remove local build products (binaries, generated headers, the strlit helper,
# editor backups) and run the unikernel makefile's own clean target.
# The sub-make goes through $(MAKE) so flags such as -n and -j are honored.
.PHONY: clean
clean:
	rm -f run run.com model.h tokenizer.h strlit run.com.dbg *~
	$(MAKE) -f Makefile.unikernel clean
|
|
|
|
# Full reset: remove local build products, run the unikernel makefile's
# distclean target, then delete the cloned UNIK/ tree.
# The sub-make goes through $(MAKE) so flags such as -n and -j are honored.
.PHONY: distclean
distclean:
	rm -f run run.com model.h tokenizer.h strlit run.com.dbg *~
	$(MAKE) -f Makefile.unikernel distclean
	rm -rf UNIK
|
|
|