From 771bf8a932197aaeafd28274c150c2d2ed4c3afd Mon Sep 17 00:00:00 2001
From: Jack-Khuu
Date: Tue, 30 Jul 2024 11:32:06 -0700
Subject: [PATCH] Explicitly use cpu when running GH machines; Tests that want
 mps/cuda should explicitly request for it

---
 .github/workflows/pull.yml | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index d702df3f1..5ff28873a 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -481,7 +481,7 @@ jobs:
           export MODEL_PATH=${PWD}/checkpoints/stories15M/stories15M.pt
           export MODEL_NAME=stories15M
 
-          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0
+          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --device cpu
 
           python export.py --checkpoint-path ${MODEL_PATH} --output-pte-path ${PWD}/${MODEL_NAME}.pte
           python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${PWD}/${MODEL_NAME}.pte
@@ -618,7 +618,7 @@ jobs:
 
           python torchchat.py list
           python torchchat.py download stories15m
-          python torchchat.py generate stories15M
+          python torchchat.py generate stories15M --device cpu
           python torchchat.py remove stories15m
 
   test-mps:
@@ -832,30 +832,30 @@ jobs:
           echo "******************************************"
 
           echo "Running eager"
-          python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0
+          python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
 
           echo "Running compiled"
-          python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile
+          python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
 
           echo "******************************************"
           echo "******* Emb: channel-wise quantized ******"
           echo "******************************************"
 
           echo "Running eager"
-          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0
+          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
 
           echo "Running compiled"
-          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile
+          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
 
           echo "******************************************"
           echo "******** Emb: group-wise quantized *******"
           echo "******************************************"
 
           echo "Running eager"
-          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0
+          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
 
           echo "Running compiled"
-          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile
+          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
 
           echo "tests complete"
           echo "******************************************"
@@ -942,7 +942,7 @@ jobs:
 
           export PRMT="Once upon a time in a land far away"
 
-          python torchchat.py generate stories15M --temperature 0 --prompt "${PRMT}"
+          python torchchat.py generate stories15M --temperature 0 --prompt "${PRMT}" --device cpu
 
           python torchchat.py export stories15M --output-pte-path ./model.pte
           ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
@@ -995,7 +995,7 @@ jobs:
           export MODEL_DIR=${PWD}/checkpoints/stories15M
           export PROMPT="Once upon a time in a land far away"
 
-          python torchchat.py generate --checkpoint-path ${MODEL_DIR}/stories15M.pt --temperature 0 --prompt "${PROMPT}"
+          python torchchat.py generate --checkpoint-path ${MODEL_DIR}/stories15M.pt --temperature 0 --prompt "${PROMPT}" --device cpu
 
           for dtype in fp32 fp16 bf16 fast fast16; do
             echo "Running export + runner with dtype=$dtype"