
Commit b2c3f26 (parent: 4eda481)

Explicitly use cpu when running GH machines; tests that want mps/cuda should explicitly request it (#975)
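The change is mechanical but the intent deserves spelling out: GitHub-hosted runners have no GPU, so every generate invocation in the CPU jobs now pins --device cpu instead of relying on device auto-detection, while jobs that do run on accelerator hardware are expected to name their device. A minimal sketch of that split, assuming hypothetical job names and runner labels (illustrative only, not part of this commit):

    jobs:
      test-cpu:
        runs-on: ubuntu-latest            # GitHub-hosted, CPU only
        steps:
          - run: python3 torchchat.py generate stories15M --temperature 0 --device cpu
      test-cuda:
        runs-on: self-hosted-gpu          # assumption: a label for a GPU runner
        steps:
          - run: python3 torchchat.py generate stories15M --temperature 0 --device cuda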

File tree: 1 file changed (+10, -10)

.github/workflows/pull.yml

@@ -481,7 +481,7 @@ jobs:
 export MODEL_PATH=${PWD}/checkpoints/stories15M/stories15M.pt
 export MODEL_NAME=stories15M
 
-python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0
+python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --device cpu
 
 python export.py --checkpoint-path ${MODEL_PATH} --output-pte-path ${PWD}/${MODEL_NAME}.pte
 python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${PWD}/${MODEL_NAME}.pte
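Since --temperature 0 makes decoding greedy and hence deterministic, a job like this can in principle compare eager output against the exported .pte program. A hedged sketch of that check, assuming the commands print only the generated text (the file names are illustrative):

    python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --device cpu > eager.txt
    python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${PWD}/${MODEL_NAME}.pte > pte.txt
    # assumption: greedy decoding should make the two transcripts match
    diff eager.txt pte.txt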
@@ -618,7 +618,7 @@ jobs:
 
 python torchchat.py list
 python torchchat.py download stories15m
-python torchchat.py generate stories15M
+python torchchat.py generate stories15M --device cpu
 python torchchat.py remove stories15m
 
 test-mps:
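This hunk ends at the test-mps job header, which is exactly the case the commit message carves out: a job running on accelerator hardware should request its device explicitly rather than inherit a default. A minimal sketch of that pattern, assuming an Apple-silicon runner (the runner label and step are illustrative, not taken from this diff):

    test-mps:
      runs-on: macos-14   # assumption: Apple-silicon runner with MPS support
      steps:
        - run: python3 torchchat.py generate stories15M --temperature 0 --device mps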
@@ -832,30 +832,30 @@ jobs:
 echo "******************************************"
 
 echo "Running eager"
-python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0
+python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
 
 echo "Running compiled"
-python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile
+python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
 
 echo "******************************************"
 echo "******* Emb: channel-wise quantized ******"
 echo "******************************************"
 
 echo "Running eager"
-python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0
+python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
 
 echo "Running compiled"
-python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile
+python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
 
 echo "******************************************"
 echo "******** Emb: group-wise quantized *******"
 echo "******************************************"
 
 echo "Running eager"
-python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0
+python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
 
 echo "Running compiled"
-python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile
+python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
 
 echo "tests complete"
 echo "******************************************"
@@ -942,7 +942,7 @@ jobs:
 
 export PRMT="Once upon a time in a land far away"
 
-python torchchat.py generate stories15M --temperature 0 --prompt "${PRMT}"
+python torchchat.py generate stories15M --temperature 0 --prompt "${PRMT}" --device cpu
 
 python torchchat.py export stories15M --output-pte-path ./model.pte
 ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
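After the Python-side generate, the exported program is exercised through the native runner built under cmake-out. A hedged reading of the runner flags, inferred from the parallel Python invocation rather than from runner documentation:

    #   -z  tokenizer file
    #   -t  sampling temperature (0 selects greedy decoding)
    #   -i  input prompt
    ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"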
@@ -995,7 +995,7 @@ jobs:
 export MODEL_DIR=${PWD}/checkpoints/stories15M
 export PROMPT="Once upon a time in a land far away"
 
-python torchchat.py generate --checkpoint-path ${MODEL_DIR}/stories15M.pt --temperature 0 --prompt "${PROMPT}"
+python torchchat.py generate --checkpoint-path ${MODEL_DIR}/stories15M.pt --temperature 0 --prompt "${PROMPT}" --device cpu
 
 for dtype in fp32 fp16 bf16 fast fast16; do
 echo "Running export + runner with dtype=$dtype"
