@@ -481,7 +481,7 @@ jobs:
481481 export MODEL_PATH=${PWD}/checkpoints/stories15M/stories15M.pt
482482 export MODEL_NAME=stories15M
483483
484- python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0
484+ python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --device cpu
485485
486486 python export.py --checkpoint-path ${MODEL_PATH} --output-pte-path ${PWD}/${MODEL_NAME}.pte
487487 python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${PWD}/${MODEL_NAME}.pte
@@ -618,7 +618,7 @@ jobs:
618618
619619 python torchchat.py list
620620 python torchchat.py download stories15m
621- python torchchat.py generate stories15M
621+ python torchchat.py generate stories15M --device cpu
622622 python torchchat.py remove stories15m
623623
624624 test-mps :
@@ -832,30 +832,30 @@ jobs:
832832 echo "******************************************"
833833
834834 echo "Running eager"
835- python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0
835+ python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
836836
837837 echo "Running compiled"
838- python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile
838+ python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
839839
840840 echo "******************************************"
841841 echo "******* Emb: channel-wise quantized ******"
842842 echo "******************************************"
843843
844844 echo "Running eager"
845- python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0
845+ python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
846846
847847 echo "Running compiled"
848- python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile
848+ python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
849849
850850 echo "******************************************"
851851 echo "******** Emb: group-wise quantized *******"
852852 echo "******************************************"
853853
854854 echo "Running eager"
855- python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0
855+ python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
856856
857857 echo "Running compiled"
858- python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile
858+ python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
859859
860860 echo "tests complete"
861861 echo "******************************************"
@@ -942,7 +942,7 @@ jobs:
942942
943943 export PRMT="Once upon a time in a land far away"
944944
945- python torchchat.py generate stories15M --temperature 0 --prompt "${PRMT}"
945+ python torchchat.py generate stories15M --temperature 0 --prompt "${PRMT}" --device cpu
946946
947947 python torchchat.py export stories15M --output-pte-path ./model.pte
948948 ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
@@ -995,7 +995,7 @@ jobs:
995995 export MODEL_DIR=${PWD}/checkpoints/stories15M
996996 export PROMPT="Once upon a time in a land far away"
997997
998- python torchchat.py generate --checkpoint-path ${MODEL_DIR}/stories15M.pt --temperature 0 --prompt "${PROMPT}"
998+ python torchchat.py generate --checkpoint-path ${MODEL_DIR}/stories15M.pt --temperature 0 --prompt "${PROMPT}" --device cpu
999999
10001000 for dtype in fp32 fp16 bf16 fast fast16; do
10011001 echo "Running export + runner with dtype=$dtype"
0 commit comments