@@ -149,31 +149,31 @@ static const std::map<e_model, size_t> & MEM_REQ_EVAL()
149149}
150150
151151// amount of VRAM needed per batch size to hold temporary results
152- // the values for 3b and 65b are not derived from testing but instead chosen conservatively
152+ // the values for 3b are not derived from testing but instead chosen conservatively
// Accessor for the per-model base scratch-VRAM table.
// Returns a const reference to a function-local static std::map keyed by e_model,
// so the table is constructed once and the same object is handed out on every call.
// Every entry is written as a multiple of kB (kB is defined outside this hunk --
// presumably 1024 bytes; TODO confirm).
// NOTE(review): this is a diff view. Lines marked '-' are the old rows, lines marked '+'
// are their replacements: the 65B and 70B entries drop from 1536 kB to 1280 kB and the
// "likely can be reduced" TODO on the 70B row is removed.
153153static const std::map<e_model, size_t > & VRAM_REQ_SCRATCH_BASE ()
154154{
155155 static std::map<e_model, size_t > k_sizes = {
156156 { MODEL_3B, 512ull * kB },
157157 { MODEL_7B, 512ull * kB },
158158 { MODEL_13B, 640ull * kB },
159159 { MODEL_30B, 768ull * kB },
    // old rows (removed by this change): 65B and 70B shared the value 1536 kB
160- { MODEL_65B, 1536ull * kB },
161- { MODEL_70B, 1536ull * kB }, // TODO (likely can be reduced)
    // new rows (added by this change): 65B and 70B still share one value, now 1280 kB
160+ { MODEL_65B, 1280ull * kB },
161+ { MODEL_70B, 1280ull * kB },
162162 };
    // hand the table out by reference; the declared return type makes it read-only for callers
163163 return k_sizes;
164164}
165165
166166// amount of VRAM needed per batch size and context to hold temporary results
167- // the values for 3b and 65b are not derived from testing but instead chosen conservatively
167+ // the values for 3b are not derived from testing but instead chosen conservatively
// Accessor for the per-model, per-context scratch-VRAM table.
// Returns a const reference to a function-local static std::map keyed by e_model,
// so the table is constructed once and the same object is handed out on every call.
// Entries here are bare integer literals with no kB factor, unlike the rows of
// VRAM_REQ_SCRATCH_BASE; how callers scale them is not visible in this hunk (TODO confirm).
// NOTE(review): this is a diff view. Lines marked '-' are the old rows, lines marked '+'
// are their replacements: the 65B and 70B entries drop from 416 to 256 and the
// "likely can be reduced" TODO on the 70B row is removed.
168168static const std::map<e_model, size_t > & VRAM_REQ_SCRATCH_PER_CONTEXT ()
169169{
170170 static std::map<e_model, size_t > k_sizes = {
171171 { MODEL_3B, 128ull },
172172 { MODEL_7B, 128ull },
173173 { MODEL_13B, 160ull },
174174 { MODEL_30B, 208ull },
    // old rows (removed by this change): 65B and 70B shared the value 416
175- { MODEL_65B, 416ull },
176- { MODEL_70B, 416ull }, // TODO (likely can be reduced)
    // new rows (added by this change): 65B and 70B still share one value, now 256
175+ { MODEL_65B, 256ull },
176+ { MODEL_70B, 256ull },
177177 };
    // hand the table out by reference; the declared return type makes it read-only for callers
178178 return k_sizes;
179179}
0 commit comments