@@ -1966,6 +1966,16 @@ std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& v
19661966}
19671967
19681968bool ModelLoader::load_tensors (on_new_tensor_cb_t on_new_tensor_cb) {
1969+ int64_t process_time_ms = 0 ;
1970+ int64_t read_time_ms = 0 ;
1971+ int64_t memcpy_time_ms = 0 ;
1972+ int64_t copy_to_backend_time_ms = 0 ;
1973+ int64_t convert_time_ms = 0 ;
1974+
1975+ int64_t prev_time_ms = 0 ;
1976+ int64_t curr_time_ms = 0 ;
1977+ int64_t start_time = ggml_time_ms ();
1978+ prev_time_ms = start_time;
19691979 std::vector<TensorStorage> processed_tensor_storages;
19701980 for (auto & tensor_storage : tensor_storages) {
19711981 // LOG_DEBUG("%s", name.c_str());
@@ -1978,6 +1988,9 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
19781988 }
19791989 std::vector<TensorStorage> dedup = remove_duplicates (processed_tensor_storages);
19801990 processed_tensor_storages = dedup;
1991+ curr_time_ms = ggml_time_ms ();
1992+ process_time_ms = curr_time_ms - prev_time_ms;
1993+ prev_time_ms = curr_time_ms;
19811994
19821995 bool success = true ;
19831996 for (size_t file_index = 0 ; file_index < file_paths_.size (); file_index++) {
@@ -2019,15 +2032,27 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
20192032 size_t entry_size = zip_entry_size (zip);
20202033 if (entry_size != n) {
20212034 read_buffer.resize (entry_size);
2035+ prev_time_ms = ggml_time_ms ();
20222036 zip_entry_noallocread (zip, (void *)read_buffer.data (), entry_size);
2037+ curr_time_ms = ggml_time_ms ();
2038+ read_time_ms += curr_time_ms - prev_time_ms;
2039+ prev_time_ms = curr_time_ms;
20232040 memcpy ((void *)buf, (void *)(read_buffer.data () + tensor_storage.offset ), n);
2041+ curr_time_ms = ggml_time_ms ();
2042+ memcpy_time_ms += curr_time_ms - prev_time_ms;
20242043 } else {
2044+ prev_time_ms = ggml_time_ms ();
20252045 zip_entry_noallocread (zip, (void *)buf, n);
2046+ curr_time_ms = ggml_time_ms ();
2047+ read_time_ms += curr_time_ms - prev_time_ms;
20262048 }
20272049 zip_entry_close (zip);
20282050 } else {
2051+ prev_time_ms = ggml_time_ms ();
20292052 file.seekg (tensor_storage.offset );
20302053 file.read (buf, n);
2054+ curr_time_ms = ggml_time_ms ();
2055+ read_time_ms += curr_time_ms - prev_time_ms;
20312056 if (!file) {
20322057 LOG_ERROR (" read tensor data failed: '%s'" , file_path.c_str ());
20332058 return false ;
@@ -2072,6 +2097,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
20722097 read_data (tensor_storage, (char *)dst_tensor->data , nbytes_to_read);
20732098 }
20742099
2100+ prev_time_ms = ggml_time_ms ();
20752101 if (tensor_storage.is_bf16 ) {
20762102 // inplace op
20772103 bf16_to_f32_vec ((uint16_t *)dst_tensor->data , (float *)dst_tensor->data , tensor_storage.nelements ());
@@ -2086,10 +2112,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
20862112 } else if (tensor_storage.is_i64 ) {
20872113 i64_to_i32_vec ((int64_t *)read_buffer.data (), (int32_t *)dst_tensor->data , tensor_storage.nelements ());
20882114 }
2115+ curr_time_ms = ggml_time_ms ();
2116+ convert_time_ms += curr_time_ms - prev_time_ms;
20892117 } else {
20902118 read_buffer.resize (std::max (tensor_storage.nbytes (), tensor_storage.nbytes_to_read ()));
20912119 read_data (tensor_storage, (char *)read_buffer.data (), nbytes_to_read);
20922120
2121+ prev_time_ms = ggml_time_ms ();
20932122 if (tensor_storage.is_bf16 ) {
20942123 // inplace op
20952124 bf16_to_f32_vec ((uint16_t *)read_buffer.data (), (float *)read_buffer.data (), tensor_storage.nelements ());
@@ -2109,11 +2138,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
21092138
21102139 convert_tensor ((void *)read_buffer.data (), tensor_storage.type , dst_tensor->data ,
21112140 dst_tensor->type , (int )tensor_storage.nelements () / (int )tensor_storage.ne [0 ], (int )tensor_storage.ne [0 ]);
2141+ curr_time_ms = ggml_time_ms ();
2142+ convert_time_ms += curr_time_ms - prev_time_ms;
21122143 }
21132144 } else {
21142145 read_buffer.resize (std::max (tensor_storage.nbytes (), tensor_storage.nbytes_to_read ()));
21152146 read_data (tensor_storage, (char *)read_buffer.data (), nbytes_to_read);
21162147
2148+ prev_time_ms = ggml_time_ms ();
21172149 if (tensor_storage.is_bf16 ) {
21182150 // inplace op
21192151 bf16_to_f32_vec ((uint16_t *)read_buffer.data (), (float *)read_buffer.data (), tensor_storage.nelements ());
@@ -2133,14 +2165,24 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
21332165
21342166 if (tensor_storage.type == dst_tensor->type ) {
21352167 // copy to device memory
2168+ curr_time_ms = ggml_time_ms ();
2169+ convert_time_ms += curr_time_ms - prev_time_ms;
2170+ prev_time_ms = curr_time_ms;
21362171 ggml_backend_tensor_set (dst_tensor, read_buffer.data (), 0 , ggml_nbytes (dst_tensor));
2172+ curr_time_ms = ggml_time_ms ();
2173+ copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
21372174 } else {
21382175 // convert first, then copy to device memory
21392176 convert_buffer.resize (ggml_nbytes (dst_tensor));
21402177 convert_tensor ((void *)read_buffer.data (), tensor_storage.type ,
21412178 (void *)convert_buffer.data (), dst_tensor->type ,
21422179 (int )tensor_storage.nelements () / (int )tensor_storage.ne [0 ], (int )tensor_storage.ne [0 ]);
2180+ curr_time_ms = ggml_time_ms ();
2181+ convert_time_ms += curr_time_ms - prev_time_ms;
2182+ prev_time_ms = curr_time_ms;
21432183 ggml_backend_tensor_set (dst_tensor, convert_buffer.data (), 0 , ggml_nbytes (dst_tensor));
2184+ curr_time_ms = ggml_time_ms ();
2185+ copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
21442186 }
21452187 }
21462188 ++tensor_count;
@@ -2170,6 +2212,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
21702212 break ;
21712213 }
21722214 }
2215+ int64_t end_time = ggml_time_ms ();
2216+ LOG_INFO (" loading tensors completed, taking %.2fs (process: %.2fs, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)" ,
2217+ (end_time - start_time) / 1000 .f ,
2218+ process_time_ms / 1000 .f ,
2219+ read_time_ms / 1000 .f ,
2220+ memcpy_time_ms / 1000 .f ,
2221+ convert_time_ms / 1000 .f ,
2222+ copy_to_backend_time_ms / 1000 .f );
21732223 return success;
21742224}
21752225
0 commit comments