Skip to content

Commit 5cebe5a

Browse files
committed
PR fixes + fix leak in onnx modelrun
1 parent 11649ea commit 5cebe5a

12 files changed

+131
-108
lines changed

docs/commands.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -179,10 +179,10 @@ _Arguments_
179179
* **BATCHSIZE**: when provided with an `n` that is greater than 0, the engine will batch incoming requests from multiple clients that use the model with input tensors of the same shape. When `AI.MODELEXECUTE` (or `AI.MODELRUN`) is called the requests queue is visited and input tensors from compatible requests are concatenated along the 0th (batch) dimension until `n` is exceeded. The model is then run for the entire batch and the results are unpacked back to the individual requests unblocking their respective clients. If the batch size of the inputs of the first request in the queue exceeds `BATCHSIZE`, the request is served immediately (default value: 0).
180180
* **MINBATCHSIZE**: when provided with an `m` that is greater than 0, the engine will postpone calls to `AI.MODELEXECUTE` until the batch's size had reached `m`. In this case, note that requests for which `m` is not reached will hang indefinitely (default value: 0), unless `MINBATCHTIMEOUT` is provided.
181181
* **MINBATCHTIMEOUT**: when provided with a `t` (expressed in milliseconds) that is greater than 0, the engine will trigger a run even though `MINBATCHSIZE` has not been reached after `t` milliseconds from the time a `MODELEXECUTE` (or the enclosing `DAGRUN`) is enqueued. This only applies to cases where both `BATCHSIZE` and `MINBATCHSIZE` are greater than 0.
182-
* **INPUTS**: denotes that one or more names of the model's input nodes are following (applicable only for TensorFlow models)
183-
* **input_count**: a positive number that indicates the number of following input nodes.
184-
* **OUTPUTS**: denotes that one or more names of the model's output nodes are following (applicable only for TensorFlow models)
185-
* **output_count**: a positive number that indicates the number of following input nodes.
182+
* **INPUTS**: denotes that one or more names of the model's input nodes are following, applicable only for TensorFlow models (specifying INPUTS for other backends will cause an error)
183+
* **input_count**: a positive number that indicates the number of following input nodes (also applicable only for TensorFlow)
184+
* **OUTPUTS**: denotes that one or more names of the model's output nodes are following, applicable only for TensorFlow models (specifying OUTPUTS for other backends will cause an error)
185+
* **output_count**: a positive number that indicates the number of following output nodes (also applicable only for TensorFlow)
186186
* **model**: the Protobuf-serialized model. Since Redis supports strings up to 512MB, blobs for very large models need to be chunked, e.g. `BLOB chunk1 chunk2 ...`.
187187

188188
_Return_

docs/intro.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ AI.TENSORSET tB FLOAT 2 VALUES 3 5
246246
The model can now be run with the [`AI.MODELEXECUTE` command](commands.md#aimodelexecute) as follows:
247247

248248
```
249-
AI.MODELEXECUTE mymodel INPUTS tA tB OUTPUTS tResult
249+
AI.MODELEXECUTE mymodel INPUTS 2 tA tB OUTPUTS 1 tResult
250250
```
251251

252252
!!! example "Example: running a model"

src/backends/onnxruntime.c

Lines changed: 62 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,8 @@ DLDataType RAI_GetDLDataTypeFromORT(ONNXTensorElementDataType dtype) {
179179
}
180180
}
181181

182-
OrtValue *RAI_OrtValueFromTensors(RAI_Tensor **ts, size_t count, RAI_Error *error) {
182+
int RAI_OrtValueFromTensors(RAI_Tensor **ts, size_t count, OrtValue **input,
183+
OrtStatus **status_ptr) {
183184
OrtStatus *status = NULL;
184185
const OrtApi *ort = OrtGetApiBase()->GetApi(1);
185186

@@ -218,12 +219,12 @@ OrtValue *RAI_OrtValueFromTensors(RAI_Tensor **ts, size_t count, RAI_Error *erro
218219
RAI_TensorByteSize(t0), t0->tensor.dl_tensor.shape, t0->tensor.dl_tensor.ndim,
219220
RAI_GetOrtDataTypeFromDL(t0->tensor.dl_tensor.dtype), &out))
220221
}
221-
return out;
222+
*input = out;
223+
return REDISMODULE_OK;
222224

223225
error:
224-
RAI_SetError(error, RAI_EMODELRUN, ort->GetErrorMessage(status));
225-
ort->ReleaseStatus(status);
226-
return NULL;
226+
*status_ptr = status;
227+
return REDISMODULE_ERR;
227228
}
228229

229230
RAI_Tensor *RAI_TensorCreateFromOrtValue(OrtValue *v, size_t batch_offset, long long batch_size,
@@ -233,6 +234,7 @@ RAI_Tensor *RAI_TensorCreateFromOrtValue(OrtValue *v, size_t batch_offset, long
233234
RAI_Tensor *ret = NULL;
234235
int64_t *shape = NULL;
235236
int64_t *strides = NULL;
237+
OrtTensorTypeAndShapeInfo *info = NULL;
236238

237239
int is_tensor;
238240
ONNX_VALIDATE_STATUS(ort->IsTensor(v, &is_tensor))
@@ -245,7 +247,6 @@ RAI_Tensor *RAI_TensorCreateFromOrtValue(OrtValue *v, size_t batch_offset, long
245247
ret = RAI_TensorNew();
246248
DLDevice device = (DLDevice){.device_type = kDLCPU, .device_id = 0};
247249

248-
OrtTensorTypeAndShapeInfo *info;
249250
ONNX_VALIDATE_STATUS(ort->GetTensorTypeAndShape(v, &info))
250251

251252
{
@@ -323,6 +324,9 @@ RAI_Tensor *RAI_TensorCreateFromOrtValue(OrtValue *v, size_t batch_offset, long
323324
if (ret != NULL) {
324325
RedisModule_Free(ret);
325326
}
327+
if (info != NULL) {
328+
ort->ReleaseTensorTypeAndShapeInfo(info);
329+
}
326330
return NULL;
327331
}
328332

@@ -497,19 +501,16 @@ int RAI_ModelRunORT(RAI_ModelRunCtx **mctxs, RAI_Error *error) {
497501
}
498502

499503
OrtStatus *status = NULL;
500-
size_t n_input_nodes;
501-
ONNX_VALIDATE_STATUS(ort->SessionGetInputCount(session, &n_input_nodes))
502-
503-
size_t n_output_nodes;
504-
ONNX_VALIDATE_STATUS(ort->SessionGetOutputCount(session, &n_output_nodes)) {
505-
const char *input_names[n_input_nodes];
506-
const char *output_names[n_output_nodes];
507-
508-
OrtValue *inputs[n_input_nodes];
509-
OrtValue *outputs[n_output_nodes];
510-
511-
const size_t ninputs = array_len(mctxs[0]->inputs);
512-
const size_t noutputs = array_len(mctxs[0]->outputs);
504+
const size_t ninputs = array_len(mctxs[0]->inputs);
505+
const size_t noutputs = array_len(mctxs[0]->outputs);
506+
array_new_on_stack(const char *, 5, input_names)
507+
array_new_on_stack(const char *, 5, output_names) array_new_on_stack(OrtValue *, 5, inputs)
508+
array_new_on_stack(OrtValue *, 5, outputs) OrtTensorTypeAndShapeInfo *info = NULL;
509+
{
510+
size_t n_input_nodes;
511+
size_t n_output_nodes;
512+
ONNX_VALIDATE_STATUS(ort->SessionGetInputCount(session, &n_input_nodes))
513+
ONNX_VALIDATE_STATUS(ort->SessionGetOutputCount(session, &n_output_nodes))
513514

514515
if (ninputs != n_input_nodes) {
515516
char msg[70];
@@ -529,26 +530,26 @@ int RAI_ModelRunORT(RAI_ModelRunCtx **mctxs, RAI_Error *error) {
529530
char *input_name;
530531
ONNX_VALIDATE_STATUS(
531532
ort->SessionGetInputName(session, i, global_allocator, &input_name))
532-
input_names[i] = input_name;
533+
input_names = array_append(input_names, input_name);
533534

534535
RAI_Tensor *batched_input_tensors[nbatches];
535536
for (size_t b = 0; b < nbatches; b++) {
536537
batched_input_tensors[b] = mctxs[b]->inputs[i].tensor;
537538
}
538-
539-
inputs[i] = RAI_OrtValueFromTensors(batched_input_tensors, nbatches, error);
540-
if (error->code != RAI_OK) {
541-
ort->ReleaseStatus(status);
542-
return REDISMODULE_ERR;
539+
OrtValue *input;
540+
if (RAI_OrtValueFromTensors(batched_input_tensors, nbatches, &input, &status) !=
541+
REDISMODULE_OK) {
542+
goto error;
543543
}
544+
inputs = array_append(inputs, input);
544545
}
545546

546547
for (size_t i = 0; i < n_output_nodes; i++) {
547548
char *output_name;
548549
ONNX_VALIDATE_STATUS(
549550
ort->SessionGetOutputName(session, i, global_allocator, &output_name))
550-
output_names[i] = output_name;
551-
outputs[i] = NULL;
551+
output_names = array_append(output_names, output_name);
552+
outputs = array_append(outputs, NULL);
552553
}
553554

554555
OrtRunOptions *run_options = NULL;
@@ -559,13 +560,14 @@ int RAI_ModelRunORT(RAI_ModelRunCtx **mctxs, RAI_Error *error) {
559560
for (uint32_t i = 0; i < ninputs; i++) {
560561
status = ort->AllocatorFree(global_allocator, (void *)input_names[i]);
561562
}
563+
array_free(input_names);
562564
for (uint32_t i = 0; i < noutputs; i++) {
563565
status = ort->AllocatorFree(global_allocator, (void *)output_names[i]);
564566
}
567+
array_free(output_names);
565568

566569
for (size_t i = 0; i < n_output_nodes; i++) {
567570
if (nbatches > 1) {
568-
OrtTensorTypeAndShapeInfo *info;
569571
ONNX_VALIDATE_STATUS(ort->GetTensorTypeAndShape(outputs[i], &info))
570572
size_t ndims;
571573
ONNX_VALIDATE_STATUS(ort->GetDimensionsCount(info, &ndims))
@@ -575,37 +577,36 @@ int RAI_ModelRunORT(RAI_ModelRunCtx **mctxs, RAI_Error *error) {
575577
if (dims[0] != total_batch_size) {
576578
RAI_SetError(error, RAI_EMODELRUN,
577579
"ERR Model did not generate the expected batch size");
578-
ort->ReleaseStatus(status);
579-
return REDISMODULE_ERR;
580+
goto error;
580581
}
581582

582583
for (size_t b = 0; b < nbatches; b++) {
583584
RAI_Tensor *output_tensor = RAI_TensorCreateFromOrtValue(
584585
outputs[i], batch_offsets[b], batch_sizes[b], error);
585586
if (error->code != RAI_OK) {
586-
ort->ReleaseStatus(status);
587-
return REDISMODULE_ERR;
587+
goto error;
588588
}
589589
if (output_tensor) {
590590
mctxs[b]->outputs[i].tensor = RAI_TensorGetShallowCopy(output_tensor);
591591
RAI_TensorFree(output_tensor);
592592
} else {
593-
printf("ERR: non-tensor output from ONNX models, ignoring (currently "
594-
"unsupported)");
593+
RedisModule_Log(NULL, "warning",
594+
"non-tensor output from ONNX models, ignoring (currently "
595+
"unsupported)");
595596
}
596597
}
597598
} else {
598599
RAI_Tensor *output_tensor = RAI_TensorCreateFromOrtValue(outputs[i], 0, -1, error);
599600
if (error->code != RAI_OK) {
600-
ort->ReleaseStatus(status);
601-
return REDISMODULE_ERR;
601+
goto error;
602602
}
603603
if (output_tensor) {
604604
mctxs[0]->outputs[i].tensor = RAI_TensorGetShallowCopy(output_tensor);
605605
RAI_TensorFree(output_tensor);
606606
} else {
607-
printf("ERR: non-tensor output from ONNX models, ignoring (currently "
608-
"unsupported)");
607+
RedisModule_Log(NULL, "warning",
608+
"non-tensor output from ONNX models, ignoring (currently "
609+
"unsupported)");
609610
}
610611
}
611612
ort->ReleaseValue(outputs[i]);
@@ -617,8 +618,29 @@ int RAI_ModelRunORT(RAI_ModelRunCtx **mctxs, RAI_Error *error) {
617618
}
618619

619620
error:
620-
RAI_SetError(error, RAI_EMODELRUN, ort->GetErrorMessage(status));
621-
ort->ReleaseStatus(status);
621+
if (status) {
622+
RAI_SetError(error, RAI_EMODELRUN, ort->GetErrorMessage(status));
623+
ort->ReleaseStatus(status);
624+
}
625+
for (uint32_t i = 0; i < array_len(input_names); i++) {
626+
status = ort->AllocatorFree(global_allocator, (void *)input_names[i]);
627+
}
628+
array_free(input_names);
629+
for (uint32_t i = 0; i < array_len(output_names); i++) {
630+
status = ort->AllocatorFree(global_allocator, (void *)output_names[i]);
631+
}
632+
array_free(output_names);
633+
for (size_t i = 0; i < array_len(inputs); i++) {
634+
ort->ReleaseValue(inputs[i]);
635+
}
636+
array_free(inputs);
637+
for (size_t i = 0; i < array_len(outputs); i++) {
638+
ort->ReleaseValue(outputs[i]);
639+
}
640+
array_free(outputs);
641+
if (info) {
642+
ort->ReleaseTensorTypeAndShapeInfo(info);
643+
}
622644
return REDISMODULE_ERR;
623645
}
624646

tests/flow/includes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def tmpfn():
187187

188188

189189
# Load a model/script from a file located in test_data dir.
190-
def load_from_file(file_name):
190+
def load_file_content(file_name):
191191
test_data_path = os.path.join(os.path.dirname(__file__), 'test_data')
192192
filename = os.path.join(test_data_path, file_name)
193193
with open(filename, 'rb') as f:

tests/flow/tests_commands.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def test_modelstore_errors(env):
1717
return
1818

1919
con = env.getConnection()
20-
model_pb = load_from_file('pt-minimal.pt')
20+
model_pb = load_file_content('pt-minimal.pt')
2121

2222
# Check that the basic arguments are valid (model's key, device, backend, blob)
2323
check_error_message(env, con, "wrong number of arguments for 'AI.MODELSTORE' command",
@@ -79,7 +79,7 @@ def test_modelexecute_errors(env):
7979
return
8080
con = env.getConnection()
8181

82-
model_pb = load_from_file('graph.pb')
82+
model_pb = load_file_content('graph.pb')
8383
ret = con.execute_command('AI.MODELSTORE', 'm{1}', 'TF', DEVICE,
8484
'INPUTS', 2, 'a', 'b', 'OUTPUTS', 1, 'mul', 'BLOB', model_pb)
8585
env.assertEqual(ret, b'OK')

tests/flow/tests_deprecated_commands.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def test_modelset_errors(env):
1212
return
1313

1414
con = env.getConnection()
15-
model_pb = load_from_file('pt-minimal.pt')
15+
model_pb = load_file_content('pt-minimal.pt')
1616

1717
# test validity of backend and device args.
1818
check_error_message(env, con, "wrong number of arguments for 'AI.MODELSET' command",
@@ -45,7 +45,7 @@ def test_modelset_errors(env):
4545
'AI.MODELSET', 'm{1}', 'TORCH', DEVICE, 'BATCHSIZE', 2, 'BLOB')
4646

4747
# test INPUTS and OUTPUTS args for TF backend
48-
model_pb = load_from_file('graph.pb')
48+
model_pb = load_file_content('graph.pb')
4949
check_error_message(env, con, "Insufficient arguments, INPUTS and OUTPUTS not specified",
5050
'AI.MODELSET', 'm_1{1}', 'TF', DEVICE, 'BLOB', model_pb)
5151
check_error_message(env, con, "INPUTS not specified",
@@ -62,7 +62,7 @@ def test_modelrun_errors(env):
6262
return
6363
con = env.getConnection()
6464

65-
model_pb = load_from_file('graph.pb')
65+
model_pb = load_file_content('graph.pb')
6666
ret = con.execute_command('AI.MODELSET', 'm{1}', 'TF', DEVICE,
6767
'INPUTS', 'a', 'b', 'OUTPUTS', 'mul', 'BLOB', model_pb)
6868
env.assertEqual(ret, b'OK')
@@ -94,7 +94,7 @@ def test_modelset_modelrun_tf(env):
9494
return
9595
con = env.getConnection()
9696

97-
model_pb = load_from_file('graph.pb')
97+
model_pb = load_file_content('graph.pb')
9898
ret = con.execute_command('AI.MODELSET', 'm{1}', 'TF', DEVICE, 'TAG', 'version:1',
9999
'INPUTS', 'a', 'b', 'OUTPUTS', 'mul', 'BLOB', model_pb)
100100
env.assertEqual(ret, b'OK')
@@ -123,8 +123,8 @@ def test_modelset_modelrun_tflite(env):
123123
return
124124

125125
con = env.getConnection()
126-
model_pb = load_from_file('mnist_model_quant.tflite')
127-
sample_raw = load_from_file('one.raw')
126+
model_pb = load_file_content('mnist_model_quant.tflite')
127+
sample_raw = load_file_content('one.raw')
128128

129129
ret = con.execute_command('AI.MODELSET', 'm{1}', 'TFLITE', 'CPU', 'TAG', 'asdf', 'BLOB', model_pb)
130130
env.assertEqual(ret, b'OK')
@@ -153,7 +153,7 @@ def test_modelset_modelrun_pytorch(env):
153153
return
154154

155155
con = env.getConnection()
156-
model_pb = load_from_file('pt-minimal.pt')
156+
model_pb = load_file_content('pt-minimal.pt')
157157

158158
ret = con.execute_command('AI.TENSORSET', 'a{1}', 'FLOAT', 2, 2, 'VALUES', 2, 3, 2, 3)
159159
env.assertEqual(ret, b'OK')
@@ -187,8 +187,8 @@ def test_modelset_modelrun_onnx(env):
187187
return
188188

189189
con = env.getConnection()
190-
model_pb = load_from_file('mnist.onnx')
191-
sample_raw = load_from_file('one.raw')
190+
model_pb = load_file_content('mnist.onnx')
191+
sample_raw = load_file_content('one.raw')
192192

193193
ret = con.execute_command('AI.MODELSET', 'm{1}', 'ONNX', DEVICE, 'TAG', 'version:2', 'BLOB', model_pb)
194194
env.assertEqual(ret, b'OK')

tests/flow/tests_llapi.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ def wrapper(env, *args, **kwargs):
1616
if b'RAI_llapi' in [module[1] for module in modules]:
1717
return f(env, *args, **kwargs)
1818
try:
19-
ret = con.execute_command('MODULE', 'LOAD', TESTMOD_PATH)
19+
# ret = con.execute_command('MODULE', 'LOAD', TESTMOD_PATH)
20+
ret = con.execute_command('MODULE', 'LOAD', TESTMOD_PATH)
2021
env.assertEqual(ret, b'OK')
2122
except Exception as e:
2223
env.assertFalse(True)

0 commit comments

Comments
 (0)