Description
In the latest commit to mosaicml/mpt-7b, https://huggingface.co/mosaicml/mpt-7b/commit/67cf22a4e6809edb7308dd0a2ae2c1ffb86f4984, BigDL throws the error below when generating text.
INFO 2024-02-20 06:41:05,962 proxy 172.17.0.2 0dfd2310-daba-4d40-8b27-6ccbbd608fd2 /mpt-7b-bigdl router.py:959 - Using router <class 'ray.serve._private.router.PowerOfTwoChoicesReplicaScheduler'>.
INFO 2024-02-20 06:41:05,978 proxy 172.17.0.2 router.py:496 - Got updated replicas for deployment 'PredictorDeployment' in application 'mpt-7b-bigdl': {'mpt-7b-bigdl#PredictorDeployment#jBsKpA'}.
ERROR 2024-02-20 06:41:06,031 proxy 172.17.0.2 0dfd2310-daba-4d40-8b27-6ccbbd608fd2 /mpt-7b-bigdl proxy.py:1045 - ray::ServeReplica:mpt-7b-bigdl:PredictorDeployment.handle_request_streaming() (pid=8543, ip=172.17.0.2, actor_id=55a8f57651e95e4bfc9348b101000000, repr=<ray.serve._private.replica.ServeReplica:mpt-7b-bigdl:PredictorDeployment object at 0x7fa349fc2d30>)
async for result in generator:
File "/opt/conda/lib/python3.9/site-packages/ray/serve/_private/replica.py", line 338, in _handle_http_request_generator
raise e from None
File "/opt/conda/lib/python3.9/site-packages/ray/serve/_private/replica.py", line 914, in call_user_method
raise e from None
ray.exceptions.RayTaskError: ray::ServeReplica:mpt-7b-bigdl:PredictorDeployment.handle_request_streaming() (pid=8543, ip=172.17.0.2)
File "/opt/conda/lib/python3.9/site-packages/ray/serve/_private/utils.py", line 165, in wrap_to_ray_error
raise exception
File "/opt/conda/lib/python3.9/site-packages/ray/serve/_private/replica.py", line 895, in call_user_method
result = await method_to_call(*request_args, **request_kwargs)
File "/root/llm-on-ray/inference/predictor_deployment.py", line 121, in call
return self.predictor.generate(prompts, **config)
File "/root/llm-on-ray/inference/transformer_predictor.py", line 122, in generate
gen_tokens = self.model.generate(
File "/opt/conda/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/transformers/generation/utils.py", line 1538, in generate
return self.greedy_search(
File "/opt/conda/lib/python3.9/site-packages/transformers/generation/utils.py", line 2362, in greedy_search
outputs = self(
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b/67cf22a4e6809edb7308dd0a2ae2c1ffb86f4984/modeling_mpt.py", line 436, in forward
outputs = self.transformer(input_ids=input_ids, past_key_values=past_key_values, attention_mask=attention_mask, prefix_mask=prefix_mask, sequence_id=sequence_id, return_dict=return_dict, output_attentions=output_attentions, output_hidden_states=output_hidden_states, use_cache=use_cache, inputs_embeds=inputs_embeds)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b/67cf22a4e6809edb7308dd0a2ae2c1ffb86f4984/modeling_mpt.py", line 357, in forward
(x, attn_weights, present) = block(x, past_key_value=past_key_value, attn_bias=attn_bias, rotary_emb_w_meta_info=rotary_emb_w_meta_info, attention_mask=attention_mask, is_causal=self.is_causal, output_attentions=bool(output_attentions), alibi_slopes=alibi_slopes, flash_attn_padding_info=flash_attn_padding_info)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b/67cf22a4e6809edb7308dd0a2ae2c1ffb86f4984/blocks.py", line 40, in forward
(b, attn_weights, past_key_value) = self.attn(a, past_key_value=past_key_value, attn_bias=attn_bias, rotary_emb_w_meta_info=rotary_emb_w_meta_info, attention_mask=attention_mask, is_causal=is_causal, needs_weights=output_attentions, alibi_slopes=alibi_slopes, flash_attn_padding_info=flash_attn_padding_info)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
TypeError: mpt_multihead_attention_forward() got an unexpected keyword argument 'rotary_emb_w_meta_info'
Traceback (most recent call last):
File "/opt/conda/lib/python3.9/site-packages/ray/serve/_private/proxy.py", line 979, in send_request_to_replica
async for asgi_message_batch in response_generator:
File "/opt/conda/lib/python3.9/site-packages/ray/serve/_private/proxy_response_generator.py", line 111, in anext
raise e from None
File "/opt/conda/lib/python3.9/site-packages/ray/serve/_private/proxy_response_generator.py", line 91, in anext
result = await self._get_next_streaming_result()
File "/opt/conda/lib/python3.9/site-packages/ray/serve/_private/proxy_response_generator.py", line 134, in _get_next_streaming_result
return next_result_task.result()
File "/opt/conda/lib/python3.9/site-packages/ray/serve/_private/proxy_response_generator.py", line 116, in _await_response_anext
return await self._response.__anext__()
File "/opt/conda/lib/python3.9/site-packages/ray/serve/handle.py", line 781, in __anext__
return await next_obj_ref
ray.exceptions.RayTaskError(TypeError): ray::ServeReplica:mpt-7b-bigdl:PredictorDeployment.handle_request_streaming() (pid=8543, ip=172.17.0.2, actor_id=55a8f57651e95e4bfc9348b101000000, repr=<ray.serve._private.replica.ServeReplica:mpt-7b-bigdl:PredictorDeployment object at 0x7fa349fc2d30>)
async for result in generator:
File "/opt/conda/lib/python3.9/site-packages/ray/serve/_private/replica.py", line 338, in _handle_http_request_generator
raise e from None
File "/opt/conda/lib/python3.9/site-packages/ray/serve/_private/replica.py", line 914, in call_user_method
raise e from None
ray.exceptions.RayTaskError: ray::ServeReplica:mpt-7b-bigdl:PredictorDeployment.handle_request_streaming() (pid=8543, ip=172.17.0.2)
File "/opt/conda/lib/python3.9/site-packages/ray/serve/_private/utils.py", line 165, in wrap_to_ray_error
raise exception
File "/opt/conda/lib/python3.9/site-packages/ray/serve/_private/replica.py", line 895, in call_user_method
result = await method_to_call(*request_args, **request_kwargs)
File "/root/llm-on-ray/inference/predictor_deployment.py", line 121, in call
return self.predictor.generate(prompts, **config)
File "/root/llm-on-ray/inference/transformer_predictor.py", line 122, in generate
gen_tokens = self.model.generate(
File "/opt/conda/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/transformers/generation/utils.py", line 1538, in generate
return self.greedy_search(
File "/opt/conda/lib/python3.9/site-packages/transformers/generation/utils.py", line 2362, in greedy_search
outputs = self(
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b/67cf22a4e6809edb7308dd0a2ae2c1ffb86f4984/modeling_mpt.py", line 436, in forward
outputs = self.transformer(input_ids=input_ids, past_key_values=past_key_values, attention_mask=attention_mask, prefix_mask=prefix_mask, sequence_id=sequence_id, return_dict=return_dict, output_attentions=output_attentions, output_hidden_states=output_hidden_states, use_cache=use_cache, inputs_embeds=inputs_embeds)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b/67cf22a4e6809edb7308dd0a2ae2c1ffb86f4984/modeling_mpt.py", line 357, in forward
(x, attn_weights, present) = block(x, past_key_value=past_key_value, attn_bias=attn_bias, rotary_emb_w_meta_info=rotary_emb_w_meta_info, attention_mask=attention_mask, is_causal=self.is_causal, output_attentions=bool(output_attentions), alibi_slopes=alibi_slopes, flash_attn_padding_info=flash_attn_padding_info)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b/67cf22a4e6809edb7308dd0a2ae2c1ffb86f4984/blocks.py", line 40, in forward
(b, attn_weights, past_key_value) = self.attn(a, past_key_value=past_key_value, attn_bias=attn_bias, rotary_emb_w_meta_info=rotary_emb_w_meta_info, attention_mask=attention_mask, is_causal=is_causal, needs_weights=output_attentions, alibi_slopes=alibi_slopes, flash_attn_padding_info=flash_attn_padding_info)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
TypeError: mpt_multihead_attention_forward() got an unexpected keyword argument 'rotary_emb_w_meta_info'
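The failure mode is visible at the bottom of both tracebacks: the 67cf22a4 revision of modeling_mpt.py now forwards a `rotary_emb_w_meta_info` keyword into every attention block, while BigDL's patched attention entry point, `mpt_multihead_attention_forward`, predates that keyword and rejects it. Until the BigDL patch is updated, one workaround is to pin the model to an earlier revision so the remote code still matches the patched signature. A minimal sketch, assuming a known-good commit SHA (the placeholder below is not a real revision):

```python
# Minimal sketch: pin mosaicml/mpt-7b to a revision that predates the
# `rotary_emb_w_meta_info` keyword so BigDL's patched attention still matches.
from transformers import AutoModelForCausalLM, AutoTokenizer

revision = "<last-known-good-sha>"  # assumption: replace with a real commit hash

tokenizer = AutoTokenizer.from_pretrained("mosaicml/mpt-7b", revision=revision)
model = AutoModelForCausalLM.from_pretrained(
    "mosaicml/mpt-7b",
    trust_remote_code=True,  # MPT ships custom modeling code on the Hub
    revision=revision,
)
```

Pinning `revision` also protects the deployment from future remote-code changes landing mid-flight.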
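Alternatively, the crash could be avoided on the BigDL side by making the patched forward tolerant of keyword arguments it does not recognize. The sketch below only illustrates the shape of such a shim, it is not BigDL's actual fix, and silently dropping `rotary_emb_w_meta_info` is only reasonable when the model is not configured to use rotary embeddings (MPT-7B's default attention uses ALiBi, in which case the new argument carries nothing):

```python
import functools
import inspect

def tolerate_unknown_kwargs(fn):
    """Wrap `fn` so keyword arguments absent from its signature are dropped
    instead of raising TypeError. Illustrative only."""
    accepted = set(inspect.signature(fn).parameters)

    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        return fn(*args, **{k: v for k, v in kwargs.items() if k in accepted})

    return wrapper

# Hypothetical usage against the symbol named in the traceback; the import
# path is an assumption and may differ between BigDL versions:
# import bigdl.llm.transformers.models.mpt as mpt_patch
# mpt_patch.mpt_multihead_attention_forward = tolerate_unknown_kwargs(
#     mpt_patch.mpt_multihead_attention_forward
# )
```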