Skip to content

Latest commit

 

History

History
94 lines (89 loc) · 26.2 KB

File metadata and controls

94 lines (89 loc) · 26.2 KB

# generator_prefill

Source: `emel/generator/prefill/sm.hpp`

## Mermaid

```mermaid
stateDiagram-v2
  direction TB
  [*] --> idle
  idle --> slots : run [always] / none
  slots --> slots_decision : completion_run_ [always] / request_slots_
  slots_decision --> snapshot : completion_run_ [slots_ok_] / none
  slots_decision --> idle : completion_run_ [slots_invalid_request_] / mark_invalid_request_
  slots_decision --> idle : completion_run_ [slots_backend_error_] / mark_backend_error_
  snapshot --> snapshot_decision : completion_run_ [always] / request_memory_snapshot_
  snapshot_decision --> contract_runtime_decision : completion_run_ [snapshot_ok_] / none
  snapshot_decision --> idle : completion_run_ [snapshot_invalid_request_] / mark_invalid_request_
  snapshot_decision --> idle : completion_run_ [snapshot_backend_error_] / mark_backend_error_
  contract_runtime_decision --> contract_flash_decision : completion_run_ [flash_runtime_supported_] / none
  contract_runtime_decision --> contract_nonflash_decision : completion_run_ [nonflash_runtime_required_] / none
  contract_flash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_chunk8_q8_k_] / request_contract_flash_materialized_chunk8_q8_k_
  contract_flash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_chunk4_packed_q8_0_] / request_contract_flash_materialized_chunk4_packed_q8_0_
  contract_flash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_chunk4_q8_k_] / request_contract_flash_materialized_chunk4_q8_k_
  contract_flash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_scalar_] / request_contract_flash_materialized_scalar_
  contract_flash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_chunk8_q8_k_] / request_contract_flash_preselected_chunk8_q8_k_
  contract_flash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_chunk4_packed_q8_0_] / request_contract_flash_preselected_chunk4_packed_q8_0_
  contract_flash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_chunk4_q8_k_] / request_contract_flash_preselected_chunk4_q8_k_
  contract_flash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_scalar_] / request_contract_flash_preselected_scalar_
  contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_chunk8_q8_k_] / request_contract_nonflash_materialized_chunk8_q8_k_
  contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_chunk4_packed_q8_0_] / request_contract_nonflash_materialized_chunk4_packed_q8_0_
  contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_chunk4_q8_k_] / request_contract_nonflash_materialized_chunk4_q8_k_
  contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_scalar_] / request_contract_nonflash_materialized_scalar_
  contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_chunk8_q8_k_] / request_contract_nonflash_preselected_chunk8_q8_k_
  contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_chunk4_packed_q8_0_] / request_contract_nonflash_preselected_chunk4_packed_q8_0_
  contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_chunk4_q8_k_] / request_contract_nonflash_preselected_chunk4_q8_k_
  contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_scalar_] / request_contract_nonflash_preselected_scalar_
  compute_result_decision --> idle : completion_run_ [compute_ok_] / mark_prefill_cached_
  compute_result_decision --> idle : completion_run_ [compute_invalid_request_] / mark_invalid_request_
  compute_result_decision --> idle : completion_run_ [compute_backend_error_] / mark_backend_error_
  idle --> idle : _ [always] / on_unexpected_
  slots --> idle : _ [always] / on_unexpected_
  slots_decision --> idle : _ [always] / on_unexpected_
  snapshot --> idle : _ [always] / on_unexpected_
  snapshot_decision --> idle : _ [always] / on_unexpected_
  contract_runtime_decision --> idle : _ [always] / on_unexpected_
  contract_flash_decision --> idle : _ [always] / on_unexpected_
  contract_nonflash_decision --> idle : _ [always] / on_unexpected_
  compute_result_decision --> idle : _ [always] / on_unexpected_
```

## Transitions

| Source | Event | Guard | Action | Target |
| --- | --- | --- | --- | --- |
| `idle` | `run` | `always` | `none` | `slots` |
| `slots` | `completion<run>` | `always` | `request_slots>` | `slots_decision` |
| `slots_decision` | `completion<run>` | `slots_ok>` | `none` | `snapshot` |
| `slots_decision` | `completion<run>` | `slots_invalid_request>` | `mark_invalid_request>` | `idle` |
| `slots_decision` | `completion<run>` | `slots_backend_error>` | `mark_backend_error>` | `idle` |
| `snapshot` | `completion<run>` | `always` | `request_memory_snapshot>` | `snapshot_decision` |
| `snapshot_decision` | `completion<run>` | `snapshot_ok>` | `none` | `contract_runtime_decision` |
| `snapshot_decision` | `completion<run>` | `snapshot_invalid_request>` | `mark_invalid_request>` | `idle` |
| `snapshot_decision` | `completion<run>` | `snapshot_backend_error>` | `mark_backend_error>` | `idle` |
| `contract_runtime_decision` | `completion<run>` | `flash_runtime_supported>` | `none` | `contract_flash_decision` |
| `contract_runtime_decision` | `completion<run>` | `nonflash_runtime_required>` | `none` | `contract_nonflash_decision` |
| `contract_flash_decision` | `completion<run>` | `uses_materialized_logits_with_chunk8_q8_k>` | `request_contract_flash_materialized_chunk8_q8_k>` | `compute_result_decision` |
| `contract_flash_decision` | `completion<run>` | `uses_materialized_logits_with_chunk4_packed_q8_0>` | `request_contract_flash_materialized_chunk4_packed_q8_0>` | `compute_result_decision` |
| `contract_flash_decision` | `completion<run>` | `uses_materialized_logits_with_chunk4_q8_k>` | `request_contract_flash_materialized_chunk4_q8_k>` | `compute_result_decision` |
| `contract_flash_decision` | `completion<run>` | `uses_materialized_logits_with_scalar>` | `request_contract_flash_materialized_scalar>` | `compute_result_decision` |
| `contract_flash_decision` | `completion<run>` | `uses_preselected_argmax_with_chunk8_q8_k>` | `request_contract_flash_preselected_chunk8_q8_k>` | `compute_result_decision` |
| `contract_flash_decision` | `completion<run>` | `uses_preselected_argmax_with_chunk4_packed_q8_0>` | `request_contract_flash_preselected_chunk4_packed_q8_0>` | `compute_result_decision` |
| `contract_flash_decision` | `completion<run>` | `uses_preselected_argmax_with_chunk4_q8_k>` | `request_contract_flash_preselected_chunk4_q8_k>` | `compute_result_decision` |
| `contract_flash_decision` | `completion<run>` | `uses_preselected_argmax_with_scalar>` | `request_contract_flash_preselected_scalar>` | `compute_result_decision` |
| `contract_nonflash_decision` | `completion<run>` | `uses_materialized_logits_with_chunk8_q8_k>` | `request_contract_nonflash_materialized_chunk8_q8_k>` | `compute_result_decision` |
| `contract_nonflash_decision` | `completion<run>` | `uses_materialized_logits_with_chunk4_packed_q8_0>` | `request_contract_nonflash_materialized_chunk4_packed_q8_0>` | `compute_result_decision` |
| `contract_nonflash_decision` | `completion<run>` | `uses_materialized_logits_with_chunk4_q8_k>` | `request_contract_nonflash_materialized_chunk4_q8_k>` | `compute_result_decision` |
| `contract_nonflash_decision` | `completion<run>` | `uses_materialized_logits_with_scalar>` | `request_contract_nonflash_materialized_scalar>` | `compute_result_decision` |
| `contract_nonflash_decision` | `completion<run>` | `uses_preselected_argmax_with_chunk8_q8_k>` | `request_contract_nonflash_preselected_chunk8_q8_k>` | `compute_result_decision` |
| `contract_nonflash_decision` | `completion<run>` | `uses_preselected_argmax_with_chunk4_packed_q8_0>` | `request_contract_nonflash_preselected_chunk4_packed_q8_0>` | `compute_result_decision` |
| `contract_nonflash_decision` | `completion<run>` | `uses_preselected_argmax_with_chunk4_q8_k>` | `request_contract_nonflash_preselected_chunk4_q8_k>` | `compute_result_decision` |
| `contract_nonflash_decision` | `completion<run>` | `uses_preselected_argmax_with_scalar>` | `request_contract_nonflash_preselected_scalar>` | `compute_result_decision` |
| `compute_result_decision` | `completion<run>` | `compute_ok>` | `mark_prefill_cached>` | `idle` |
| `compute_result_decision` | `completion<run>` | `compute_invalid_request>` | `mark_invalid_request>` | `idle` |
| `compute_result_decision` | `completion<run>` | `compute_backend_error>` | `mark_backend_error>` | `idle` |
| `idle` | `_` | `always` | `on_unexpected>` | `idle` |
| `slots` | `_` | `always` | `on_unexpected>` | `idle` |
| `slots_decision` | `_` | `always` | `on_unexpected>` | `idle` |
| `snapshot` | `_` | `always` | `on_unexpected>` | `idle` |
| `snapshot_decision` | `_` | `always` | `on_unexpected>` | `idle` |
| `contract_runtime_decision` | `_` | `always` | `on_unexpected>` | `idle` |
| `contract_flash_decision` | `_` | `always` | `on_unexpected>` | `idle` |
| `contract_nonflash_decision` | `_` | `always` | `on_unexpected>` | `idle` |
| `compute_result_decision` | `_` | `always` | `on_unexpected>` | `idle` |