Source: emel/generator/prefill/sm.hpp
stateDiagram-v2
direction TB
%% Top-to-bottom state diagram of the state machine named on the "Source:" line above.
%% NOTE(review): transition labels appear to follow `event [guard] / action`.
%% `always` presumably denotes an unconditional guard, `none` presumably denotes
%% "no action", and `_` presumably matches any event not handled by an earlier
%% row -- confirm against the state machine definition.
%% The diagram has no transition into [*]; every path ends back in `idle`.
%%
%% Entry: the initial pseudo-state leads to `idle`; `run` starts the pipeline.
[*] --> idle
idle --> slots : run [always] / none
%% Stage 1: `slots` issues request_slots_, then `slots_decision` branches on the
%% result: ok continues to `snapshot`, either error kind returns to `idle`.
slots --> slots_decision : completion_run_ [always] / request_slots_
slots_decision --> snapshot : completion_run_ [slots_ok_] / none
slots_decision --> idle : completion_run_ [slots_invalid_request_] / mark_invalid_request_
slots_decision --> idle : completion_run_ [slots_backend_error_] / mark_backend_error_
%% Stage 2: `snapshot` issues request_memory_snapshot_, then `snapshot_decision`
%% branches the same three ways (ok / invalid request / backend error).
snapshot --> snapshot_decision : completion_run_ [always] / request_memory_snapshot_
snapshot_decision --> contract_runtime_decision : completion_run_ [snapshot_ok_] / none
snapshot_decision --> idle : completion_run_ [snapshot_invalid_request_] / mark_invalid_request_
snapshot_decision --> idle : completion_run_ [snapshot_backend_error_] / mark_backend_error_
%% Stage 3: choose between the flash and non-flash decision states.
contract_runtime_decision --> contract_flash_decision : completion_run_ [flash_runtime_supported_] / none
contract_runtime_decision --> contract_nonflash_decision : completion_run_ [nonflash_runtime_required_] / none
%% Stage 4a (flash): eight guarded rows, all targeting `compute_result_decision`.
%% Guards are {materialized_logits, preselected_argmax} x
%% {chunk8_q8_k, chunk4_packed_q8_0, chunk4_q8_k, scalar}; each row's action is
%% the request_contract_flash_* counterpart of its guard.
contract_flash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_chunk8_q8_k_] / request_contract_flash_materialized_chunk8_q8_k_
contract_flash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_chunk4_packed_q8_0_] / request_contract_flash_materialized_chunk4_packed_q8_0_
contract_flash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_chunk4_q8_k_] / request_contract_flash_materialized_chunk4_q8_k_
contract_flash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_scalar_] / request_contract_flash_materialized_scalar_
contract_flash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_chunk8_q8_k_] / request_contract_flash_preselected_chunk8_q8_k_
contract_flash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_chunk4_packed_q8_0_] / request_contract_flash_preselected_chunk4_packed_q8_0_
contract_flash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_chunk4_q8_k_] / request_contract_flash_preselected_chunk4_q8_k_
contract_flash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_scalar_] / request_contract_flash_preselected_scalar_
%% Stage 4b (non-flash): the same eight guards in the same order, paired with
%% the request_contract_nonflash_* actions.
contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_chunk8_q8_k_] / request_contract_nonflash_materialized_chunk8_q8_k_
contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_chunk4_packed_q8_0_] / request_contract_nonflash_materialized_chunk4_packed_q8_0_
contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_chunk4_q8_k_] / request_contract_nonflash_materialized_chunk4_q8_k_
contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_materialized_logits_with_scalar_] / request_contract_nonflash_materialized_scalar_
contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_chunk8_q8_k_] / request_contract_nonflash_preselected_chunk8_q8_k_
contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_chunk4_packed_q8_0_] / request_contract_nonflash_preselected_chunk4_packed_q8_0_
contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_chunk4_q8_k_] / request_contract_nonflash_preselected_chunk4_q8_k_
contract_nonflash_decision --> compute_result_decision : completion_run_ [uses_preselected_argmax_with_scalar_] / request_contract_nonflash_preselected_scalar_
%% Stage 5: all three compute outcomes return to `idle`; only the action differs
%% (mark_prefill_cached_ on ok, mark_invalid_request_ / mark_backend_error_ otherwise).
compute_result_decision --> idle : completion_run_ [compute_ok_] / mark_prefill_cached_
compute_result_decision --> idle : completion_run_ [compute_invalid_request_] / mark_invalid_request_
compute_result_decision --> idle : completion_run_ [compute_backend_error_] / mark_backend_error_
%% Catch-all rows: every state, including `idle` itself, has a `_` row that runs
%% on_unexpected_ and lands in `idle`.
%% NOTE(review): these rows are listed last; if row order encodes priority in the
%% source state machine, keep them after the specific rows above -- confirm.
idle --> idle : _ [always] / on_unexpected_
slots --> idle : _ [always] / on_unexpected_
slots_decision --> idle : _ [always] / on_unexpected_
snapshot --> idle : _ [always] / on_unexpected_
snapshot_decision --> idle : _ [always] / on_unexpected_
contract_runtime_decision --> idle : _ [always] / on_unexpected_
contract_flash_decision --> idle : _ [always] / on_unexpected_
contract_nonflash_decision --> idle : _ [always] / on_unexpected_
compute_result_decision --> idle : _ [always] / on_unexpected_