-
Notifications
You must be signed in to change notification settings - Fork 57
CVS-175736-[OVEP] Enable stateful mode for Phi-silica models #821
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
65bbecc
1e132f3
25c6976
513e198
d7ee534
3c1c4c3
7c1720d
2041402
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -59,6 +59,17 @@ bool ModelHasInputOutputNames(std::shared_ptr<ov::Model> model, const std::strin | |||||
| return false; | ||||||
| } | ||||||
|
|
||||||
// Picks the name to use for a model input/output from a list of candidates.
//
// Walks `candidate_names` in order and returns the first name for which
// ModelHasInputOutputNames(ov_model, name) is true, so earlier entries take
// priority over later ones.
//
// If no candidate matches, the first candidate is returned unchanged (it is
// NOT checked against the model), and an empty string is returned when
// `candidate_names` is empty. Callers that pass the result on to a lookup by
// name should be prepared for a name the model does not contain.
| std::string GetInputOutputName(std::shared_ptr<ov::Model> ov_model, | ||||||
| const std::vector<std::string>& candidate_names) { | ||||||
| for (const auto& name : candidate_names) { | ||||||
| if (ModelHasInputOutputNames(ov_model, name)) { | ||||||
| return name; | ||||||
| } | ||||||
| } | ||||||
| // No candidate matched: fall back to the first candidate, or "" when the list is empty | ||||||
| return candidate_names.empty() ? "" : candidate_names[0]; | ||||||
| } | ||||||
|
|
||||||
| void FuseCacheReorder(std::shared_ptr<ov::Model> ov_model, | ||||||
| std::vector<std::string>& not_kv_inputs, | ||||||
| const std::vector<std::string>& key_value_input_names, | ||||||
|
|
@@ -67,10 +78,15 @@ void FuseCacheReorder(std::shared_ptr<ov::Model> ov_model, | |||||
| throw std::runtime_error("Model already has fused cache"); | ||||||
| } | ||||||
|
|
||||||
| std::string main_input_name = "inputs_embeds"; | ||||||
| if (ModelHasInputOutputNames(ov_model, "input_ids")) { | ||||||
| main_input_name = "input_ids"; | ||||||
| } | ||||||
| // Define input name candidates in priority order | ||||||
| const std::vector<std::string> input_name_candidates = { | ||||||
| "inputs_embeds", // Default fallback | ||||||
| "input_ids", // Most common | ||||||
| "input_hidden_states", // Alternative | ||||||
| "/model/embed_tokens/Gather_output_0" // Specific model type | ||||||
| }; | ||||||
|
|
||||||
| std::string main_input_name = GetInputOutputName(ov_model, input_name_candidates); | ||||||
|
|
||||||
| auto input_batch = ov_model->input(main_input_name).get_partial_shape()[0]; | ||||||
|
|
||||||
|
|
@@ -121,20 +137,22 @@ void MakeStateful(std::shared_ptr<ov::Model>& ov_model, | |||||
| void PatchStatefulDecoder(std::shared_ptr<ov::Model> model) { | ||||||
| std::vector<std::string> key_value_input_names; | ||||||
| std::vector<std::string> not_kv_inputs; | ||||||
| for (const ov::Output<ov::Node>& input : model->inputs()) { | ||||||
| auto& names = input.get_names(); | ||||||
|
|
||||||
| bool found = false; | ||||||
| for (auto& name : names) { | ||||||
| if (name.find("key_values") != std::string::npos) { | ||||||
| key_value_input_names.push_back(name); | ||||||
| const auto& params = model->get_parameters(); | ||||||
| bool found = false; | ||||||
| for (size_t i = 0; i < params.size(); i++) { | ||||||
| auto param_name = params.at(i)->output(0).get_any_name(); | ||||||
|
||||||
| auto param_name = params.at(i)->output(0).get_any_name(); | |
| auto param_name = params[i]->output(0).get_any_name(); |
RyanMetcalfeInt8 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
Copilot
AI
Oct 28, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The 'found' variable is never reset to false between iterations, causing incorrect classification of subsequent parameters. Reset 'found = false' at the beginning of each loop iteration.
Uh oh!
There was an error while loading. Please reload this page.