-
-
Notifications
You must be signed in to change notification settings - Fork 551
Add optional detailed token counts #4657
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| import json | ||
| import html | ||
| import os | ||
| import re | ||
| import shutil | ||
| import platform | ||
| import subprocess | ||
|
|
@@ -422,32 +423,50 @@ def copy_seed(gen_info_string: str, index: int): | |
| reuse_seed_btn.click(fn=copy_seed, _js="(x, y) => [x, selected_gallery_index()]", show_progress='hidden', inputs=[generation_info, dummy_component], outputs=[seed, dummy_component, subseed_strength]) | ||
|
|
||
|
|
||
| def update_token_counter(text): | ||
| token_count = 0 | ||
| max_length = 75 | ||
| def update_token_counter(text: str): | ||
| if shared.state.job_count > 0: | ||
| shared.log.debug('Tokenizer busy') | ||
| return f"<span class='gr-box gr-text-input'>{token_count}/{max_length}</span>" | ||
| from modules import extra_networks | ||
| if isinstance(text, list): | ||
| prompt, _ = extra_networks.parse_prompts(text) | ||
| else: | ||
| prompt, _ = extra_networks.parse_prompt(text) | ||
| return gr.update(value="<span class='gr-box gr-text-input'>--/--</span>", visible=True) | ||
|
|
||
| from modules.extra_networks import parse_prompt | ||
|
|
||
| count_formatted = '0' | ||
| visible = False | ||
|
|
||
| prompt, _ = parse_prompt(text) | ||
| prompt_list = [prompt] | ||
| ids = [] | ||
| if shared.sd_loaded and hasattr(shared.sd_model, 'tokenizer') and shared.sd_model.tokenizer is not None: | ||
| if shared.opts.prompt_detailed_tokens: | ||
| p_split = re.compile(r'\bBREAK\b|\n' if shared.opts.sd_textencder_linebreak else r'\bBREAK\b') | ||
| prompt_list = re.split(p_split, prompt) | ||
|
|
||
| tokenizer = shared.sd_model.tokenizer | ||
| # For multi-modal processors (e.g., PixtralProcessor), use the underlying text tokenizer | ||
| if hasattr(tokenizer, 'tokenizer') and tokenizer.tokenizer is not None: | ||
| tokenizer = tokenizer.tokenizer | ||
| has_bos_token = hasattr(tokenizer, 'bos_token_id') and tokenizer.bos_token_id is not None | ||
| has_eos_token = hasattr(tokenizer, 'eos_token_id') and tokenizer.eos_token_id is not None | ||
| try: | ||
| ids = tokenizer(prompt) | ||
| ids = getattr(ids, 'input_ids', []) | ||
| except Exception: | ||
| ids = [] | ||
| token_count = len(ids) - int(has_bos_token) - int(has_eos_token) | ||
| has_bos_token = getattr(tokenizer, 'bos_token_id', None) is not None | ||
| has_eos_token = getattr(tokenizer, 'eos_token_id', None) is not None | ||
| model_max_length = getattr(tokenizer, 'model_max_length', 0) | ||
| max_length = model_max_length - int(has_bos_token) - int(has_eos_token) | ||
| if max_length is None or max_length < 0 or max_length > 10000: | ||
| max_length = 0 | ||
| return gr.update(value=f"<span class='gr-box gr-text-input'>{token_count}/{max_length}</span>", visible=token_count > 0) | ||
|
|
||
| try: | ||
| try: | ||
| ids = getattr(tokenizer(prompt_list), 'input_ids', []) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. this assumes that tokenizer works with list as input, that's the only thing i'm not sure about for all different tokenizers, but ok for now and if needed, it can be changed later. |
||
| except TypeError: | ||
| for p in prompt_list: | ||
| ids.append(getattr(tokenizer(p), 'input_ids', [])) | ||
| except Exception as e: | ||
| shared.log.warning("Token counter:", e) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. this should be guarded with warn_once so it's not flooding the log? |
||
| return gr.update(value=f"<span class='gr-box gr-text-input'>??/{max_length}</span>", visible=True) | ||
|
|
||
| token_counts = [len(group) - int(has_bos_token) - int(has_eos_token) for group in ids] | ||
| if len(token_counts) > 1: | ||
| visible = True | ||
| count_formatted = f"{token_counts} {sum(token_counts)}" if shared.opts.prompt_detailed_tokens else str(sum(token_counts)) | ||
| elif len(token_counts) == 1 and token_counts[0] > 0: | ||
| visible = True | ||
| count_formatted = str(token_counts[0]) | ||
| return gr.update(value=f"<span class='gr-box gr-text-input'>{count_formatted}/{max_length}</span>", visible=visible) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i don't think we need this as tunable. if it works, it should be on and that's it.