|
198 | 198 | 'basic', |
199 | 199 | ], |
200 | 200 |
|
201 | | - 'mistralai/Mistral-7B-Instruct-v0.1': [ |
202 | | - 'basic', |
203 | | - ], |
| 201 | + # Remove gated model that requires authentication |
| 202 | + # 'mistralai/Mistral-7B-Instruct-v0.1': [ |
| 203 | + # 'basic', |
| 204 | + # ], |
204 | 205 |
|
205 | 206 | 'HuggingFaceH4/zephyr-7b-beta': [ |
206 | 207 | 'system', |
@@ -327,29 +328,32 @@ def generate_tokenizer_tests(): |
327 | 328 |
|
328 | 329 | for tokenizer_id in TOKENIZERS_WITH_CHAT_TEMPLATES: |
329 | 330 | print(f'Generating chat templates for {tokenizer_id}') |
330 | | - tokenizer = AutoTokenizer.from_pretrained( |
331 | | - tokenizer_id, |
332 | | - |
333 | | - # TODO: Remove once https://github.com/huggingface/transformers/pull/26678 is fixed |
334 | | - use_fast='llama' not in tokenizer_id, |
335 | | - ) |
336 | | - tokenizer_results = [] |
337 | | - for key in TOKENIZERS_WITH_CHAT_TEMPLATES[tokenizer_id]: |
338 | | - messages = CHAT_MESSAGES_EXAMPLES[key] |
| 331 | + try: |
| 332 | + tokenizer = AutoTokenizer.from_pretrained( |
| 333 | + tokenizer_id, |
| 334 | + # TODO: Remove once https://github.com/huggingface/transformers/pull/26678 is fixed |
| 335 | + use_fast='llama' not in tokenizer_id, |
| 336 | + ) |
| 337 | + tokenizer_results = [] |
| 338 | + for key in TOKENIZERS_WITH_CHAT_TEMPLATES[tokenizer_id]: |
| 339 | + messages = CHAT_MESSAGES_EXAMPLES[key] |
339 | 340 |
|
340 | | - for add_generation_prompt, tokenize in product([True, False], [True, False]): |
341 | | - tokenizer_results.append(dict( |
342 | | - messages=messages, |
343 | | - add_generation_prompt=add_generation_prompt, |
344 | | - tokenize=tokenize, |
345 | | - target=tokenizer.apply_chat_template( |
346 | | - messages, |
| 341 | + for add_generation_prompt, tokenize in product([True, False], [True, False]): |
| 342 | + tokenizer_results.append(dict( |
| 343 | + messages=messages, |
347 | 344 | add_generation_prompt=add_generation_prompt, |
348 | 345 | tokenize=tokenize, |
349 | | - ), |
350 | | - )) |
351 | | - |
352 | | - template_results[tokenizer_id] = tokenizer_results |
| 346 | + target=tokenizer.apply_chat_template( |
| 347 | + messages, |
| 348 | + add_generation_prompt=add_generation_prompt, |
| 349 | + tokenize=tokenize, |
| 350 | + ), |
| 351 | + )) |
| 352 | + |
| 353 | + template_results[tokenizer_id] = tokenizer_results |
| 354 | + except OSError as e: |
| 355 | + print(f" - Skipping {tokenizer_id}: {e}") |
| 356 | + continue |
353 | 357 |
|
354 | 358 | return dict( |
355 | 359 | tokenization=tokenization_results, |
|
0 commit comments