Commit 1d7cfd2

revert tests
1 parent 377dffc · commit 1d7cfd2

File tree

1 file changed (+23, -60 lines)

tests/generate_tests.py

Lines changed: 23 additions & 60 deletions
@@ -3,24 +3,13 @@
 
 import json
 import os
-import sys
 from itertools import product
 
 from transformers import AutoTokenizer, AutoConfig
 import numpy as np
 
 from scripts.supported_models import SUPPORTED_MODELS
 
-# Handle protobuf compatibility issues by setting the environment variable if not already set
-# This is one of the workarounds mentioned in the error message
-if 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION' not in os.environ:
-    os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
-
-# Check if we should run in local mode (safer settings)
-LOCAL_MODE = '--local' in sys.argv
-if LOCAL_MODE:
-    print("Running in local mode with safer settings")
-
 # List of tokenizers where the model isn't yet supported, but the tokenizer is
 ADDITIONAL_TOKENIZERS_TO_TEST = {
     'falcon': [
@@ -69,12 +58,6 @@
 
     # TODO: remove when https://github.com/huggingface/transformers/issues/28096 is addressed
     'RajuKandasamy/tamillama_tiny_30m',
-
-    # TODO: remove when need for trust_remote_code can be addressed in CI
-    'monologg/kobert',
-
-    # TODO: remove when protobuf compatibility issues are resolved
-    'dangvantuan/sentence-camembert-large',
 ]
 
 MAX_TESTS = {
@@ -286,21 +269,15 @@ def generate_tokenizer_tests():
                 tokenizer = AutoTokenizer.from_pretrained(
                     tokenizer_name,
                     use_fast=False,
-                    trust_remote_code=True,
                 )
                 decoder_tokenizer = AutoTokenizer.from_pretrained(
                     tokenizer_name,
                     use_fast=True,
-                    trust_remote_code=True,
                 )
 
             else:
-                # In local mode, always use slow tokenizers to avoid protobuf issues
-                use_fast = not LOCAL_MODE
                 decoder_tokenizer = tokenizer = AutoTokenizer.from_pretrained(
-                    tokenizer_name,
-                    trust_remote_code=True,
-                    use_fast=use_fast)
+                    tokenizer_name)
 
         except (KeyError, EnvironmentError):
             # If a KeyError/EnvironmentError is raised from the AutoTokenizer, it
@@ -347,42 +324,29 @@ def generate_tokenizer_tests():
 
     for tokenizer_id in TOKENIZERS_WITH_CHAT_TEMPLATES:
         print(f'Generating chat templates for {tokenizer_id}')
+        tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer_id,
 
-        try:
-            # In local mode, use safer settings
-            use_fast = not LOCAL_MODE or 'llama' not in tokenizer_id
+            # TODO: Remove once https://github.com/huggingface/transformers/pull/26678 is fixed
+            use_fast='llama' not in tokenizer_id,
+        )
+        tokenizer_results = []
+        for key in TOKENIZERS_WITH_CHAT_TEMPLATES[tokenizer_id]:
+            messages = CHAT_MESSAGES_EXAMPLES[key]
 
-            tokenizer = AutoTokenizer.from_pretrained(
-                tokenizer_id,
-                use_fast=use_fast,
-                trust_remote_code=True,
-            )
-            tokenizer_results = []
-            for key in TOKENIZERS_WITH_CHAT_TEMPLATES[tokenizer_id]:
-                messages = CHAT_MESSAGES_EXAMPLES[key]
-
-                for add_generation_prompt, tokenize in product([True, False], [True, False]):
-                    try:
-                        result = tokenizer.apply_chat_template(
-                            messages,
-                            add_generation_prompt=add_generation_prompt,
-                            tokenize=tokenize,
-                        )
-                        tokenizer_results.append(dict(
-                            messages=messages,
-                            add_generation_prompt=add_generation_prompt,
-                            tokenize=tokenize,
-                            target=result,
-                        ))
-                    except ValueError as e:
-                        print(f" - Skipping template for {tokenizer_id} with {key}: {str(e)}")
-                        continue
+            for add_generation_prompt, tokenize in product([True, False], [True, False]):
+                tokenizer_results.append(dict(
+                    messages=messages,
+                    add_generation_prompt=add_generation_prompt,
+                    tokenize=tokenize,
+                    target=tokenizer.apply_chat_template(
+                        messages,
+                        add_generation_prompt=add_generation_prompt,
+                        tokenize=tokenize,
+                    ),
+                ))
 
-            if tokenizer_results:
-                template_results[tokenizer_id] = tokenizer_results
-        except Exception as e:
-            print(f" - Error processing tokenizer {tokenizer_id}: {str(e)}")
-            continue
+        template_results[tokenizer_id] = tokenizer_results
 
     return dict(
         tokenization=tokenization_results,
@@ -399,7 +363,7 @@ def generate_config_tests():
         print(' -', config_name)
         try:
             # Load config
-            config = AutoConfig.from_pretrained(config_name, trust_remote_code=True)
+            config = AutoConfig.from_pretrained(config_name)
         except Exception:
             # Something went wrong, skip this config
             continue
@@ -464,5 +428,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
-
+    main()
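
For reference, the chat-template fixtures restored above are produced by sweeping the full grid of apply_chat_template options. The following is a minimal standalone sketch of that pattern, assuming only the transformers package and access to the Hugging Face Hub; the checkpoint and the example conversation are illustrative stand-ins, not values taken from this commit:

    # Sketch: generate chat-template test targets over every combination of
    # add_generation_prompt and tokenize, mirroring the loop in generate_tests.py.
    from itertools import product

    from transformers import AutoTokenizer

    # Hypothetical checkpoint and messages, for illustration only.
    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
    messages = [
        {"role": "user", "content": "Hello, how are you?"},
        {"role": "assistant", "content": "Fine, thanks. How can I help?"},
    ]

    cases = []
    for add_generation_prompt, tokenize in product([True, False], [True, False]):
        cases.append(dict(
            messages=messages,
            add_generation_prompt=add_generation_prompt,
            tokenize=tokenize,
            # tokenize=True yields token ids; tokenize=False yields the rendered string
            target=tokenizer.apply_chat_template(
                messages,
                add_generation_prompt=add_generation_prompt,
                tokenize=tokenize,
            ),
        ))

    print(f"Generated {len(cases)} chat-template test cases")

Note that with the trust_remote_code=True arguments reverted, from_pretrained falls back to its default of trust_remote_code=False, which is consistent with dropping tokenizers that require custom code (such as monologg/kobert) from the test list in the same commit.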
