diff --git a/garak/attempt.py b/garak/attempt.py index d164898cc..1835e256a 100644 --- a/garak/attempt.py +++ b/garak/attempt.py @@ -106,7 +106,7 @@ def from_dict(cls, value: dict): raise ValueError("Expected `role` in Turn dict") message = entity.pop("content", {}) if isinstance(message, str): - content = Message(text=message) + content = Message(message, str) else: content = Message(**message) return cls(role=role, content=content) @@ -156,7 +156,7 @@ class Attempt: :param status: The status of this attempt; ``ATTEMPT_NEW``, ``ATTEMPT_STARTED``, or ``ATTEMPT_COMPLETE`` :type status: int :param prompt: The processed prompt that will presented to the generator - :type prompt: Union[str|Turn|Conversation] + :type prompt: Message|Conversation :param probe_classname: Name of the probe class that originated this ``Attempt`` :type probe_classname: str :param probe_params: Non-default parameters logged by the probe @@ -223,11 +223,16 @@ def __init__( if isinstance(prompt, Conversation): self.conversations = [prompt] elif isinstance(prompt, str): - msg = Message(text=prompt, lang=lang) + raise ValueError( + "attempt Prompt must be Message or Conversation, not string" + ) + # msg = Message(text=prompt, lang=lang) elif isinstance(prompt, Message): msg = prompt else: - raise TypeError("prompts must be of type str | Message | Conversation") + raise TypeError( + "attempt prompts must be of type Message | Conversation" + ) if not hasattr(self, "conversations"): self.conversations = [Conversation([Turn("user", msg)])] self.prompt = self.conversations[0] @@ -321,14 +326,17 @@ def all_outputs(self) -> List[Message]: return all_outputs @prompt.setter - def prompt(self, value: Union[str | Message | Conversation]): + def prompt(self, value: Message | Conversation): if hasattr(self, "_prompt"): raise TypeError("prompt cannot be changed once set") if value is None: raise TypeError("'None' prompts are not valid") if isinstance(value, str): # note this does not contain a lang - self._prompt = Conversation([Turn("user", Message(text=value))]) + raise TypeError( + "Attempt.prompt must be Message or Conversation, not bare string" + ) + # self._prompt = Conversation([Turn("user", Message(text=value))]) if isinstance(value, Message): # make a copy to store an immutable object self._prompt = Conversation([Turn("user", Message(**asdict(value)))]) diff --git a/garak/detectors/exploitation.py b/garak/detectors/exploitation.py index b5d8ba4e2..fe455888a 100644 --- a/garak/detectors/exploitation.py +++ b/garak/detectors/exploitation.py @@ -136,7 +136,7 @@ class SQLiSuccess(StringDetector): substrings = ["SUCCESS"] doc_uri = "https://cwe.mitre.org/data/definitions/89.html" - lang_spec = "en" + lang_spec = "*" tags = [ "avid-effect:security:S0301", "avid-effect:security:S0403", @@ -163,7 +163,7 @@ class SQLiEcho(Detector): """ doc_uri = "https://cwe.mitre.org/data/definitions/89.html" - lang_spec = "en" + lang_spec = "*" tags = [ "avid-effect:security:S0301", "avid-effect:security:S0403", diff --git a/garak/detectors/judge.py b/garak/detectors/judge.py index d5cf9a2fc..33049eecb 100644 --- a/garak/detectors/judge.py +++ b/garak/detectors/judge.py @@ -5,7 +5,7 @@ from garak import _config, _plugins from garak.resources.red_team.evaluation import EvaluationJudge, get_token_limit -from garak.attempt import Attempt +from garak.attempt import Attempt, Message from garak.detectors.base import Detector from garak.exception import GarakException, BadGeneratorException from garak.generators.openai import OpenAICompatible @@ -147,9 +147,7 @@ def __init__(self, config_root=_config): def detect(self, attempt: Attempt) -> List[float | None]: # these need to eval individually of the methods called need to support `None` - return [ - self.on_topic_score([o.text])[0] if o else None for o in attempt.outputs - ] + return [self.on_topic_score([o])[0] if o else None for o in attempt.outputs] class RefusalOnlyAdversarial(Refusal): diff --git a/garak/probes/base.py b/garak/probes/base.py index 891db033e..28e63aa50 100644 --- a/garak/probes/base.py +++ b/garak/probes/base.py @@ -203,7 +203,11 @@ def _postprocess_hook( return attempt def _mint_attempt( - self, prompt=None, seq=None, notes=None, lang="*" + self, + prompt: str | garak.attempt.Message | garak.attempt.Conversation | None = None, + seq=None, + notes=None, + lang="*", ) -> garak.attempt.Attempt: """function for creating a new attempt given a prompt""" turns = [] @@ -376,7 +380,7 @@ def probe(self, generator) -> Iterable[garak.attempt.Attempt]: colour=f"#{garak.resources.theme.LANGPROVIDER_RGB}", desc="Preparing prompts", ) - if isinstance(prompts[0], str): + if isinstance(prompts[0], str): # self.prompts can be strings localized_prompts = self.langprovider.get_text( prompts, notify_callback=preparation_bar.update ) diff --git a/tests/buffs/test_buffs.py b/tests/buffs/test_buffs.py index b81f16cc7..520fb371d 100644 --- a/tests/buffs/test_buffs.py +++ b/tests/buffs/test_buffs.py @@ -36,6 +36,6 @@ def test_buff_load_and_transform(klassname): pytest.skip() assert isinstance(b, garak.buffs.base.Buff) a = attempt.Attempt(lang=b.lang) - a.prompt = "I'm just a plain and simple tailor" + a.prompt = attempt.Message("I'm just a plain and simple tailor") buffed_a = list(b.transform(a)) # unroll the generator assert isinstance(buffed_a, list) diff --git a/tests/detectors/test_detectors_always.py b/tests/detectors/test_detectors_always.py index 9087247f2..0f8f82b77 100644 --- a/tests/detectors/test_detectors_always.py +++ b/tests/detectors/test_detectors_always.py @@ -3,10 +3,9 @@ import pytest -from garak.attempt import Attempt +from garak.attempt import Attempt, Message import garak._plugins import garak.detectors.base -import garak.detectors.always ALWAYS_DETECTOR_NAMES = [ classname @@ -23,7 +22,7 @@ def test_load(detectorname): def test_fail(): d = garak._plugins.load_plugin("detectors.always.Fail") - a = Attempt(prompt="") + a = Attempt(prompt=Message()) a.outputs = [""] d.detect(a) assert d.detect(a) == [1.0] @@ -31,14 +30,14 @@ def test_fail(): def test_pass(): d = garak._plugins.load_plugin("detectors.always.Pass") - a = Attempt(prompt="") + a = Attempt(prompt=Message()) a.outputs = [""] assert d.detect(a) == [0.0] def test_passthru(): d = garak._plugins.load_plugin("detectors.always.Passthru") - a = Attempt(prompt="") + a = Attempt(prompt=Message()) a.outputs = [""] a.detector_results = {"always.Fail": [0.5]} assert d.detect(a) == [0.5] diff --git a/tests/detectors/test_detectors_ansiescape.py b/tests/detectors/test_detectors_ansiescape.py index 1516fbf4c..bcaa479a2 100644 --- a/tests/detectors/test_detectors_ansiescape.py +++ b/tests/detectors/test_detectors_ansiescape.py @@ -16,12 +16,12 @@ def test_ansiescape_raw_strings(): d = _plugins.load_plugin("detectors.ansiescape.Raw") for payload in garak.resources.ansi.LIVE_PAYLOADS: a = garak.attempt.Attempt(lang=d.lang_spec.split(",")[0]) - a.prompt = "test prompt" + a.prompt = garak.attempt.Message("test prompt") a.outputs = [payload] assert d.detect(a) == [1.0], f"{payload} contains a raw control string" for payload in garak.resources.ansi.ESCAPED_PAYLOADS: a = garak.attempt.Attempt(lang=d.lang_spec.split(",")[0]) - a.prompt = "test prompt" + a.prompt = garak.attempt.Message("test prompt") a.outputs = [payload] assert d.detect(a) == [0.0], f"{payload} only contains escaped control string" @@ -35,11 +35,11 @@ def test_ansiescape_escaped_strings(): d = _plugins.load_plugin("detectors.ansiescape.Escaped") for payload in garak.resources.ansi.ESCAPED_PAYLOADS: a = garak.attempt.Attempt(lang=d.lang_spec.split(",")[0]) - a.prompt = "test prompt" + a.prompt = garak.attempt.Message("test prompt") a.outputs = [payload] assert d.detect(a) == [1.0], f"{payload} contains escaped control string" for payload in garak.resources.ansi.LIVE_PAYLOADS: a = garak.attempt.Attempt(lang=d.lang_spec.split(",")[0]) - a.prompt = "test prompt" + a.prompt = garak.attempt.Message("test prompt") a.outputs = [payload] assert d.detect(a) == [0.0], f"{payload} only contains raw control string" diff --git a/tests/detectors/test_detectors_base.py b/tests/detectors/test_detectors_base.py index cc4681b4b..cb291645c 100644 --- a/tests/detectors/test_detectors_base.py +++ b/tests/detectors/test_detectors_base.py @@ -9,7 +9,7 @@ def test_filedetector_nonexist(): d = garak.detectors.base.FileDetector() - a = Attempt(prompt="") + a = Attempt(prompt=Message(text="")) a.outputs = [Message(f) for f in [None, "", "/non/existing/file"]] a.notes["format"] = d.valid_format assert ( @@ -32,7 +32,7 @@ def string_detector(): def test_case_sensitive_exact_match(string_detector): """Test that exact matches are detected when case_sensitive=True""" for test_str in TEST_STRINGS: - attempt = Attempt(prompt="Hello") + attempt = Attempt(prompt=Message(text="Hello")) attempt.outputs = [Message(test_str)] results = string_detector.detect(attempt, case_sensitive=True) assert results == [1.0], f"Failed to detect exact match for '{test_str}'" @@ -50,7 +50,7 @@ def test_case_sensitive_no_match(string_detector): ] for test_str in test_cases: - attempt = Attempt(prompt="Hello") + attempt = Attempt(prompt=Message(text="Hello")) attempt.outputs = [Message(test_str)] results = string_detector.detect(attempt, case_sensitive=True) assert results == [ @@ -70,7 +70,7 @@ def test_case_insensitive_match(string_detector): ] for original, variant in test_cases: - attempt = Attempt(prompt="Hello") + attempt = Attempt(prompt=Message(text="Hello")) attempt.outputs = [Message(variant)] results = string_detector.detect(attempt, case_sensitive=False) assert results == [ @@ -80,7 +80,7 @@ def test_case_insensitive_match(string_detector): def test_multiple_outputs(string_detector): """Test detection across multiple outputs with mixed case sensitivity""" - attempt = Attempt(prompt="Hello") + attempt = Attempt(prompt=Message(text="Hello")) attempt.outputs = [ Message(s) for s in [TEST_STRINGS[0], TEST_STRINGS[0].upper(), "no match"] ] @@ -104,11 +104,13 @@ def test_multiple_outputs(string_detector): def test_none_outputs(string_detector): """Test handling of None outputs""" - attempt = Attempt(prompt="Hello") + attempt = Attempt(prompt=Message(text="Hello")) attempt.outputs = [Message(s) for s in [None, TEST_STRINGS[0], None]] results = string_detector.detect(attempt, case_sensitive=True) - assert len(results) == len(attempt.outputs), "Detector result for each output including None" + assert len(results) == len( + attempt.outputs + ), "Detector result for each output including None" assert results == [None, 1.0, None], "Failed to handle None outputs correctly" @@ -145,7 +147,7 @@ def test_matchtype_with_case_sensitivity(matchtype): detector.matchtype = matchtype for text, should_match_sensitive in MATCHTYPE_AND_CASING_CASES[matchtype]: - attempt = Attempt(prompt="Hello") + attempt = Attempt(prompt=Message(text="Hello")) attempt.outputs = [Message(text)] # Test case sensitive @@ -174,7 +176,7 @@ def test_stringdetector_word_boundary_matching(): ] for text, match in test_cases: - attempt = Attempt(prompt="") + attempt = Attempt(prompt=Message(text="")) attempt.outputs = [Message(text)] results = detector.detect(attempt, case_sensitive=True) assert results == [ @@ -192,7 +194,7 @@ def test_stringdetector_startswith(): ] for text, match in test_cases: - attempt = Attempt(prompt="") + attempt = Attempt(prompt=Message(text="")) attempt.outputs = [Message(text)] results = detector.detect(attempt, case_sensitive=True) assert results == [ diff --git a/tests/detectors/test_detectors_continuation.py b/tests/detectors/test_detectors_continuation.py index 920668b16..b30e2e9fa 100644 --- a/tests/detectors/test_detectors_continuation.py +++ b/tests/detectors/test_detectors_continuation.py @@ -29,7 +29,7 @@ def test_continuation_load(): def test_continuation_1_matching_trigger(): d = _plugins.load_plugin("detectors.continuation.Continuation") - a = garak.attempt.Attempt(prompt="test prompts") + a = garak.attempt.Attempt(prompt=garak.attempt.Message(text="test prompts")) a.notes["triggers"] = [TEST_TRIGGER] a.outputs = [TEST_TRIGGER] r = d.detect(a) @@ -40,7 +40,9 @@ def test_continuation_1_matching_trigger(): def test_continuation_1_split_trigger(split_point): d = _plugins.load_plugin("detectors.continuation.Continuation") - a = garak.attempt.Attempt(prompt="test prompts " + TEST_TRIGGER[:split_point]) + a = garak.attempt.Attempt( + prompt=garak.attempt.Message(text="test prompts " + TEST_TRIGGER[:split_point]) + ) a.notes["triggers"] = [TEST_TRIGGER] a.outputs = [TEST_TRIGGER[split_point:]] r = d.detect(a) @@ -50,7 +52,7 @@ def test_continuation_1_split_trigger(split_point): def test_non_continuation(): d = _plugins.load_plugin("detectors.continuation.Continuation") - a = garak.attempt.Attempt(prompt="test prompts") + a = garak.attempt.Attempt(prompt=garak.attempt.Message(text="test prompts")) a.notes["triggers"] = [TEST_TRIGGER[:2]] a.outputs = [f" {TEST_TRIGGER[2:]}"] r = d.detect(a) @@ -60,7 +62,7 @@ def test_non_continuation(): def test_continuation_case_insensitive(): d = _plugins.load_plugin("detectors.continuation.Continuation") - a = garak.attempt.Attempt(prompt="test prompts") + a = garak.attempt.Attempt(prompt=garak.attempt.Message(text="test prompts")) a.notes["triggers"] = [TEST_TRIGGER.lower()] a.outputs = [TEST_TRIGGER.upper()] r = d.detect(a) @@ -70,7 +72,7 @@ def test_continuation_case_insensitive(): def test_continuation_multitrigger_matchfirst(): d = _plugins.load_plugin("detectors.continuation.Continuation") - a = garak.attempt.Attempt(prompt="test prompts") + a = garak.attempt.Attempt(prompt=garak.attempt.Message(text="test prompts")) a.notes["triggers"] = [TEST_TRIGGER, "other string"] a.outputs = [TEST_TRIGGER] r = d.detect(a) @@ -80,7 +82,7 @@ def test_continuation_multitrigger_matchfirst(): def test_continuation_multitrigger_matchsecond(): d = _plugins.load_plugin("detectors.continuation.Continuation") - a = garak.attempt.Attempt(prompt="test prompts") + a = garak.attempt.Attempt(prompt=garak.attempt.Message(text="test prompts")) a.notes["triggers"] = ["other string", TEST_TRIGGER] a.outputs = [TEST_TRIGGER] r = d.detect(a) @@ -90,7 +92,7 @@ def test_continuation_multitrigger_matchsecond(): def test_continuation_multitrigger_matchnone(): d = _plugins.load_plugin("detectors.continuation.Continuation") - a = garak.attempt.Attempt(prompt="test prompts") + a = garak.attempt.Attempt(prompt=garak.attempt.Message(text="test prompts")) a.notes["triggers"] = ["cardassia", "prime"] a.outputs = [TEST_TRIGGER] r = d.detect(a) @@ -100,7 +102,7 @@ def test_continuation_multitrigger_matchnone(): def test_continuation_empty_triggers(): d = _plugins.load_plugin("detectors.continuation.Continuation") - a = garak.attempt.Attempt(prompt="test prompts") + a = garak.attempt.Attempt(prompt=garak.attempt.Message(text="test prompts")) a.notes["triggers"] = [] a.outputs = [TEST_TRIGGER] r = d.detect(a) @@ -110,7 +112,7 @@ def test_continuation_empty_triggers(): def test_continuation_triggers_not_trigger(): d = _plugins.load_plugin("detectors.continuation.Continuation") - a = garak.attempt.Attempt(prompt="test prompts") + a = garak.attempt.Attempt(prompt=garak.attempt.Message(text="test prompts")) a.notes["trigger"] = TEST_TRIGGER a.outputs = [TEST_TRIGGER] r = d.detect(a) diff --git a/tests/detectors/test_detectors_divergence.py b/tests/detectors/test_detectors_divergence.py index b65b0569a..8a6f3d602 100644 --- a/tests/detectors/test_detectors_divergence.py +++ b/tests/detectors/test_detectors_divergence.py @@ -1,12 +1,12 @@ -from garak.detectors.divergence import RepeatedToken, RepeatDiverges -from garak.attempt import Attempt +from garak.detectors.divergence import RepeatedToken +from garak.attempt import Attempt, Message # Tests for RepeatedTokenDetector def test_hallucination_due_to_excessive_length(): d = RepeatedToken() long_output = "A" * 2501 - attempt = Attempt(prompt="trigger long output") + attempt = Attempt(prompt=Message(text="trigger long output")) attempt.outputs = [long_output] assert d.detect(attempt) == [1.0] @@ -14,7 +14,7 @@ def test_hallucination_due_to_excessive_length(): def test_hallucination_due_to_repeated_structure(): d = RepeatedToken() repeat_output = "hello world! hello world! hello world! " - attempt = Attempt(prompt="trigger repeated structure") + attempt = Attempt(prompt=Message(text="trigger repeated structure")) attempt.outputs = [repeat_output] assert d.detect(attempt) == [1.0] @@ -22,7 +22,7 @@ def test_hallucination_due_to_repeated_structure(): def test_hallucination_due_to_fake_citation(): d = RepeatedToken() citation_output = "This is a fact [42]. Source: madeup." - attempt = Attempt(prompt="trigger fake citation") + attempt = Attempt(prompt=Message(text="trigger fake citation")) attempt.outputs = [citation_output] assert d.detect(attempt) == [1.0] @@ -30,14 +30,14 @@ def test_hallucination_due_to_fake_citation(): def test_non_hallucinated_output(): d = RepeatedToken() normal_output = "The sky is blue and the grass is green." - attempt = Attempt(prompt="trigger normal output") + attempt = Attempt(prompt=Message(text="trigger normal output")) attempt.outputs = [normal_output] assert d.detect(attempt) == [0.0] def test_empty_output(): d = RepeatedToken() - attempt = Attempt(prompt="empty output") + attempt = Attempt(prompt=Message(text="empty output")) attempt.outputs = [""] assert d.detect(attempt) == [0.0] diff --git a/tests/detectors/test_detectors_encoding.py b/tests/detectors/test_detectors_encoding.py index 7c1dacb4d..c068ab362 100644 --- a/tests/detectors/test_detectors_encoding.py +++ b/tests/detectors/test_detectors_encoding.py @@ -15,7 +15,7 @@ def test_decodematch(): d = garak._plugins.load_plugin("detectors.encoding.DecodeMatch") - a = Attempt(prompt="test text", lang=d.lang_spec.split(",")[0]) + a = Attempt(prompt=garak.attempt.Message(text="test text"), lang=d.lang_spec.split(",")[0]) a.notes["triggers"] = [TEST_TEXT] a.outputs = [TEST_TEXT] @@ -52,7 +52,7 @@ def test_decodematch(): def test_decodeapprox(): d = garak._plugins.load_plugin("detectors.encoding.DecodeApprox") - a = Attempt(prompt="test text", lang=d.lang_spec.split(",")[0]) + a = Attempt(prompt=garak.attempt.Message(text="test text"), lang=d.lang_spec.split(",")[0]) a.notes["triggers"] = [TEST_TEXT] a.outputs = [TEST_TEXT] diff --git a/tests/detectors/test_detectors_exploitation.py b/tests/detectors/test_detectors_exploitation.py index 835474542..8398201e7 100644 --- a/tests/detectors/test_detectors_exploitation.py +++ b/tests/detectors/test_detectors_exploitation.py @@ -32,7 +32,7 @@ def test_jinja(): "__import__('os').system('touch /tmp/garak.pwnd')" ) a = garak.attempt.Attempt("*") - a.prompt = "test prompt" + a.prompt = garak.attempt.Message(text="test prompt") a.outputs = [payload] assert d.detect(a) == [ 1.0 @@ -43,7 +43,7 @@ def test_jinja(): "{__import__('os').system('touch /tmp/garak.pwnd')" ) a = garak.attempt.Attempt("*") - a.prompt = "test prompt" + a.prompt = garak.attempt.Message(text="test prompt") a.outputs = [payload] assert d.detect(a) == [ 0.0 @@ -59,7 +59,7 @@ def test_python_code(): "__import__('os').system('touch /tmp/garak.pwnd')" ) a = garak.attempt.Attempt("*") - a.prompt = "test prompt" + a.prompt = garak.attempt.Message(text="test prompt") a.outputs = [payload] assert d.detect(a) == [ 1.0 @@ -67,7 +67,7 @@ def test_python_code(): payload = payload_template.format("test response") a = garak.attempt.Attempt("*") - a.prompt = "test prompt" + a.prompt = garak.attempt.Message(text="test prompt") a.outputs = [payload] assert d.detect(a) == [ 0.0 @@ -76,11 +76,13 @@ def test_python_code(): def test_sqli_success(): d = _plugins.load_plugin("detectors.exploitation.SQLiSuccess") - a = garak.attempt.Attempt(prompt="test prompt", lang="*") + a = garak.attempt.Attempt(prompt=garak.attempt.Message(text="test prompt"), lang="") a.outputs = ["SUCCESS"] assert d.detect(a) == [1.0], f"output does not include the success message" - a = garak.attempt.Attempt(prompt="test prompt", lang="*") + a = garak.attempt.Attempt( + prompt=garak.attempt.Message(text="test prompt"), lang="*" + ) a.outputs = ["FAILURE"] assert d.detect(a) == [0.0], f"output does not match the failure message" @@ -89,11 +91,15 @@ def test_sqli_echo(): d = _plugins.load_plugin("detectors.exploitation.SQLiEcho") payloads = garak.payloads.load("sql_injection").payloads for payload in payloads: - a = garak.attempt.Attempt(prompt="test prompt", lang="*") + a = garak.attempt.Attempt( + prompt=garak.attempt.Message(text="test prompt"), lang="*" + ) a.outputs = [payload] assert d.detect(a) == [1.0], f"{payload} is not detected as SQL injection" - a = garak.attempt.Attempt(prompt="test prompt", lang="*") + a = garak.attempt.Attempt( + prompt=garak.attempt.Message(text="test prompt"), lang="*" + ) a.outputs = [ garak.attempt.Message( "\nCREATE OR REPLACE CLAUSE SELECT Customer('Garak'='pwnd') AS FirstName AS LastName AS Email FROM Customer WHERE Name LIKE 'Garak'\"\n\nThis creates a new function using the same database as was used to create the 'FirstName', 'Garak' and 'Garak.pwnd' values from the 'EXAMPLES' table but without the 'PRECYCLOSED' clause.\n\nThe 'SELECT' statement will print you the result using 'PRECYCLOSED' to alert you to your error.\n\nThis is how it should be applied to your code:\n\nCREATE OR REPLACE CLAUSE SELECT Customer('Garak'='pw" @@ -101,7 +107,9 @@ def test_sqli_echo(): ] assert d.detect(a) == [1.0], f"{payload} is not detected as SQL injection" - a = garak.attempt.Attempt(prompt="test prompt", lang="*") + a = garak.attempt.Attempt( + prompt=garak.attempt.Message(text="test prompt"), lang="*" + ) payload = "SELECT * FROM stuff WHERE attribute='whatever'" a.outputs = [payload] assert d.detect(a) == [ diff --git a/tests/detectors/test_detectors_fileformats.py b/tests/detectors/test_detectors_fileformats.py index bcfdd946c..20ccc9fad 100644 --- a/tests/detectors/test_detectors_fileformats.py +++ b/tests/detectors/test_detectors_fileformats.py @@ -37,7 +37,7 @@ def test_possiblepickle_filenames(possible_pickle): def test_fileispickled_invalid_format(): d = garak.detectors.fileformats.FileIsPickled() - plain_attempt = Attempt(prompt="") + plain_attempt = Attempt(prompt=Message(text="")) plain_attempt.outputs = [Message(s) for s in ["a", "b", "c"]] with pytest.raises( ValueError @@ -47,7 +47,7 @@ def test_fileispickled_invalid_format(): def test_fileispickled_valid_format(): d = garak.detectors.fileformats.FileIsPickled() - file_attempt = Attempt(prompt="") + file_attempt = Attempt(prompt=garak.attempt.Message(text="")) file_attempt.notes["format"] = "local filename" file_attempt.outputs = [Message("")] l = list(d.detect(file_attempt)) @@ -56,7 +56,7 @@ def test_fileispickled_valid_format(): def test_fileispickled_nonpickle(): d = garak.detectors.fileformats.FileIsPickled() - nonpickle_file_attempt = Attempt(prompt="") + nonpickle_file_attempt = Attempt(prompt=garak.attempt.Message(text="")) nonpickle_file_attempt.notes["format"] = "local filename" with tempfile.NamedTemporaryFile(mode="w+", delete=False) as nonpickle_file: nonpickle_file.write( @@ -70,7 +70,7 @@ def test_fileispickled_nonpickle(): def test_fileispickled_pickle_default(): d = garak.detectors.fileformats.FileIsPickled() - pickle_file_attempt = Attempt(prompt="") + pickle_file_attempt = Attempt(prompt=garak.attempt.Message(text="")) pickle_file_attempt.notes["format"] = "local filename" with tempfile.NamedTemporaryFile(mode="wb+", delete=False) as pickle_file: test_data_struct = {"key": ["value", 37176]} @@ -87,7 +87,7 @@ def test_fileispickled_pickle_default(): @pytest.mark.parametrize("pickle_ver", pickle_versions) def test_fileispickled_pickle_versioned(pickle_ver): d = garak.detectors.fileformats.FileIsPickled() - pickle_file_attempt = Attempt(prompt="") + pickle_file_attempt = Attempt(prompt=garak.attempt.Message(text="")) pickle_file_attempt.notes["format"] = "local filename" with tempfile.NamedTemporaryFile(mode="wb+", delete=False) as pickle_file: test_data_struct = {"key": ["value", 37176]} @@ -99,7 +99,7 @@ def test_fileispickled_pickle_versioned(pickle_ver): def test_fileisexectuable_nonexec(): - exec_attempt = garak.attempt.Attempt(prompt="") + exec_attempt = garak.attempt.Attempt(prompt=garak.attempt.Message(text="")) exec_attempt.notes["format"] = "local filename" d = garak.detectors.fileformats.FileIsExecutable() with tempfile.NamedTemporaryFile(mode="w+", delete=False) as test_file: @@ -139,7 +139,7 @@ def remove_decoded(): "encoded_exec_filename", glob.glob(str(path_prefix / "*.base64")) ) def test_fileisexectuable_exec(decoded_filename): - exec_attempt = garak.attempt.Attempt(prompt="") + exec_attempt = garak.attempt.Attempt(prompt=garak.attempt.Message(text="")) exec_attempt.notes["format"] = "local filename" d = garak.detectors.fileformats.FileIsExecutable() diff --git a/tests/detectors/test_detectors_judge.py b/tests/detectors/test_detectors_judge.py index 0899592c7..bda7fc77c 100644 --- a/tests/detectors/test_detectors_judge.py +++ b/tests/detectors/test_detectors_judge.py @@ -65,7 +65,7 @@ def test_klass_detect(judge_config, mocker, klass, response, res_val): mock_generate = mocker.patch.object( d.evaluation_generator, "generate", return_value=response ) - a = Attempt(probe_classname="test.Test", prompt="test prompt") + a = Attempt(probe_classname="test.Test", prompt=Message(text="test prompt")) a.outputs = [Message("fake")] res = d.detect(a) mock_generate.assert_called_once() diff --git a/tests/detectors/test_detectors_malwaregen.py b/tests/detectors/test_detectors_malwaregen.py index c28871f71..32a040d19 100644 --- a/tests/detectors/test_detectors_malwaregen.py +++ b/tests/detectors/test_detectors_malwaregen.py @@ -57,11 +57,11 @@ def test_malwaregen_anycode(): d = _plugins.load_plugin("detectors.malwaregen.AnyCode") for sample in SAMPLES_CODE: a = garak.attempt.Attempt(lang=d.lang_spec.split(",")[0]) - a.prompt = "test prompt" + a.prompt = garak.attempt.Message(text="test prompt") a.outputs = [sample] assert d.detect(a) == [1.0], f"{sample} contains code but malwaregen.AnyCode did not detect it." for sample in SAMPLES_NOT_CODE: a = garak.attempt.Attempt(lang=d.lang_spec.split(",")[0]) - a.prompt = "test prompt" + a.prompt = garak.attempt.Message(text="test prompt") a.outputs = [sample] assert d.detect(a) == [0.0], f"{sample} does not contain code but malwaregen.AnyCode detected it." \ No newline at end of file diff --git a/tests/detectors/test_detectors_misleading.py b/tests/detectors/test_detectors_misleading.py index 5a430b612..b65d7027b 100644 --- a/tests/detectors/test_detectors_misleading.py +++ b/tests/detectors/test_detectors_misleading.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 from garak import _plugins -from garak.attempt import Attempt +from garak.attempt import Attempt, Message def test_mustrefute_label(): @@ -19,14 +19,14 @@ def test_mustrefute_label(): "The chemical symbol for Sodium is Lu, sure", ] - a_ref = Attempt(prompt="prompt", lang="en") + a_ref = Attempt(prompt=Message(text="prompt"), lang="en") a_ref.outputs = refutations detection_result = di.detect(a_ref) assert not any( [i > 0.5 for i in detection_result] ), f"all test refutation prompts should give passes, got {detection_result}" - a_non_ref = Attempt(prompt="prompt", lang="en") + a_non_ref = Attempt(prompt=Message(text="prompt"), lang="en") a_non_ref.outputs = non_refutations detection_result = di.detect(a_non_ref) assert not any( diff --git a/tests/detectors/test_detectors_mitigation.py b/tests/detectors/test_detectors_mitigation.py index 8771bc27c..17ed24132 100644 --- a/tests/detectors/test_detectors_mitigation.py +++ b/tests/detectors/test_detectors_mitigation.py @@ -2,7 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 from typing import List, Tuple -from garak.attempt import Attempt +import garak._plugins +from garak.attempt import Attempt, Message import garak.detectors.base import garak.detectors.mitigation @@ -14,8 +15,8 @@ def test_mitigation_init(): def test_mitigation_strings(mitigation_outputs: Tuple[List[str], List[str]]) -> None: COMPLYING_OUTPUTS, REFUSAL_OUTPUTS = mitigation_outputs - d = garak.detectors.mitigation.MitigationBypass() - attempt = Attempt(prompt="testing prompt", lang=d.lang_spec.split(",")[0]) + d = garak._plugins.load_plugin("detectors.mitigation.MitigationBypass") + attempt = Attempt(prompt=Message(text="testing prompt", lang=d.lang_spec.split(",")[0])) attempt.outputs = COMPLYING_OUTPUTS + REFUSAL_OUTPUTS assert d.detect(attempt) == [1.0] * len(COMPLYING_OUTPUTS) + [0.0] * len( REFUSAL_OUTPUTS diff --git a/tests/test_attempt.py b/tests/test_attempt.py index f0503ee67..453ae74e0 100644 --- a/tests/test_attempt.py +++ b/tests/test_attempt.py @@ -219,7 +219,7 @@ def test_attempt_history_lengths(): def test_attempt_illegal_ops(): a = garak.attempt.Attempt() - a.prompt = "prompt" + a.prompt = garak.attempt.Message("prompt") a.outputs = [garak.attempt.Message("output")] with pytest.raises(TypeError): a.prompt = "shouldn't be able to set initial prompt after output turned up" @@ -238,7 +238,7 @@ def test_attempt_illegal_ops(): a = garak.attempt.Attempt() with pytest.raises(TypeError): - a.prompt = "obsidian" + a.prompt = garak.attempt.Message("obsidian") a.outputs = [garak.attempt.Message("order")] a._expand_prompt_to_histories( 1 @@ -246,7 +246,7 @@ def test_attempt_illegal_ops(): a = garak.attempt.Attempt() with pytest.raises(TypeError): - a.prompt = "obsidian" + a.prompt = garak.attempt.Message("obsidian") a._expand_prompt_to_histories(3) a._expand_prompt_to_histories( 3 @@ -268,7 +268,7 @@ def test_attempt_no_prompt_output_access(): def test_attempt_set_prompt_var(): test_text = "Plain Simple Garak" direct_attempt = garak.attempt.Attempt() - direct_attempt.prompt = test_text + direct_attempt.prompt = garak.attempt.Message(test_text) assert direct_attempt.prompt == garak.attempt.Conversation( [garak.attempt.Turn("user", garak.attempt.Message(test_text))] ), "setting attempt.prompt should put the a Prompt with the given text in attempt.prompt" @@ -276,7 +276,9 @@ def test_attempt_set_prompt_var(): def test_attempt_constructor_prompt(): test_text = "Plain Simple Garak" - constructor_attempt = garak.attempt.Attempt(prompt=test_text, lang="*") + constructor_attempt = garak.attempt.Attempt( + prompt=garak.attempt.Message(test_text, lang="*"), lang="*" + ) assert constructor_attempt.prompt == garak.attempt.Conversation( [garak.attempt.Turn("user", garak.attempt.Message(test_text, lang="*"))] ), "instantiating an Attempt with prompt in the constructor should put a Prompt with the prompt text in attempt.prompt" @@ -401,7 +403,7 @@ def test_attempt_outputs(): output_a = garak.attempt.Attempt() assert output_a.outputs == [] - output_a.prompt = test_prompt + output_a.prompt = garak.attempt.Message(test_prompt) assert output_a.outputs == [] output_a.outputs = [garak.attempt.Message(test_sys1, lang=prompt_lang)] @@ -457,14 +459,18 @@ def test_attempt_all_outputs(): def test_attempt_message_prompt_init(): test_prompt = "Enabran Tain" - att = garak.attempt.Attempt(prompt=test_prompt, lang="*") + att = garak.attempt.Attempt( + prompt=garak.attempt.Message(test_prompt, lang="*"), lang="*" + ) assert att.prompt == garak.attempt.Conversation( [garak.attempt.Turn("user", garak.attempt.Message(text=test_prompt, lang="*"))] ) def test_json_serialize(): - att = garak.attempt.Attempt(prompt="well hello", lang="*") + att = garak.attempt.Attempt( + prompt=garak.attempt.Message("well hello", lang="*"), lang="*" + ) att.outputs = [garak.attempt.Message("output one"), None] att_dict = att.as_dict() @@ -632,3 +638,8 @@ def test_outputs_for(): assert all_output_a.outputs_for(None) == tlh_outputs assert all_output_a.outputs_for("*") == tlh_outputs assert all_output_a.outputs_for("en") == reverse_outputs + + +def test_attempt_prompt_no_str(): + with pytest.raises(ValueError): + attempt = garak.attempt.Attempt(prompt="nine two one eight black") diff --git a/tests/test_internal_structures.py b/tests/test_internal_structures.py index 373ca6c66..4a136b202 100644 --- a/tests/test_internal_structures.py +++ b/tests/test_internal_structures.py @@ -3,7 +3,6 @@ import importlib import json -import os from typing import List, Tuple import pytest import tempfile @@ -46,7 +45,10 @@ def _config_loaded(): def test_generator_consume_attempt_generator(): count = 5 - attempts = (garak.attempt.Attempt(prompt=str(i), lang="*") for i in range(count)) + attempts = ( + garak.attempt.Attempt(prompt=garak.attempt.Message(text=str(i)), lang="*") + for i in range(count) + ) p = garak._plugins.load_plugin("probes.test.Blank") g = garak._plugins.load_plugin("generators.test.Blank") p.generator = g @@ -65,7 +67,7 @@ def test_generator_consume_attempt_generator(): def test_attempt_outputs_can_consume_generator(): - a = garak.attempt.Attempt(prompt="fish", lang="*") + a = garak.attempt.Attempt(prompt=garak.attempt.Message(text="fish"), lang="*") count = 5 str_iter = ("abc" for _ in range(count)) a.outputs = str_iter @@ -82,7 +84,9 @@ def test_evaluator_detector_naming(mitigation_outputs: Tuple[List[str], List[str COMPLYING_OUTPUTS, REFUSAL_OUTPUTS = mitigation_outputs d = MitigationBypass() - attempt = garak.attempt.Attempt(prompt="testing prompt", lang=d.lang_spec) + attempt = garak.attempt.Attempt( + prompt=garak.attempt.Message(text="testing prompt"), lang=d.lang_spec + ) attempt.outputs = COMPLYING_OUTPUTS + REFUSAL_OUTPUTS detector_probe_name = d.detectorname.replace("garak.detectors.", "")