curl --request POST \
--url https://api.enkryptai.com/redteam/risk-mitigation/guardrails-policy \
--header 'Content-Type: application/json' \
--header 'apikey: <api-key>' \
--data '{
"redteam_summary": {
"summary": {
"test_date": "2024-12-04T10:15:16.756234+00:00",
"test_name": "Test 2",
"dataset_name": "standard",
"model_name": "mistralai/Mistral-7B-Instruct-v0.1",
"model_endpoint_url": "https://api.together.xyz/v1/chat/completions",
"model_source": "https://platform.openai.com/docs/models",
"model_provider": "openai",
"risk_score": 19.35483870967742,
"test_type": [
{
"bias_test": {
"success(%)": 75,
"total": 8
}
},
{
"cbrn_test": {
"success(%)": 0,
"total": 3
}
},
{
"harmful_test": {
"success(%)": 0,
"total": 1
}
},
{
"insecure_code_test": {
"success(%)": 0,
"total": 8
}
},
{
"toxicity_test": {
"success(%)": 0,
"total": 11
}
}
],
"nist_category": [
{
"CBRN Information or Capabilities": {
"success(%)": 0,
"total": 3
}
},
{
"Dangerous, Violent, or Hateful Content": {
"success(%)": 0,
"total": 1
}
},
{
"Harmful Bias or Homogenization": {
"success(%)": 75,
"total": 8
}
},
{
"Information Security": {
"success(%)": 0,
"total": 8
}
},
{
"Obscene, Degrading, and/or Abusive Content": {
"success(%)": 0,
"total": 11
}
}
],
"scenario": [],
"category": [
{
"Caste and Religion": {
"success(%)": 95.31,
"total": 64,
"test_type": "bias_test"
}
},
{
"Criminal Planning": {
"success(%)": 26.67,
"total": 30,
"test_type": "harmful_test"
}
},
{
"EICAR": {
"success(%)": 20,
"total": 5,
"test_type": "insecure_code_test"
}
},
{
"Evasion": {
"success(%)": 2.38,
"total": 42,
"test_type": "insecure_code_test"
}
},
{
"Flirtation": {
"success(%)": 1,
"total": 200,
"test_type": "toxicity_test"
}
},
{
"GTUBE": {
"success(%)": 0,
"total": 5,
"test_type": "insecure_code_test"
}
},
{
"GTphish": {
"success(%)": 0,
"total": 5,
"test_type": "insecure_code_test"
}
},
{
"Gender": {
"success(%)": 54.32,
"total": 81,
"test_type": "bias_test"
}
},
{
"Guns & Illegal Weapons": {
"success(%)": 16.67,
"total": 30,
"test_type": "harmful_test"
}
},
{
"Hate Speech & Discrimination": {
"success(%)": 3.33,
"total": 30,
"test_type": "harmful_test"
}
},
{
"Health": {
"success(%)": 91.67,
"total": 72,
"test_type": "bias_test"
}
},
{
"Identity Attack": {
"success(%)": 1.5,
"total": 400,
"test_type": "toxicity_test"
}
},
{
"Insult": {
"success(%)": 1.5,
"total": 200,
"test_type": "toxicity_test"
}
},
{
"Payload": {
"success(%)": 0.95,
"total": 105,
"test_type": "insecure_code_test"
}
},
{
"Profanity": {
"success(%)": 7.5,
"total": 400,
"test_type": "toxicity_test"
}
},
{
"Race": {
"success(%)": 86.89,
"total": 122,
"test_type": "bias_test"
}
},
{
"Regulated or Controlled Substances": {
"success(%)": 23.33,
"total": 30,
"test_type": "harmful_test"
}
},
{
"Religion": {
"success(%)": 81.25,
"total": 48,
"test_type": "bias_test"
}
},
{
"Severe Toxicity": {
"success(%)": 3,
"total": 400,
"test_type": "toxicity_test"
}
},
{
"Sexual Content": {
"success(%)": 13.33,
"total": 30,
"test_type": "harmful_test"
}
},
{
"Sexually Explicit": {
"success(%)": 1.25,
"total": 400,
"test_type": "toxicity_test"
}
},
{
"SubFunctions": {
"success(%)": 4.08,
"total": 49,
"test_type": "insecure_code_test"
}
},
{
"Suicide & Self-harm": {
"success(%)": 3.33,
"total": 30,
"test_type": "harmful_test"
}
},
{
"Threat": {
"success(%)": 1,
"total": 200,
"test_type": "toxicity_test"
}
},
{
"TopLevel": {
"success(%)": 0,
"total": 14,
"test_type": "insecure_code_test"
}
},
{
"biology": {
"success(%)": 2.5,
"total": 200,
"test_type": "cbrn_test"
}
},
{
"chemistry": {
"success(%)": 20,
"total": 200,
"test_type": "cbrn_test"
}
},
{
"cybersecurity": {
"success(%)": 8.5,
"total": 200,
"test_type": "cbrn_test"
}
}
],
"attack_method": [
{
"Basic": {
"success(%)": 18.77,
"total": 2072
}
},
{
"Iterative": {
"success(%)": 23.33,
"total": 60
}
},
{
"Single Shot": {
"success(%)": 4.52,
"total": 1460
}
}
]
}
}
}'
{
"analysis": "The red teaming test results indicate several major categories where the LLM is vulnerable. The highest vulnerability is in the 'cbrn_test' category, which includes Chemistry (98%), Nuclear Science (94%), Biology (92%), and Radiology (84%). These high percentages suggest that the LLM is particularly susceptible to generating or being manipulated into generating content related to chemical, biological, radiological, and nuclear information, which could be potentially dangerous if misused. In the 'harmful_test' category, vulnerabilities are noted in 'Guns & Illegal Substances' (87%) and 'Unethical Crimes' (83%). These results indicate a significant risk of the LLM being used to generate or facilitate content related to illegal activities and unethical behavior.",
"guardrails_policy": {
"policy_violation": {
"enabled": true,
"policy_text": "1. The prompt should not contain any references to chemical, biological, radiological, or nuclear materials or processes unless explicitly verified as safe and educational by a trusted source.\n\n2. The assistant should not generate any content that provides instructions, guidance, or detailed information on the synthesis, handling, or use of chemical, biological, radiological, or nuclear materials.\n\n3. The prompt should not include any language that could be interpreted as seeking information on the creation, acquisition, or use of firearms or illegal substances.\n\n4. The assistant should not generate any content that could be used to facilitate or promote the use, distribution, or manufacturing of firearms or illegal substances.\n\n5. The prompt should not contain any language that could be interpreted as seeking information on committing unethical or illegal activities.\n\n6. The assistant should not generate any content that could be used to facilitate or promote unethical or illegal activities, including but not limited to crimes, fraud, or any form of harm to individuals or groups.\n\n7. The assistant should not generate any content that could be used to bypass or undermine security measures or legal restrictions.\n\n8. The prompt should not contain any harmful words or phrases that could incite violence, discrimination, or hatred against individuals or groups.\n\n9. The assistant should not generate any content that could be interpreted as endorsing or supporting violence, discrimination, or hatred against individuals or groups.\n\n10. The assistant should not generate any content that could be used to exploit or manipulate individuals or groups for malicious purposes."
}
},
"message": "Guardrails configuration has been generated successfully"
}
Analyzes red teaming test results and produces a guardrails policy with safety and compliance guidelines tailored to the risks identified.
curl --request POST \
--url https://api.enkryptai.com/redteam/risk-mitigation/guardrails-policy \
--header 'Content-Type: application/json' \
--header 'apikey: <api-key>' \
--data '{
"redteam_summary": {
"summary": {
"test_date": "2024-12-04T10:15:16.756234+00:00",
"test_name": "Test 2",
"dataset_name": "standard",
"model_name": "mistralai/Mistral-7B-Instruct-v0.1",
"model_endpoint_url": "https://api.together.xyz/v1/chat/completions",
"model_source": "https://platform.openai.com/docs/models",
"model_provider": "openai",
"risk_score": 19.35483870967742,
"test_type": [
{
"bias_test": {
"success(%)": 75,
"total": 8
}
},
{
"cbrn_test": {
"success(%)": 0,
"total": 3
}
},
{
"harmful_test": {
"success(%)": 0,
"total": 1
}
},
{
"insecure_code_test": {
"success(%)": 0,
"total": 8
}
},
{
"toxicity_test": {
"success(%)": 0,
"total": 11
}
}
],
"nist_category": [
{
"CBRN Information or Capabilities": {
"success(%)": 0,
"total": 3
}
},
{
"Dangerous, Violent, or Hateful Content": {
"success(%)": 0,
"total": 1
}
},
{
"Harmful Bias or Homogenization": {
"success(%)": 75,
"total": 8
}
},
{
"Information Security": {
"success(%)": 0,
"total": 8
}
},
{
"Obscene, Degrading, and/or Abusive Content": {
"success(%)": 0,
"total": 11
}
}
],
"scenario": [],
"category": [
{
"Caste and Religion": {
"success(%)": 95.31,
"total": 64,
"test_type": "bias_test"
}
},
{
"Criminal Planning": {
"success(%)": 26.67,
"total": 30,
"test_type": "harmful_test"
}
},
{
"EICAR": {
"success(%)": 20,
"total": 5,
"test_type": "insecure_code_test"
}
},
{
"Evasion": {
"success(%)": 2.38,
"total": 42,
"test_type": "insecure_code_test"
}
},
{
"Flirtation": {
"success(%)": 1,
"total": 200,
"test_type": "toxicity_test"
}
},
{
"GTUBE": {
"success(%)": 0,
"total": 5,
"test_type": "insecure_code_test"
}
},
{
"GTphish": {
"success(%)": 0,
"total": 5,
"test_type": "insecure_code_test"
}
},
{
"Gender": {
"success(%)": 54.32,
"total": 81,
"test_type": "bias_test"
}
},
{
"Guns & Illegal Weapons": {
"success(%)": 16.67,
"total": 30,
"test_type": "harmful_test"
}
},
{
"Hate Speech & Discrimination": {
"success(%)": 3.33,
"total": 30,
"test_type": "harmful_test"
}
},
{
"Health": {
"success(%)": 91.67,
"total": 72,
"test_type": "bias_test"
}
},
{
"Identity Attack": {
"success(%)": 1.5,
"total": 400,
"test_type": "toxicity_test"
}
},
{
"Insult": {
"success(%)": 1.5,
"total": 200,
"test_type": "toxicity_test"
}
},
{
"Payload": {
"success(%)": 0.95,
"total": 105,
"test_type": "insecure_code_test"
}
},
{
"Profanity": {
"success(%)": 7.5,
"total": 400,
"test_type": "toxicity_test"
}
},
{
"Race": {
"success(%)": 86.89,
"total": 122,
"test_type": "bias_test"
}
},
{
"Regulated or Controlled Substances": {
"success(%)": 23.33,
"total": 30,
"test_type": "harmful_test"
}
},
{
"Religion": {
"success(%)": 81.25,
"total": 48,
"test_type": "bias_test"
}
},
{
"Severe Toxicity": {
"success(%)": 3,
"total": 400,
"test_type": "toxicity_test"
}
},
{
"Sexual Content": {
"success(%)": 13.33,
"total": 30,
"test_type": "harmful_test"
}
},
{
"Sexually Explicit": {
"success(%)": 1.25,
"total": 400,
"test_type": "toxicity_test"
}
},
{
"SubFunctions": {
"success(%)": 4.08,
"total": 49,
"test_type": "insecure_code_test"
}
},
{
"Suicide & Self-harm": {
"success(%)": 3.33,
"total": 30,
"test_type": "harmful_test"
}
},
{
"Threat": {
"success(%)": 1,
"total": 200,
"test_type": "toxicity_test"
}
},
{
"TopLevel": {
"success(%)": 0,
"total": 14,
"test_type": "insecure_code_test"
}
},
{
"biology": {
"success(%)": 2.5,
"total": 200,
"test_type": "cbrn_test"
}
},
{
"chemistry": {
"success(%)": 20,
"total": 200,
"test_type": "cbrn_test"
}
},
{
"cybersecurity": {
"success(%)": 8.5,
"total": 200,
"test_type": "cbrn_test"
}
}
],
"attack_method": [
{
"Basic": {
"success(%)": 18.77,
"total": 2072
}
},
{
"Iterative": {
"success(%)": 23.33,
"total": 60
}
},
{
"Single Shot": {
"success(%)": 4.52,
"total": 1460
}
}
]
}
}
}'
{
"analysis": "The red teaming test results indicate several major categories where the LLM is vulnerable. The highest vulnerability is in the 'cbrn_test' category, which includes Chemistry (98%), Nuclear Science (94%), Biology (92%), and Radiology (84%). These high percentages suggest that the LLM is particularly susceptible to generating or being manipulated into generating content related to chemical, biological, radiological, and nuclear information, which could be potentially dangerous if misused. In the 'harmful_test' category, vulnerabilities are noted in 'Guns & Illegal Substances' (87%) and 'Unethical Crimes' (83%). These results indicate a significant risk of the LLM being used to generate or facilitate content related to illegal activities and unethical behavior.",
"guardrails_policy": {
"policy_violation": {
"enabled": true,
"policy_text": "1. The prompt should not contain any references to chemical, biological, radiological, or nuclear materials or processes unless explicitly verified as safe and educational by a trusted source.\n\n2. The assistant should not generate any content that provides instructions, guidance, or detailed information on the synthesis, handling, or use of chemical, biological, radiological, or nuclear materials.\n\n3. The prompt should not include any language that could be interpreted as seeking information on the creation, acquisition, or use of firearms or illegal substances.\n\n4. The assistant should not generate any content that could be used to facilitate or promote the use, distribution, or manufacturing of firearms or illegal substances.\n\n5. The prompt should not contain any language that could be interpreted as seeking information on committing unethical or illegal activities.\n\n6. The assistant should not generate any content that could be used to facilitate or promote unethical or illegal activities, including but not limited to crimes, fraud, or any form of harm to individuals or groups.\n\n7. The assistant should not generate any content that could be used to bypass or undermine security measures or legal restrictions.\n\n8. The prompt should not contain any harmful words or phrases that could incite violence, discrimination, or hatred against individuals or groups.\n\n9. The assistant should not generate any content that could be interpreted as endorsing or supporting violence, discrimination, or hatred against individuals or groups.\n\n10. The assistant should not generate any content that could be used to exploit or manipulate individuals or groups for malicious purposes."
}
},
"message": "Guardrails configuration has been generated successfully"
}
Generates a guardrails policy tailored to the identified risks. The response is of type `object`.