{
  "slug": "arthur-ai-arthur-bench",
  "name": "Arthur Bench",
  "description": "Arthur Bench is an open-source evaluation framework designed to help organizations compare and benchmark the performance of Large Language Models (LLMs). Developed by Arthur AI, it provides a suite of tools for assessing model outputs against specific business criteria to facilitate data-driven decisions during the AI model selection process.",
  "url": "https://optimly.ai/brand/arthur-ai-arthur-bench",
  "logoUrl": "",
  "baiScore": 62,
  "archetype": "Challenger",
  "category": "AI Observability",
  "categorySlug": null,
  "keyFacts": [],
  "aiReadiness": [],
  "competitors": [],
  "inboundCompetitors": [
    {
      "slug": "ground-truth-fact",
      "name": "ground_truth_fact"
    }
  ],
  "aiAlternatives": [],
  "parentBrand": {
    "slug": "arthur-ai",
    "name": "Arthur AI"
  },
  "subBrands": [],
  "updatedAt": "2026-04-11T15:42:39.139+00:00",
  "verifiedVitals": {
    "website": "https://www.arthur.ai/product/bench",
    "founded": "2023 (Product Launch)",
    "headquarters": "New York, NY (Parent HQ)",
    "pricing_model": "Free (Open Source) with Enterprise upsell to Arthur AI Observability platform.",
    "core_products": "Arthur Bench (Open Source LLM Evaluation Framework)",
    "key_differentiator": "Arthur Bench translates raw LLM outputs into consistent, business-focused performance scores that allow for direct comparison between vastly different model architectures.",
    "target_markets": "Data scientists, AI engineers, and enterprise product teams building LLM-powered applications.",
    "employee_count": "Not publicly available",
    "funding_stage": "Not publicly available",
    "subcategory": "LLM Evaluation & Benchmarking"
  },
  "timestamp": 1776010948992
}