<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <!-- Top-level site pages: home, models, benchmarks, digest, progress and
       task-explorer. Each entry carries only the required <loc> child. -->
  <url><loc>https://aibenchmarks.dev/</loc></url>
  <url><loc>https://aibenchmarks.dev/models</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmarks</loc></url>
  <url><loc>https://aibenchmarks.dev/digest</loc></url>
  <url><loc>https://aibenchmarks.dev/progress</loc></url>
  <url><loc>https://aibenchmarks.dev/task-explorer</loc></url>
  <!-- Per-model detail pages: one <url> per model, addressed through the
       "slug" query parameter of /model. Entries are in no alphabetical order
       and carry only <loc> (no lastmod, changefreq or priority).
       NOTE(review): some slugs look like dated variants of the same model
       (for example claude-haiku-4-5, claude-haiku-4-5-20251001 and
       claude-haiku-4-5-2025-10-01); confirm these are intentionally separate
       pages rather than duplicates. -->
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-1-8b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-3-70b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-6</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3-27b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-1-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3-4b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-72b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3-12b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-14b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-30b-a3b-2507-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-oss-20b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-32b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-pro-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3-4b-pt</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-7-sonnet</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3-12b-pt</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-2-3b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-nano</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-1-fast</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4-1-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-1-5b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-flash-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-1-70b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-4b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-nano</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-0-6b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-1-pro-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-2-1b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-2-11b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash-lite</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-4-31b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-4</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-2-27b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=olmo-3-1-32b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-6</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-oss-120b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-2-1212</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-2-9b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3-27b-pt</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4-1-nano</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-5-sonnet-20241022</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4o</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-7</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-1-flash-lite-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-4-26b-a4b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-1-5-pro-002</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=granite-4-0-h-micro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-1-70b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-0-flash</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-5-20250929</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-235b-a22b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-30b-a3b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3-27b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=olmo-3-1-32b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-5-9b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-2-1b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-5-2b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-2-2b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-4-31b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3-1b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwq-32b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-5-4b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-small-3-2-24b-instruct-2506</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3-12b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3-4b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=aya-expanse-32b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=olmo-3-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=olmo-3-7b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-4b-2507-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen3-5-0-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-r1-distill-qwen-32b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=exaone-4-0-32b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-1-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-4b-2507</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=solar-10-7b-v1-0</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-4-26b-a4b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-haiku-4-5-2025-10-01</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-r1-distill-qwen-14b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-1-5-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-32b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=aya-23-35b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-7b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-5-sonnet</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-next-80b-a3b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-70b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-5-air</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ministral-3-14b-instruct-2512</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ministral-3-14b-reasoning-2512</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=granite-4-0-h-small</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ministral-3-8b-instruct-2512</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-oss-20b-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=trinity-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=lfm2-2-6b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-1-5-flash</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=granite-4-0-micro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-4-e4b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ministral-3-3b-reasoning-2512</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-next-80b-a3b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-4-maverick</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=aya-expanse-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-1-5-14b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-7b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-7-flash</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=apertus-8b-instruct-2509</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=eurollm-22b-instruct-2512</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-7</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-1-5-flash-002</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ministral-3-3b-instruct-2512</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-haiku-4-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-235b-a22b-2507-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4o-chatgpt</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-1-5-9b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=afm-4-5b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=granite-4-0-h-1b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2-6</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3-270m</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-2-27b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=lfm2-5-1-2b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=lfm2-5-1-2b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=smollm3-3b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=magistral-small</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-haiku</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-5-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen3-8b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-2-exp</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-14b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4o-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-6</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-3b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-r1-distill-llama-70b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-8</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-0-5b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-4-mini-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-opus</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nvidia-nemotron-nano-9b-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-6-plus</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-3-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-7-sonnet-20250219</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-1-5-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-1-0-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-1-5-4b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=falcon3-7b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=minimax-m2-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-1-5-1-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-1-5b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-1-5-32b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-3b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-1-5b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-5-397b-a17b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-32b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-2-70b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-14b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=hermes-4-70b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-0-5b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v4-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-0-5b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-medium-3-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-4-scout-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mamaylm-gemma-3-12b-it-v1-0</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=lapa-v0-1-2-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-4-scout</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o4-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-r1-distill-qwen-1-5b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-0324</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen2-5-32b-instruct-abliterated-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=lapa-12b-pt</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-2-70b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-r1-distill-llama-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=lambda-qwen2-5-14b-dpo-test</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-1-nemotron-70b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwentile2-5-32b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=saka-14b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=awqward2-5-32b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen2-5-32b-instruct-cft</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=tempmotacilla-cinerea-0308</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ezo-qwen2-5-32b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=oxyge1-33b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=saka-1-5b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=dolphin3-0-r1-mistral-24b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen2-5-math-7b-cft</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-r1-redistill-qwen-7b-v1-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-r1-redistill-llama3-8b-v1-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-7b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-2-7b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-5-flash</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-r1-distill-qwen-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-r1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-3-5-turbo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o4-mini-2025-04-16</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=minimax-m2-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-72b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=seallms-v3-7b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3-2025-04-16</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-0-flash-001</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-5-27b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-1-5-14b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2-0905</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-5-35b-a3b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-nemo-instruct-2407</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-1-5-9b-32k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3-mini-2025-01-31</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-max</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=minimax-m2-7</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=reka-flash-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-nemo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-5-122b-a10b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-small-3-instruct-2501</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=seallm-7b-v2-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-2b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-large-3-2512</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-72b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v4-flash</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-r1-0528-qwen3-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nova-pro-v1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-5-haiku-20241022</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-fast</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nova-2-lite-v1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-1-terminus</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ministral-3-8b-2512</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-flash</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-6-27b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=free-evo-qwen72b-v0-8-re</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-1-5-34b-32k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-70b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=jamba-large-1-7</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-235b-a22b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=hunyuan-a13b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-1-405b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-2b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash-preview-04-17</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=minimax-m2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-pro-preview-03-25</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-2-9b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=command-a</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mimo-v2-flash</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=smollm2-1-7b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-5v</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=hermes-4-405b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-code-fast-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-1v-9b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-2-7b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-5-turbo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-72b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=olmo-2-1124-7b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-30b-a3b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-7b-v0-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-1-5-34b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-4-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=olmo-2-1124-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ernie-4-5-300b-a47b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-0-flash-lite-001</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nemotron-3-super-120b-a12b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-2-speciale</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=doubao-1-5-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-1-flash-lite</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-1-405b-instruct-fp8</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=devstral-small-2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4o-mini-2024-07-18</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash-lite-preview-06-17</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qvq-72b-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-2-13b-chat-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-2-2b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-20</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-3-5-mini-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-3-codex</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mixtral-8x7b-instruct-v0-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4o-2024-11-20</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-1-codex</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-r1-0528</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=step-3-5-flash</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=minimax-m3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4o-2024-05-13</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-20-0309</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-1-405b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-coder-32b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-7b-instruct-v0-2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3-1b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=intellect-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-1-codex-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-6v</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4o-2024-08-06</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-2-13b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwq-32b-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mimo-v2-5-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=hermes-3-llama-3-1-70b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-5-20251101</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mixtral-8x22b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-20250514</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-fable-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-6-35b-a3b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nova-lite-v1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ministral-8b-instruct-2410</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-large-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-5-haiku</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-20-beta-0309</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ministral-3-14b-2512</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen3-6-flash</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=trinity-large-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-2-codex</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-4b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mixtral-8x7b-v0-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-235b-a22b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mimo-v2-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=muse-spark</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-1-5-4b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-235b-a22b-2507</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-7b-instruct-v0-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-5v-turbo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-1-5-1-8b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-32b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-7b-v0-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-4b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-medium-3-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-32b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-7b-instruct-v0-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-coder-7b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-1-5-7b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-8b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-1-1-7b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-large-2-nov-instruct-2411</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nova-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-max</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4-turbo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mimo-v2-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-0709</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-1-20250805</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=magistral-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=seed-oss-36b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=olmo-3-32b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-max-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen3-coder-next</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-3-mini-4k-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-30b-a3b-2507</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ling-flash-2-0</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=human-expert</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=hy3-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=teuken-7b-instruct-v0-4</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ring-1t</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=zephyr-7b-beta</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o1-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-7-max</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-medium-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=step-3-7-flash</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nova-premier</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nova-lite</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-small-3-2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kat-coder-pro-v1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=devstral-2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-4-maverick-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=median-human</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=c4ai-command-r-v01</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o1-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=c4ai-command-r-08-2024</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-1-5-flash-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-sw3-20b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-1-nemotron-ultra-253b-v1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-codex</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-large-2-jul</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=jt-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-small-3-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-6-max-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-coder-480b-a35b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-pro-preview-06-05</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-1-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=starling-lm-7b-beta</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-haiku-4-5-20251001</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=granite-3-0-8b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o1-2024-12-17</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-large-2411</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-vl-7b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ling-1t</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nova-micro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=minimax-m1-80k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-vl-32b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mimo-v2-omni</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-7-plus</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=solar-open-100b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-1-5-34b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3n-e4b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-2-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mercury-2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-34b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=devstral-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=lfm2-8b-a1b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-nemo-minitron-8b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-2-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-4-12b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-1-tulu-3-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=command-r-plus</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nvidia-nemotron-3-super</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-3n-e2b-it-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=falcon-h1r-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-6b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-5-instant</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=falcon-11b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ling-2-6-1t</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=lfm2-24b-a2b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nemotron-3-nano-omni-30b-a3b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4-5-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=solar-10-7b-instruct-v1-0</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-pro-preview-05-06</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-5-plus</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=step3-vl-10b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen3-omni-30b-a3b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-5-sonnet-20240620</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ling-2-6-flash</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nemotron-3-ultra-550b-a55b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=sarvam-105b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=sarvam-30b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=solar-pro-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-ita</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-1-5-9b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kat-coder-pro-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ring-2-6-1t</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-7b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=longcat-flash-lite</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-small-4</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=molmo2-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=granite-4-1-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-3-70b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-1-codex-max</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-3-medium-4k-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nanbeige4-1-3b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-0-flash-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-opus-20240229</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-20250514</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen3-5-omni-plus</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-3-mini-128k-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen3-coder</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=step-3-5-flash-2603</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=trinity-large</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mixtral-8x22b-instruct-v0-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-2-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4-0613</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=falcon-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4-preview-1106</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-4-maverick-instruct-fp8</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-pro-03-25</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=wizardlm-2-8x22b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-haiku-20240307</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-medium-3-1-2508</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-0-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen2-5-omni-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-small-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=chatglm2-6b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash-preview-09-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-1-5-6b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-34b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4-0314</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-large-2-jul-instruct-2407</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=jamba-1-5-large</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-2-90b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-1-5-72b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-2-13b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=internlm2-chat-20b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=suzume-llama-3-8b-multilingual</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-6b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-2026-03-05</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=neuralllama-3-8b-orpo-v0-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=daredevil-8b-abliterated</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-instruct-abliterated-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=alphamonarch-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-5-sonnet-june-2024</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4-turbo-2024-04-09</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2-instruct-0905</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-chatgpt</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=internlm2-chat-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=openchat-3-5-0106</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-4-multimodal-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=falcon-7b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=neuralllama-3-8b-dt-v0-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-max</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-3-medium-128k-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o1-mini-2024-09-12</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-3-5-turbo-0613</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=jamba-1-5-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=stablelm-2-1-6b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-large</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-9b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-sonnet-20240229</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash-lite-preview-09-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-small-3-1-2503</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-14b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-max-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-instant</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=granite-3-0-2b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=arcee-trinity-large</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=longcat-flash-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-3-5-moe-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-small-3-2-2506</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=minimax-m1-40k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-1-5-72b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=pixtral-large</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash-preview-05-20</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen3-5-flash</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-medium-3-2505</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-3-mini-beta</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=chatglm3-6b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-65b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=falcon-40b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-linear-48b-a3b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=internlm2-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-beta</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-small-instruct-2409</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-1-5-flash-001</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=seed-2-0-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-3-small-8k-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=zephyr-7b-alpha</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-0-pro-preview-02-05</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=granite-3-0-2b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-1-supernova-lite</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llamantino-3-anita-8b-inst-dpo-ita</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=maestrale-chat-v0-4-beta</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=lexora-medium-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-4bit-ultrachat-ita</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=volare</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=occiglot-7b-it-en-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llamantino-2-7b-hf-ita</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-3-5-turbo-0125</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-0-flash-lite</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-5-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=command-r</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=hermes-2-pro-mistral-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=neuraldaredevil-8b-abliterated</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=salamandra-2b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=openchat-3-6-8b-20240522-2024-05-22</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-coder-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=command-r-plus-08-2024</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=sonar-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=sonar</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=tulu-2-13b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=tulu-2-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=minimax-text-01</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-coder-6-7b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-large-2402</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-small-4-2603</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-1-5-pro-001</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mixtral-8x7b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-2-13b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=codellama-34b-instruct-hf</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-3-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-13b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=text-davinci-001</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=vicuna-13b-v1-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=jamba-1-6-large</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=aya-vision-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=hermes-2-theta-llama-3-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama3-chatqa-1-5-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=configurable-yi-1-5-9b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=configurable-hermes-2-pro-llama-3-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-1-5-9b-chat-16k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=faro-yi-9b-dpo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=configurable-janus-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=calme-3-2-instruct-78b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=starcoder2-15b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mixtral-8x22b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-mythos-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=tulu-2-dpo-70b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=openhermes-2-5-mistral-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=dolly-v2-12b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mpt-30b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mpt-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=jamba-1-6-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4-turbo-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nous-hermes-2-solar-10-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=openbuddy-yi1-5-9b-v21-1-32k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=neuralsynthesis-7b-v0-4-slerp</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v2-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=starcoder2-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2-0711</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-0-flash-01-21</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mixtral-8x7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-9b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=command-r7b-12-2024</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-3-5-turbo-0301</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-3-5-turbo-1106</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mpt-7b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=vicuna-7b-v1-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=internlm2-20b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-235b-a22b-fp8</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ernie-4-5-21b-a3b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-small</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=opencoder-8b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-3-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-coder-1-3b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-coder-9b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=falcon-180b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mpt-30b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=dbrx-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=tulu-2-dpo-13b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=tulu-2-dpo-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-34b-200k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-instruct-8b-simpo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=internlm2-base-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=internlm2-5-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=dbrx</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=codeqwen1-5-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=opencoder-1-5b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-small-2402</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=smollm-1-7b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=smollm-135m-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-turbo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-2-70b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-llm-7b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=granite-8b-code-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o1-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nous-hermes-2-mixtral-8x7b-dpo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=stablebeluga2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=baichuan-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-7-sonnet-february-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-max-2026-01-23</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-j-6b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-deep-think</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=minimax-m1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-coder-32b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-14b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-llm-67b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=baichuan-2-13b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-llm-7b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=internlm-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-1-instant</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-2-9b-it-simpo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=minicpm-sala</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=infinity-instruct-7m-gen-llama3-1-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=infinity-instruct-3m-0625-llama3-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=starling-lm-7b-beta-expo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=infinity-instruct-3m-0625-yi-1-5-9b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mammoth2-8x7b-plus</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-coder-9b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-7b-openorca</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=alpaca-native</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=redpajama-incite-7b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-instruct-8b-sppo-iter3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=suzume-llama-3-8b-multilingual-orpo-borda-half</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=dolphin-2-6-mixtral-8x7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=speechless-code-mistral-7b-v1-0</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=starcoder2-3b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-70b-synthia-v3-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=reflectioncoder-ds-33b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=reflectioncoder-cl-34b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=internlm-20b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=baichuan-2-7b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-v3p3-70b-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-large-2407</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-march-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4o-may-2024</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-nemo-base-2407</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=cerebras-gpt-13b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=vicuna-13b-v1-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nous-hermes-2-yi-34b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nanbeige2-16b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=internlm2-chat-20b-expo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=tulu-2-dpo-70b-expo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=internlm2-chat-7b-expo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=starling-lm-7b-alpha-expo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-1-5-pro-sept</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=xuanyuan-70b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=luxia-21-4b-alignment-v1-0</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=luxia-21-4b-alignment-v1-2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=barcenas-14b-phi-3-medium-orpo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=sauerkrautlm-phi-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=dolphin-2-9-1-yi-1-5-9b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-1-5-9b-chat-16k-abliterated</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=carbonbeagle-11b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=free-solar-evo-v0-11</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=free-solar-evo-v0-13</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=yi-1-5-9b-chat-abliterated</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=faro-yi-9b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-instruct-v0-10</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=free-solar-evo-v0-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-instruct-v0-9</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=configurablebeagle-11b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-instruct-v0-8</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=neuralsynthesis-7b-v0-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=dzakwan-moe-4x7b-beta</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=beyonder-4x7b-v3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-small-instruct-2409-abliterated</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mixtao-7bx2-moe-v8-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=multi-verse-model</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=bigyi-15b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=calme-4x7b-moe-v0-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=calme-4x7b-moe-v0-2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gem2-llamion-14b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mammoth2-7b-plus</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=dolphin-2-9-2-phi-3-medium-abliterated</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=dolphin-2-9-2-phi-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-magpie-align-v0-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-instruct-8b-sppo-iter3-simpo-merge</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=chinese-alpaca-2-13b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-instruct-8b-simpo-sppo-iter3-merge</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gem2-llamion-14b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-prolong-512k-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-prolong-64k-instruct</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-instruct-8b-cpo-simpo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=suzume-llama-3-8b-multilingual-orpo-borda-full</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=suzume-llama-3-8b-multilingual-orpo-borda-top75</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-instruct-8b-sppo-iter2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gem2-llamion-14b-longchat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=infinity-instruct-7m-0729-llama3-1-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-nectar-dpo-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=seallm-7b-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=suzume-llama-3-8b-multilingual-orpo-borda-top25</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=configurable-llama-3-8b-v0-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral7b-pairrm-sppo-expo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=neuralllama-3-8b-orpo-v0-4</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-synthia-v3-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-magpie-align-sft-v0-3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral-orpo-capybara-7k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral7b-pairrm-sppo-iter2</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mistral7b-pairrm-sppo-iter3</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=tess-2-0-llama-3-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gritlm-7b-kto</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ghost-8b-beta-1608</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=bigstral-12b-32k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-swallow-8b-instruct-v0-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=openchat-3-5-0106-laser</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=rombos-replete-coder-instruct-8b-merged</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=juud-mistral-7b-dpo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=chinese-llama-2-13b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=configurablehermes-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=configurable-mistral-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=janus-orpo-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-prolong-64k-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-orpo-v0-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=codeninja-1-0-openchat-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-8b-prolong-512k-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=synatra-7b-v0-3-rp</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=rombos-replete-coder-llama3-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=megabeam-mistral-7b-512k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=megabeam-mistral-7b-300k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=dolphin-2-9-1-mixtral-1x22b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=openbuddy-mistral-22b-v21-1-32k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=armorm-llama3-8b-v0-1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-coder-6-7b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-coder-1-3b-base</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-coder-480b-a35b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=internlm-20b-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-1-8b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-chatgpt-latest</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-4-preview-0125</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2-turbo</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=stablelm-tuned-alpha-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-pro-march-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-5</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=open-llama-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=opt-13b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-neox-20b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nanbeige4-3b-thinking-2511</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=orca-2-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=baidu-ernie-5-0</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nous-hermes-llama-2-7b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-1-0-pro-002</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=intern-s1</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=opt-1-3b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-0-flash-february-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-1-chatgpt</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-3-70b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=llama-3-1-8b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-2-5-7b-it</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-1-fast-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-oss-20b-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=olmo-3-1-32b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-oss-20b-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=olmo-3-7b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-mini-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash-lite-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-nano-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-mini-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-4b-2507-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=tiny-aya-global</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-14b-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-flash-preview-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-mini-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-nano-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-8b-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash-lite-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-next-80b-a3b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-nano-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=apertus-70b-instruct-2509</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-0-6b-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-5-20250929-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-mini-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-nano-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=lfm2-1-2b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=lfm2-5-1-2b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-mini-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-nano-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-235b-a22b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=nemotron-cascade-2-30b-a3b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-30b-a3b-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-2-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-minimal</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-mini-minimal</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-nano-minimal</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-oss-120b-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-1-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-2-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=phi-4-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2-5-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-2-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-1v-9b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-7-sonnet-20250219-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-235b-a22b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-30b-a3b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-30b-a3b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-xhigh</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-5-xhigh</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-1-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-fast-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-5-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-5-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-1-fast-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-2-exp-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o4-mini-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-2-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-8b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3-mini-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-2-xhigh</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-8b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-oss-120b-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-20-beta-0309-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-mini-xhigh</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-4b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2-5-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-20-0309-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-2-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-vl-32b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-5-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-5-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-3-7-sonnet-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-5-flash-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=olmo-3-32b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-14b-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-32b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-fast-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-1-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-6-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-nano-xhigh</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-5-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-3-codex-xhigh</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-30b-a3b-2507-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-5-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-20-0309-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-2-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-haiku-4-5-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-7-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-6v-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-6v-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-pro-preview-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mimo-v2-flash-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-6-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=mimo-v2-flash-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-6-adaptive-reasoning-max-effort</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-6-adaptive-reasoning-max-effort</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-7-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-5-122b-a10b-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-5-122b-a10b-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2-6-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-mini-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-1-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-5-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-5-1-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-nano-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-max-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-5-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-5-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=hy3-preview-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-haiku-4-5-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=hy3-preview-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-7-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemma-4-31b-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-2-exp-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v3-1-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-5-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-0-6b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-6-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-4b-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-5-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-7-adaptive-reasoning-max-effort</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-1-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-5-flash-minimal</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-8-adaptive-reasoning-max-effort</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-2-codex-xhigh</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-5-1-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2-6-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=glm-4-7-flash-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-1-pro-preview-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3-mini-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-5-20251101-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-6-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-8-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-flash-preview-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-6-thinking-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-0-flash-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-nano-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-8-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-6-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-7-xhigh</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-mini-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-6-thinking-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-fable-5-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-3-mini-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-3-codex-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-1-20250805-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-20-beta-0309-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-20-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o4-mini-2025-04-16-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-5-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-flash-preview-minimal</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-pro-xhigh</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-8-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-6-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-flash-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o4-mini-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-7-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o4-mini-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-haiku-4-5-20251001-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=arcee-trinity-large-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-5-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-3-mini-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-1-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=grok-4-20-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3-2025-04-16-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-20250514-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=qwen-3-5-plus-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3-mini-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-1-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-3-1-pro-preview-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash-preview-09-2025-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-0-flash-01-21-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-thinking-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-thinking-16k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=ernie-4-5-21b-a3b-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3-low</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o1-2024-12-17-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-5-flash-lite-preview-09-2025-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-thinking-xhigh</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-1-non-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3-mini-2025-01-31-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-1-thinking-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o3-2025-04-16-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=o1-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-5-pro-xhigh</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v4-pro-max</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=deepseek-v4-flash-max</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-4-thinking-high</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=sauerkrautlm-phi-3-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=dolphin-2-9-2-phi-3-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gpt-5-thinking-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=gemini-2-0-flash-lite-preview</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=lfm-40b</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=kimi-k2-turbo-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-5-thinking-16k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-thinking-16k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-6-thinking-32k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-5-thinking-32k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-5-thinking-16k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-opus-4-5-20251101-thinking-32k</loc></url>
  <url><loc>https://aibenchmarks.dev/model?slug=claude-sonnet-4-5-20250929-thinking-32k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=epoch-ai-eci</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=anthropic-eci-aeci</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=artificial-analysis</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-mmlu-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-gpqa-diamond</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-math-500</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-aime-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-humanity-s-last-exam</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-livecodebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-scicode</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-tau-2-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-terminal-bench-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-critpt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-ifbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=episodic-memory</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-long-context-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gim</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-gdpval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-mmmu-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=webdev-arena</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-website</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-game-dev</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-ui-components</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-3d</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-svg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-data-viz</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-image</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-graphic-design</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-logo</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-video</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-video-editing</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-image-editing</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-slides</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=design-arena-text-to-speech</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clockbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=terminal-bench-2-0</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=frontierswe</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=programbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=programbench-almost</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=otis-mock-aime-2024-25</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=math-level-5</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=iumb</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=deepresearchbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kaggle-deepsearchqa-google</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=tau-bench-retail</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=tau-bench-telecom</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=tau3-bench-retail</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=tau3-bench-telecom</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=tau3-bench-airline</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=tau3-bench-banking-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mcp-universe-llm-w-react</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mcp-universe-llm-w-function-calls</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mcp-universe-agent-track</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=the-agent-company</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=factorio-learning-environment</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=yc-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=foodtruckbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openclawprobench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=dabstep</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=klu-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vellum-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=huggingface-open-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=onyx-open-source-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=autobench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prollm-stackunseen</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prollm-stackeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prollm-q-a-assistant</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prollm-summarization</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prollm-image-understanding</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prollm-entity-extraction</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prollm-sql-disambiguation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prollm-llm-as-a-judge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prollm-openbook-q-a</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prollm-function-calling</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prollm-transcription</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=simplebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=skatebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=trackingai</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=trackingai-iq-mensa-norway</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=trackingai-iq-offline</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=trackingai-iq-vision</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=helm-stanford</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=helm-narrativeqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=helm-naturalquestions-open</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=helm-naturalquestions-closed</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=helm-wmt-2014</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=context-arena-mrcr-2-needle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=context-arena-mrcr-4-needle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=context-arena-mrcr-8-needle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=platinumbench-mit</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chatbot-arena-lmarena</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lmarena-vision-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=arena-ai-search-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=arena-ai-document-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=arena-hard-v2-gpt-4-1-judge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=arena-hard-creative-writing</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livecodebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalplus-humaneval-plus-mbpp-plus</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=swe-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=swe-bench-verified</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=swe-lancer</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kernelbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=weirdml</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=wolfram-llm-benchmarking-project</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multi-docker-eval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=scale-ai-swe-bench-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aider-polyglot-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bigcodebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livecodebench-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=cybench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=boxpwnr-ctf-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aisi-cyber-tlo-10m</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aisi-cyber-tlo-100m</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aisi-cyber-ctf</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=esobench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=reasonscape-m12x</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=reasonscape-r12</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=benchtable-dubesor</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=canaicode</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bigcode-models-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=big-code-memorization</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openlm-text2sql-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clerk-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=spacetimedb-llm-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=spacetimedb-llm-benchmark-c</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=spacetimedb-llm-benchmark-rust</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=spacetimedb-llm-benchmark-typescript</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=android-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=arc-agi-2-arc-prize</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=arc-agi-3-arc-prize</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=humanity-s-last-exam</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=frontiermath</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livemathematicianbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=balrog</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=tau-bench-sierra</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agentbench-fc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=embodiedbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=webarena</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=appworld-normal</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=appworld-challenge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=browsergym-miniwob</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=browsergym-workarena-l1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=browsergym-workarena-l2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mle-bench-openai</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=metr-time-horizons</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=scale-ai-mcp-atlas</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vending-bench-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vitabench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vending-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=blueprint-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=berkeley-function-calling-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-agent-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gosuevals</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=cot-control-openai</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=spiral-bench-eq-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vectara-hallucination-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=huggingface-hallucinations-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mask-scale-ai</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=rag-hallucination-leaderboard-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=scale-ai-propensitybench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=linuxarena</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=truthfulqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=freshqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=halueval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=halueval-wild</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=factscore</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=selfcheckgpt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=safetybench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=salad-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=alert</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=phare</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=harmbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=jailbreakbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=strongreject</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=advbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sorry-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=xstest</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=do-not-answer</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=beavertails</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pku-saferlhf</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aegis-safety-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=wildguardtest</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agentharm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agentdojo</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agentdyn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=os-harm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=wmdp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=biotier</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=b3-bacterial-biothreat-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=abc-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=scimt-safety</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=scisafeeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sosbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=biorisk-5</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=jailbreakdnabench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=labshield</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=dual-use-biology-uplift-eval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lab-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=biolp-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bioplanner</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=machiavelli</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ethics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=hhh-alignment</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=trustllm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=realtoxicityprompts</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=toxigen</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bbq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bold</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=stereoset</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=crows-pairs</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=winobias</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mm-safetybench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=safebench-multimodal</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vlguard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=figstep</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mssbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sb-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=crucible</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=flask</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=situational-awareness-dataset-sad</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=shade-arena</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=deceptionbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mm-deceptionbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sycbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=syco-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=elephant</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gaslightbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=or-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=reward-hacking-benchmark-rhb</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sycon-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=brokenmath</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=beacon</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=d-rex</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agentic-misalignment</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agentleak</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=privlm-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=privacybench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=valuebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agent-valuebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=moralbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mm-moralbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=socialharmbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=darkpatterns-llm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=simplesafetytests</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sg-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=falsereject</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=xl-safetybench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=fiction-livebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=eq-bench-creative-writing-v3</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=eq-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=eq-bench-longform-writing</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=divergent-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=asciibench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=asciieval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=babyvision</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=visualpuzzles</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=buzzbench-humour</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=judgemark-v2-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=videogamebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gameworld-generalist</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gameworld-computer-use</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mme-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vpct-visual-physics-comprehension</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=zerobench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=scale-ai-vista</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=video-mmmu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=geobench-acw</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=geobench-photos</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=yapbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aragen</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openrca</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gta</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=carlini-applied-llm-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mathvision-math-v</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmmu-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=grab-lite</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sudokubench-single-shot-9x9</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sudokubench-multi-step-4x4</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sudokubench-multi-step-6x6</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sudokubench-single-shot-4x4</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sudokubench-single-shot-6x6</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sudokubench-multi-step-9x9</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=insanitybench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bullshit-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nyt-connections-extended</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nyt-connections-original</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=minebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=magebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-chess-maxim-saplin</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chessbench-llm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=katago-bench-1k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=game-agent-coding-league</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gacl-battleship</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gacl-tic-tac-toe</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gacl-wizard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gacl-connect4</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gacl-wordmatrix</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gacl-2x8-minichess</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gacl-surround-morris</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multivac-evaluation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sgi-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vellum-gpqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vellum-humaneval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vellum-math</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vellum-aime-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vellum-arc-agi-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vellum-humanity-s-last-exam</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vellum-livecodebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vellum-mmmlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-benchmarks</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=scale-ai-seal-platform</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-eth-zurich</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=scale-ai-enigmaeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=tpbench-theoretical-physics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=forecastbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=futureeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prophetarena</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=predictionarena-polymarket</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=predictionarena-kalshi</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chembench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=virology-capabilities-test</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=scale-ai-tutorbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=scale-ai-scipredict</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=osworld</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gaia</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=scicode</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=alpacaeval-2-0</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=alpacaeval-1-0</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mixeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=cruxeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=supergpqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=step-game-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=creative-writing-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=position-bias-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=elimination-game-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=persuasion-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multi-turn-debate-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sycophancy-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=generalization-v1-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=generalization-v2-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nolima</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bizfinbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=hlce</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=esolang-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pickmebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pinchbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prinzbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bongard-in-wonderland</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gso-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gdpval-openai-evals</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=simpleqa-verified</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chess-puzzles-epoch-ai</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gsm8k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=big-bench-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=arc-challenge-ai2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=hellaswag</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=boolq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=triviaqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=piqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=winogrande</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openbookqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=posttrainbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=alpha-arena-nof1-ai</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bbeh</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=zebralogic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=can-llms-falsify</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=math-perturb-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=prmbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=olympiadbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=longvideobench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=video-mme</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=video-mme-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mlvu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=favor-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=tempcompass</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=visulogic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=physbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seed-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=olmocr-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mllm-as-a-judge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=web-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=codeclash</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=codetransocean-multilingualtrans</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=codetransocean-nichetrans</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=codetransocean-llmtrans</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=codetransocean-dltrans</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=super-nlp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=longproc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kotlin-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=real-evals</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=snakebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=cubebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=runebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pokeagent-challenge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=amongais</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=werewolf-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=werewolf-benchmark-win-rate</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gg-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nonobench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=dibimt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=benczechmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-energy-score</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=air-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=superclue</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=buyout-game-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=hal-usaco</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=hal-core-bench-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=hal-scienceagentbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=hal-assistantbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=hal-online-mind2web</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=liveoibench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-showdown</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-medical-llm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-swe-atlas-test-writing</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-swe-atlas-codebase-qna</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-agentic-tool-use-enterprise</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-agentic-tool-use-chat</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=react-native-evals</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=judge-arena</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=swe-arena</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=long-code-arena</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=duckdb-nsql</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=webapp1k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=webapp1k-duo</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lmgame-bench-super-mario-bros</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lmgame-bench-2048</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lmgame-bench-tetris</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lmgame-bench-candy-crush</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lmgame-bench-sokoban</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lmgame-bench-ace-attorney</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=translation-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=deception-effectiveness-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=deception-resistance-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=repairbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=context-bench-filesystem</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-apex-agents</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=context-bench-skills</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mega-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=zeroeval-gpqa-diamond</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=zeroeval-math-500</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ux-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=fasteval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=scievalkit</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-benchmarker-suite</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-r1-eval-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lmarena-preference-proxy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sea-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llmzszl-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=swahili-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-pl-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmlu-by-task-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=naturalbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=yks-2025-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=koffvqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=trail-gaia</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=trail-swe</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sticktoyourrole</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ifeval-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=facts-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=cptu-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=polish-eq-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=humanlikeness-overall</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=shadermatch</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=fmnb-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=subquadratic-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clinicbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-italian-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=compl-ai-board</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=q-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chinesesafe-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-persian-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=rabbits</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bridgebench-ui</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bridgebench-security</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bridgebench-debugging</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bridgebench-refactoring</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bridgebench-hallucination</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ocrbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmeb-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-scienceqa-test</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-pope</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-seed-bench-2-plus</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-coco-captions</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmt-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-a-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mtvqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ocr-vqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-seed-bench-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-vcr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-portuguese-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nphardeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-trustworthy-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-cot-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-korean-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-chinese-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=la-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-english</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ru-arena-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=cybermetric</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=cyberbench-nlp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=tactl</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seccodeplt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=cyberseceval-3</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=redcode</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nyu-ctf-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=artifactsbenchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bird-interact-c-interact</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bird-interact-a-interact</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bird-critic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=orchestrationbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmsi-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=emobench-m</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-aggrefact</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=drop</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mathbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=hindigen</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=arabic-ifeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=turkish-mmlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=igakuqa119</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-llm-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=researcherbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=story-theory-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-public-goods-game</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-emergent-collusion</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-chess-leaderboard-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-chess-leaderboard-continuation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=score-robustness-accuracy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=score-robustness-consistency</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pinocchio-italian-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=arabic-broad-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=darija-chatbot-arena</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kernel-arena-kernelbench-hip</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kernel-arena-waferbench-nvfp4</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=deepswe</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=frontiercode</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chi-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=cusp-science</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=spatialbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ntrex-translation-comet</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=european-llm-leaderboard-accuracy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=french-llm-leaderboard-average</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=icelandic-llm-leaderboard-average</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=piimb-pii-masking-avg-f2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=idea-bench-arena-anonymous</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=inferbench-text-to-image-genai-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalcrafter-final-sum-score</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chronomagic-bench-gpt4o-mtscore</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-arabic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-bengali</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-burmese</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-chinese</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-english</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-french</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-german</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-hindi</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-indonesian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-italian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-japanese</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-korean</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-portuguese</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-spanish</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-swahili</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-global-mmlu-lite-yoruba</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-business</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-health</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-humanities-social-sciences</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-law</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-science-engineering-mathematics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-c</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-dart</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-go</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-html</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-java</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-javascript</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-julia</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-kotlin</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-php</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-python</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-r</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-rust</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-swift</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aa-omniscience-software-engineering-swe-typescript</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-brainstorming</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-figurative-language</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-humor</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-problem-solving</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-stem</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-story-narrative</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-analobench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-arastories</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-arn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-brainteaser</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-c3-crosstalk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-chinese-homophonic-puns</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-conceptual-design</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-cpers</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-creatset</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-crowd-vote</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-cue-word-story</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-data-narrative</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-fann-or-flop</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-fig-qa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-future-ideas</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-futuregen</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-grapheval-ai-researcher</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-grapheval-iclr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-grapheval-review-advisor</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-historical-analogy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-humor-transfer</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-hypobench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-hypogen</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-irfl</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-lcc-metaphor</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-liveideabench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-meta4xnli</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-metaphor-generation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-metaphoric-analogies</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-moh-x</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-mops</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-munch</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-newyorker-humor</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-ocw</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-ocw-connections</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-outline-to-story</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-permpst</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-poetmt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-pollux-creativity</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-pron-vs-prompt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-proparalogy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-pun-eval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-puntuguese</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-riddlesense</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-rpgbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-scar</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-schnovel</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-science-analogies</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-scimon</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-sdat</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-showerthoughts</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-simile-generation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-slang-generation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-speak-to-structure</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-ss-gen</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-story-generation-rocstories</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-story-quality</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-thenextchapter</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-tinyfabulist</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-tinystories</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-ttcw</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-twistlist</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=agc-bench-unfun-corpus</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-energy-score-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-energy-score-text-generation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai2d</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aj-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=arc-agi-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=arc-agi-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=arc-agi-3</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-action-prediction</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-action-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-action-sequencing</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-action-tracing</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-animal-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-audio-3d-angle-estimation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-audio-distance-estimation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-audio-time-estimation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-audio-visual-synchronization</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-bird-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-dance-and-music-matching</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-eating-sound-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-film-and-music-matching</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-gunshot-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-hallucination</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-hazard-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-instrument-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-intricacy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-material-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-melody</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-meme-understanding</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-music-genre-classification</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-music-score-matching</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-music-sentiment-analysis</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-scene-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-singer-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-space</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-speech-sentiment-analysis</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-timbre</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-time</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-tone</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=av-odyssey-transportation-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=afrobench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=aider-polyglot-coding-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=alpha-arena-nof1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=arena-hard-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=artificial-analysis-intelligence-index</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=babilong-niah</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=balrog-babaisai-llm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=balrog-babaisai-vlm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=balrog-babyai-llm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=balrog-babyai-vlm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=balrog-crafter-llm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=balrog-crafter-vlm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=balrog-minihack-llm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=balrog-minihack-vlm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=balrog-nethack-llm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=balrog-nethack-vlm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=balrog-textworld-llm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=bird-sql</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=benchtable</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=biggen-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=big-code-memorization-humaneval-pass-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=big-code-memorization-humaneval-pass-50</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=big-code-memorization-humaneval-et-pass-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=big-code-memorization-humaneval-et-pass-50</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=binaryaudit</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=blueprint-bench-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=c4-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=car-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ccbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=claw-eval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-adventuregame</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-clean-up</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-clemscore</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-codenames</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-deal-or-no-deal</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-guesswhat</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-hot-air-balloon</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-imagegame</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-matchit-ascii</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-privateshared</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-referencegame</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-taboo</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-textmapworld</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-textmapworld-graphreasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-textmapworld-specificroom</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-wordle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-wordle-with-clue</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clem-wordle-with-critic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=cmmlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=cmt-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=crm-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=crust-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=carwashbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chartmuseum</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chatbot-arena-code</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chatbot-arena-document</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chatbot-arena-image-edit</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chatbot-arena-search</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chatbot-arena-text</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chatbot-arena-text-to-image</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chatbot-arena-vision</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chess-bench-llm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chessimagebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chinese-classical-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chinese-classical-bench-char-gloss-judge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chinese-classical-bench-compress-efficiency</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chinese-classical-bench-fill-in-exact</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chinese-classical-bench-idiom-source-book-em</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chinese-classical-bench-punctuate-punct-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chinese-classical-bench-translate-judge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=classeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clawbench-v1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=clawbench-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=codeclash-battlesnake</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=codeclash-core-war</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=codeclash-halite</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=codeclash-poker</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=codeclash-robocode</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=codeclash-robot-rumble</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=codeelo</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=constory-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=confabulation-leaderboard-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=cursorbench-3-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=deepresearch-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=deepresearch-bench-citation-accuracy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=deepresearch-bench-comprehensiveness</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=deepresearch-bench-effective-citations</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=deepresearch-bench-insight</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=deepresearch-bench-instruction-following</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=deepresearch-bench-readability</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=diplomacy-betrayal-tendency</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=diplomacy-overall-performance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=diplomacy-steerability</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=discox</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ducky-bench-africa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ducky-bench-saxo-frog</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ducky-bench-stabby-quack</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=dystopiabench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=eq-bench-3</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalita-maia-gen</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalita-maia-mc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalita-admission-test</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalita-evalita-ner</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalita-faq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalita-hate-speech-detection</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalita-lexical-substitution</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalita-relation-extraction</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalita-sentiment-analysis</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalita-summarization-fanpage</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalita-text-entailment</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalita-word-in-context</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalita-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evmbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=embodiedbench-alfred</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=embodiedbench-habitat</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=empathybench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=epoch-ai-adversarial-nli</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=epoch-ai-apex-agents</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=epoch-ai-common-sense-qa-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=epoch-ai-lambada</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=epoch-ai-science-qa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=epoch-ai-superglue</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-albanian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-albanian-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-albanian-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-albanian-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-albanian-nlu-mms-sq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-albanian-nlu-multiwikiqa-sq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-albanian-nlu-scala-sq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-albanian-nlu-wikiann-sq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-albanian-summarization-lr-sum-sq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bosnian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bosnian-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bosnian-nlu-mms-bs</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bosnian-nlu-multi-wiki-qa-bs</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bosnian-nlu-wikiann-bs</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bosnian-summarization-lr-sum-bs</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bulgarian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bulgarian-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bulgarian-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bulgarian-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bulgarian-nlu-bg-ner-bsnlp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bulgarian-nlu-cinexio</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bulgarian-nlu-multiwikiqa-bg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-bulgarian-nlu-scala-bg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-catalan</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-catalan-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-catalan-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-catalan-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-catalan-nlu-guia-cat</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-catalan-nlu-multiwikiqa-ca</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-catalan-nlu-scala-ca</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-catalan-nlu-wikiann-ca</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-catalan-summarization-dacsa-ca</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-croatian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-croatian-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-croatian-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-croatian-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-croatian-nlu-mms-hr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-croatian-nlu-multi-wiki-qa-hr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-croatian-nlu-scala-hr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-croatian-nlu-wikiann-hr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-czech</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-czech-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-czech-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-czech-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-czech-nlu-cs-gec</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-czech-nlu-csfd-sentiment</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-czech-nlu-poner</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-czech-nlu-sqad</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-czech-summarization-czech-news</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-danish</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-danish-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-danish-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-danish-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-danish-nlu-angry-tweets</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-danish-nlu-dansk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-danish-nlu-multi-wiki-qa-da</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-danish-nlu-scala-da</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-danish-summarization-nordjylland-news</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-dutch</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-dutch-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-dutch-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-dutch-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-dutch-nlu-conll-nl</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-dutch-nlu-dbrd</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-dutch-nlu-squad-nl</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-dutch-nlu-scala-nl</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-dutch-simplification-duidelijke-taal</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-dutch-summarization-wiki-lingua-nl</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-english-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-english-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-english-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-english-nlu-conll-en</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-english-nlu-squad</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-english-nlu-sst-5</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-english-nlu-scala-en</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-english-summarization-cnn-dailymail</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-estonian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-estonian-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-estonian-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-estonian-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-estonian-nlu-estner</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-estonian-nlu-estonian-valence</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-estonian-nlu-grammar-et</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-estonian-nlu-multiwikiqa-et</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-estonian-summarization-err-news</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-faroese</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-faroese-nlu-fone</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-faroese-nlu-foqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-faroese-nlu-fosent</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-faroese-nlu-scala-fo</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-finnish</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-finnish-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-finnish-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-finnish-nlu-scala-fi</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-finnish-nlu-scandisent-fi</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-finnish-nlu-turku-ner-fi</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-finnish-nlu-tydiqa-fi</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-finnish-summarization-xlsum-fi</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-french</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-french-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-french-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-french-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-french-nlu-allocine</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-french-nlu-eltec</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-french-nlu-fquad</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-french-nlu-scala-fr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-french-summarization-orange-sum</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-german</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-german-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-german-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-german-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-german-nlu-germeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-german-nlu-germanquad</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-german-nlu-sb10k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-german-nlu-scala-de</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-german-summarization-mlsum-de</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-greek</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-greek-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-greek-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-greek-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-greek-nlu-elner</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-greek-nlu-greek-sa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-greek-nlu-multiwikiqa-el</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-greek-nlu-scala-el</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-greek-summarization-greek-wikipedia</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-hungarian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-hungarian-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-hungarian-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-hungarian-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-hungarian-nlu-husst</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-hungarian-nlu-multiwikiqa-hu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-hungarian-nlu-scala-hu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-hungarian-nlu-szeged-ner</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-hungarian-summarization-hunsum</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-icelandic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-icelandic-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-icelandic-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-icelandic-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-icelandic-nlu-hotter-and-colder-sentiment</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-icelandic-nlu-mim-gold-ner</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-icelandic-nlu-nqii</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-icelandic-nlu-scala-is</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-icelandic-summarization-rrn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-italian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-italian-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-italian-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-italian-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-italian-nlu-multinerd-it</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-italian-nlu-squad-it</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-italian-nlu-scala-it</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-italian-nlu-sentipolc16</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-italian-summarization-ilpost-sum</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-latvian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-latvian-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-latvian-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-latvian-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-latvian-nlu-fullstack-ner-lv</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-latvian-nlu-latvian-twitter-sentiment</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-latvian-nlu-multiwikiqa-lv</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-latvian-nlu-scala-lv</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-lithuanian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-lithuanian-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-lithuanian-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-lithuanian-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-lithuanian-nlu-atsiliepimai</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-lithuanian-nlu-multiwikiqa-lt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-lithuanian-nlu-scala-lt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-lithuanian-nlu-wikiann-lt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-lithuanian-summarization-lrytas</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-norwegian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-norwegian-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-norwegian-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-norwegian-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-norwegian-nlu-norne-nb</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-norwegian-nlu-norne-nn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-norwegian-nlu-norec</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-norwegian-nlu-norquad</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-norwegian-nlu-scala-nb</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-norwegian-nlu-scala-nn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-norwegian-summarization-no-sammendrag</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-polish</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-polish-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-polish-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-polish-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-polish-nlu-kpwr-ner</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-polish-nlu-poquad</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-polish-nlu-polemo2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-polish-nlu-scala-pl</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-polish-summarization-psc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-portuguese</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-portuguese-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-portuguese-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-portuguese-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-portuguese-nlu-harem</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-portuguese-nlu-multiwikiqa-pt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-portuguese-nlu-sst-2-pt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-portuguese-nlu-scala-pt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-portuguese-summarization-publico</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-romanian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-romanian-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-romanian-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-romanian-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-romanian-nlu-multiwikiqa-ro</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-romanian-nlu-ronec</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-romanian-nlu-rosent</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-romanian-nlu-scala-ro</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-romanian-summarization-sumo-ro</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-serbian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-serbian-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-serbian-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-serbian-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-serbian-nlu-mms-sr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-serbian-nlu-multiwikiqa-sr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-serbian-nlu-scala-sr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-serbian-nlu-uner-sr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-serbian-summarization-lr-sum-sr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovak</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovak-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovak-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovak-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovak-nlu-csfd-sentiment-sk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovak-nlu-multi-wiki-qa-sk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovak-nlu-scala-sk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovak-nlu-uner-sk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovene</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovene-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovene-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovene-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovene-nlu-multiwikiqa-sl</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovene-nlu-ssj500k-ner</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovene-nlu-scala-sl</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-slovene-nlu-sentinews</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-spanish</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-spanish-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-spanish-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-spanish-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-spanish-nlu-conll-es</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-spanish-nlu-mlqa-es</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-spanish-nlu-scala-es</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-spanish-nlu-sentiment-headlines-es</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-spanish-summarization-mlsum-es</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-swedish</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-swedish-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-swedish-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-swedish-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-swedish-nlu-multi-wiki-qa-sv</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-swedish-nlu-suc3</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-swedish-nlu-scala-sv</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-swedish-nlu-swerec</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-swedish-summarization-swedn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-ukrainian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-ukrainian-common-sense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-ukrainian-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-ukrainian-nlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-ukrainian-nlu-cross-domain-uk-reviews</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-ukrainian-nlu-multiwikiqa-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-ukrainian-nlu-ner-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-ukrainian-nlu-scala-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=euroeval-ukrainian-summarization-lr-sum-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=european-llm-leaderboard-flores200</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=european-llm-leaderboard-flores200-source</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=european-llm-leaderboard-flores200-target</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=european-llm-leaderboard-zero-shot-accuracy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-arc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-classification</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-ace</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-aeb</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-af</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-ak</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-am</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-apc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-ar</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-ars</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-ary</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-arz</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-as</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-awa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-ay</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-az</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-ba</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-ban</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-be</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-bem</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-bg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-bho</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-bjn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-bm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-bn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-bs</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-ca</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-ceb</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-chm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-ckb</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-crh</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-cs</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-cv</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-cy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-da</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-de</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-doi</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-dz</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-ee</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-el</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-en</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-eo</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-es</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-et</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-eu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-language-fa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-mgsm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-mmlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-translation-from</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-translation-to</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evals-for-every-language-truthfulqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evasionbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=exploitbench-v8-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=factory-code-review-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=fanoutqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=finben-financial-llm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=finben-fns</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=finben-finnum</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=finben-fintext</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=finben-multifin</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=finben-qa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=finsearchcomp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=formationeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=french-llm-leaderboard-bac-fr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=french-llm-leaderboard-gpqa-fr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=french-llm-leaderboard-ifeval-fr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=frontiercode-diamond</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=frontiercode-extended</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=frontiercode-main</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=frontiermath-tier-4</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=frontiermath-tiers-1-3</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gdpval-aa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gsma-open-telco-3gpp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gsma-open-telco-oran-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gsma-open-telco-telelogs</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gsma-open-telco-telemath</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gsma-open-telco-teleqna</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gsma-open-telco-teletables</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gsma-open-telco-srsran-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gsma-open-telco-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-agent-banking-accuracy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-agent-banking-tsq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-agent-healthcare-accuracy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-agent-healthcare-tsq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-agent-insurance-accuracy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-agent-insurance-tsq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-agent-investment-accuracy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-agent-investment-tsq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-agent-telecom-accuracy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-agent-telecom-tsq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-bfcl-v3-irrelevance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-bfcl-v3-multi-turn-base-multi-function</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-bfcl-v3-multi-turn-base-single-function</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-bfcl-v3-multi-turn-composite</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-bfcl-v3-multi-turn-long-context</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-bfcl-v3-multi-turn-missing-function</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-bfcl-v3-multi-turn-missing-parameter</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-multi-turn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-single-turn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-toolace-single-function-call</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-tau-long-context</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-xlam-multiple-tool-multiple-call</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-xlam-multiple-tool-single-call</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-xlam-single-tool-multiple-call</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-xlam-single-tool-single-call</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=galileo-tool-tasks-xlam-tool-missing</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gapminder-ai-worldview</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=gorilla-api-bench-bfcl</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=helmet-128k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=hallusionbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=healthbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=humanlikeness-discourse-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=humanlikeness-discourse-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=humanlikeness-meaning-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=humanlikeness-meaning-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=humanlikeness-sound-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=humanlikeness-sound-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=humanlikeness-syntax-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=humanlikeness-syntax-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=humanlikeness-word-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=humanlikeness-word-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=idp-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=imo-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=icelandic-llm-arc-challenge-is</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=icelandic-llm-belebele-is</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=icelandic-llm-ged</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=icelandic-llm-inflection</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=icelandic-llm-wikiqa-is</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=icelandic-llm-winogrande-is</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=infibench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=infinitebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kaggle-enterprise-ops-ibm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kaggle-facts-google</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kaggle-facts-grounding</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kaggle-facts-multimodal</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kaggle-facts-parametric</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kaggle-facts-search</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kaggle-game-arena-chess</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kaggle-game-arena-chess-openings</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kaggle-game-arena-four-in-a-row</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kaggle-game-arena-poker-heads-up</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kaggle-game-arena-werewolf</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kaggle-wwtp-engineering</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kluster-hallucination-detection-non-rag-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kluster-hallucination-detection-rag-method-1-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kluster-hallucination-detection-rag-method-2-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kluster-hallucination-detection-rag-method-3-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=kluster-hallucination-detection-rag-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=la-leaderboard-aquas</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=la-leaderboard-copa-spanish</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=la-leaderboard-eusexams-basque</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=la-leaderboard-galcola</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=la-leaderboard-piqa-catalan</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=la-leaderboard-spanish-law-exams</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=la-leaderboard-xnli-galician</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lexam</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-librusechistory</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-librusecmhqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-librusecmhqa-star</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-longcontextmultiq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-longcontextmultiq-star</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-matreshkanames</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-matreshkanames-star</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-matreshkayesno</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-passkey</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-passkeywithlibrusec</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-ru2wikimultihopqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-ru2wikimultihopqa-star</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-rubabilongqa1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-rubabilongqa2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-rubabilongqa3</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-rubabilongqa3-star</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-rubabilongqa4</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-rubabilongqa5</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-rugsm100</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-ruqasper</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-ruquality</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-rusciabstractretrieval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-ruscifi</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-ruscipassagecount</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-ruscipassagecount-star</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-rutpo</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=libra-rutrec</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-chess-saplin</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-grid-game-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-ai2d</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-aime-2024</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-aime-2026</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-amc-2022-23</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-androidworld-sr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-blink</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-bird-sql-dev</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-browsecomp-long-context-128k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-browsecomp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-browsecomp-zh</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-c-eval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-cc-ocr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-cmmlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-collie</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-csimpleqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-charxiv-d</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-charxiv-r</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-charadessta</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-chartqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-claw-eval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-codeforces</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-complexfuncbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-countbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-cybergym</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-drop</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-deepplanning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-deepsearchqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-docvqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-docvqatest</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-dynamath</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-eclektic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-erqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-egoschema</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-embspatialbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-fleurs</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-finance-agent</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-gdpval-aa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-graphwalks-bfs-under-128k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-graphwalks-bfs-over-128k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-graphwalks-parents-under-128k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-graphwalks-parents-over-128k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-hmmt-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-hmmt-feb-26</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-hmmt25</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-hallusion-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-healthbench-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-healthbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-hiddenmath</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-imo-answerbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-include</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-infovqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-infovqatest</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-internal-api-instruction-following-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-lvbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-longbench-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-maxife</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mcp-atlas</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mcp-mark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mlvu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mlvu-m</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mm-mt-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mmbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mmbench-v1-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mmlu-prox</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mmlu-redux</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mmlongbench-doc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mmmlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mmstar</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mrcr-v2-8-needle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mrcr-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mrcr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mvbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mathvision</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mathvista</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-mathvista-mini</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-medxpertqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-muirbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-multi-challenge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-multi-if</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-multi-swe-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-multipl-e</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-multilingual-mmlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-nl2repo</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-nova-63</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-natural-questions</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-natural2code</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-ocrbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-ocrbench-v2-en</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-ocrbench-v2-zh</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-odinw</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-ojbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-osworld</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-osworld-verified</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-omnidocbench-1-5</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-openai-mrcr-2-needle-128k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-openai-mrcr-2-needle-1m</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-polymath</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-realworldqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-refcoco-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-refspatialbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-squality</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-swe-lancer-ic-diamond-subset</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-screenspot-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-screenspot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-seal-0</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-simplevqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-social-iqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-textvqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-theoremqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-toolathlon</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-triviaqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-v-star</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-vita-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-vibe-eval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-video-mme</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-videomme-w-sub</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-videomme-w-o-sub</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-videommmu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-wmt24-plus-plus</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-widesearch</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-wild-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-writingbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-zerobench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-zerobench-sub</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-zebralogic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-stats-t2-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-trustworthy-adversarial</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-trustworthy-adversarial-demo</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-trustworthy-ethics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-trustworthy-fairness</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-trustworthy-out-of-distribution</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-trustworthy-privacy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-trustworthy-stereotype</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-trustworthy-toxicity</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llm-wikirace</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llmeval-logic-base</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llmeval-logic-formalization-fixed</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llmeval-logic-formalization-free</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llmeval-logic-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llmeval-logic-hard-sub-q</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llmspark-dictator-game</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llmspark-nim-game</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llmspark-prisoner-s-dilemma</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llmspark-trust-game</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llmspark-who-is-spy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=llava-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lol-arena</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-bipartitematching</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-commonneighbor</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-cycledetection</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-description</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-edge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-entityattribute</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-entityrelation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-entitytype</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-mst</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-neighbor</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-pagerank</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-predecessor</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-relationbetween</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-relationpath</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-shortestpath</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-summary</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-treecheck</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lom-benchmark-treelca</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-amps-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-code-completion</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-code-generation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-connections</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-consecutive-events</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-integrals-with-game</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-javascript</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-logic-with-navigation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-math-comp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-olympiad</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-paraphrase</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-plot-unscrambling</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-python</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-simplify</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-spatial</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-story-generation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-summarize</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-table-join</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-table-reformat</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-theory-of-mind</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-typescript</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-typos</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=livebench-zebra-puzzle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=longbench-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=longemotion</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=loong</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=lynchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mcpbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=metr-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=metr-benchmark-80-horizon</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mm-vet</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmbench-v1-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mme-cc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmstar</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmtu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmtu-column-relationship</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmtu-column-transform</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmtu-data-cleaning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmtu-kb-mapping</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmtu-nl-2-code</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmtu-table-join</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmtu-table-matching</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmtu-table-qa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmtu-table-transform</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmtu-table-understanding</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=magebench-s1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=magebench-s2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-aime-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-aime-2026</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-apex-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-apex-shortlist-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-arxiv-april</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-arxiv-february</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-arxiv-march</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-arxiv-may</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-arxivlean-march</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-arxiv-false-april</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-arxiv-false-february</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-arxiv-false-march</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-arxiv-false-may</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-arxiv-math-dec-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-arxiv-math-jan-2026</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-brumo-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-cmimc-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-hmmt-feb-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-hmmt-feb-2026</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-hmmt-nov-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-imc-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-imo-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-improofbench-final-answers</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-improofbench-proofs</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-kangaroo-2025-levels-1-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-kangaroo-2025-levels-11-12</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-kangaroo-2025-levels-3-4</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-kangaroo-2025-levels-5-6</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-kangaroo-2025-levels-7-8</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-kangaroo-2025-levels-9-10</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-project-euler-943-970</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-project-euler-971-984</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-project-euler-985-988</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-putnam-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-smt-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-usamo-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=matharena-usamo-2026</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mathspatial</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mathvision</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mathvista</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-agentclinic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-careqa-en</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-careqa-open</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-head-qa-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-healthbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-healthbench-consensus</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-healthbench-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-longhealth-task-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-longhealth-task-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-m-arc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medec</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-mmlu-pro-health</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-mtsamples-procedures</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-mtsamples-replicate</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-med-halt-reasoning-fct</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-med-halt-reasoning-nota</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medcalc-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medcasereasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medconceptsqa-easy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medconceptsqa-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medconceptsqa-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-meddialog</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medexqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medhallu-easy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medhallu-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medhallu-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medmcqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medr-bench-1-turn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medr-bench-free-turn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medr-bench-oracle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medxpertqa-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medxpertqa-understanding</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medbullets-op4</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medbullets-op5</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-medicationqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-metamedqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-pubhealthbench-freeform</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-pubhealthbench-reviewed</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-pubmedqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-sctpublic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-supergpqa-medicine-easy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=medmarks-supergpqa-medicine-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mizan-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=monaco</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-arabic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-bengali</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-chinese</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-french</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-german</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-hindi</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-indonesian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-italian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-japanese</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-korean</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-portuguese</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-spanish</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-swahili</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multilingual-mmlu-yoruba</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nphardeval-edp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nphardeval-gcp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nphardeval-gcp-d</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nphardeval-ksp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nphardeval-msp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nphardeval-sas</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nphardeval-spp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nphardeval-tsp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nphardeval-tsp-d</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nyt-connections-older-models</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=nanogpt-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ockbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=olympicarena</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=omni-math</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=onyx-open-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-all</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-alghafa-average</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-alghafa-meta-ar-dialects</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-alghafa-meta-ar-msa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-alghafa-multiple-choice-facts-truefalse-balanced-task</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-alghafa-multiple-choice-grounded-statement-soqal-task</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-alghafa-multiple-choice-grounded-statement-xglue-mlqa-task</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-alghafa-multiple-choice-rating-sentiment-no-neutral-task</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-alghafa-multiple-choice-rating-sentiment-task</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-alghafa-multiple-choice-sentiment-task</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-exams</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-accounting-university</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-arabic-language-general</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-arabic-language-grammar</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-arabic-language-high-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-arabic-language-middle-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-arabic-language-primary-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-average</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-biology-high-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-civics-high-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-civics-middle-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-computer-science-high-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-computer-science-middle-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-computer-science-primary-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-computer-science-university</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-driving-test</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-economics-high-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-economics-middle-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-economics-university</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-general-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-general-knowledge-middle-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-general-knowledge-primary-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-geography-high-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-geography-middle-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-geography-primary-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-abstract-algebra</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-anatomy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-astronomy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-average</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-business-ethics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-clinical-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-college-biology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-college-chemistry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-college-computer-science</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-college-mathematics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-college-medicine</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-college-physics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-computer-security</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-conceptual-physics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-econometrics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-electrical-engineering</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-elementary-mathematics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-formal-logic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-global-facts</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-biology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-chemistry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-computer-science</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-european-history</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-geography</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-government-and-politics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-macroeconomics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-mathematics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-microeconomics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-physics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-psychology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-statistics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-us-history</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-high-school-world-history</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-human-aging</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-human-sexuality</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-international-law</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-jurisprudence</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-logical-fallacies</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-machine-learning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-management</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-marketing</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-medical-genetics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-miscellaneous</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-moral-disputes</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-moral-scenarios</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-nutrition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-philosophy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-prehistory</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-professional-accounting</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-professional-law</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-professional-medicine</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-professional-psychology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-public-relations</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-security-studies</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-sociology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-us-foreign-policy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-virology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-ht-world-religions</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-history-high-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-history-middle-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-history-primary-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-islamic-studies</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-islamic-studies-high-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-islamic-studies-middle-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-islamic-studies-primary-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-law-professional</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-management-university</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-math-primary-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-natural-science-middle-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-natural-science-primary-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-philosophy-high-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-physics-high-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-political-science-university</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-social-science-middle-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-arabic-mmlu-social-science-primary-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-aratrust-average</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-aratrust-ethics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-aratrust-illegal</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-aratrust-mentalhealth</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-aratrust-offensive</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-aratrust-physicalhealth</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-aratrust-privacy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-aratrust-trustfulness</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-aratrust-unfairness</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-madinah-qa-arabic-language-general</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-madinah-qa-arabic-language-grammar</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-arabic-llm-madinah-qa-average</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-chinese-llm-arc-challenge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-chinese-llm-c-eval-semantic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-chinese-llm-cmmlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-chinese-llm-gsm8k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-chinese-llm-hellaswag</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-chinese-llm-truthfulqa-mc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-chinese-llm-winogrande</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-cot-lsat-analytical-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-cot-lsat-logical-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-cot-lsat-reading-comprehension</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-cot-logiqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-cot-logiqa-2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-aio-char-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-alt-e-to-j-bert-score-ja-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-alt-e-to-j-bleu-ja</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-alt-e-to-j-comet-wmt22</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-alt-j-to-e-bert-score-en-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-alt-j-to-e-bleu-en</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-alt-j-to-e-comet-wmt22</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-cg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-commonsensemoralja-exact-match</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-el</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-fa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-he</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-jamp-exact-match</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-janli-exact-match</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-jcommonsenseqa-exact-match</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-jemhopqa-char-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-jmmlu-exact-match</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-jnli-exact-match</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-jsem-exact-match</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-jsick-exact-match</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-jsts-pearson</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-jsts-spearman</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-kuci-exact-match</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-mc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-mmlu-en-exact-match</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-mr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-mt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-mbpp-pylint-check</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-nli</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-niilc-char-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-qa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-rc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-sum</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-wiki-coreference-set-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-wiki-dependency-set-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-wiki-ner-set-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-wiki-pas-set-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-wiki-reading-char-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-wikicorpus-e-to-j-bert-score-ja-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-wikicorpus-e-to-j-bleu-ja</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-wikicorpus-e-to-j-comet-wmt22</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-wikicorpus-j-to-e-bert-score-en-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-wikicorpus-j-to-e-bleu-en</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-wikicorpus-j-to-e-comet-wmt22</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-xlsum-ja-bert-score-ja-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-xlsum-ja-bleu-ja</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-xlsum-ja-rouge1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-xlsum-ja-rouge2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-japanese-llm-xlsum-ja-rougelsum</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-level-elementary-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-level-elementary-school-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-level-high-school</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-level-high-school-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-level-undergraduate</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-level-undergraduate-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-overall-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-algebra</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-algebra-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-analytic-geometry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-analytic-geometry-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-arithmetic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-arithmetic-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-graph-theory</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-graph-theory-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-plane-geometry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-plane-geometry-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-puzzle-test</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-puzzle-test-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-scientific-figure</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-scientific-figure-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-solid-geometry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-solid-geometry-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-statistics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-dynamath-subject-statistics-avg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-logicvista</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-logicvista-deductive</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-logicvista-inductive</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-logicvista-mechanical</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-logicvista-numerical</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-logicvista-spatial</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-difficulty-easy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-difficulty-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-difficulty-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-knowledge-l1-functions</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-knowledge-l1-properties-of-shapes</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-knowledge-l1-transformations-of-shapes</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-knowledge-l2-acute-angle-trigonometric-functions</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-knowledge-l2-circle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-knowledge-l2-intersecting-and-parallel-lines</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-knowledge-l2-inverse-proportional-function</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-knowledge-l2-linear-function</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-knowledge-l2-quadratic-function</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-knowledge-l2-quadrilateral</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-knowledge-l2-similarity-of-shapes</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-knowledge-l2-symmetry-of-shapes</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-knowledge-l2-triangle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-year-eight</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-year-nine</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mmmath-year-seven</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathverse</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathverse-analytic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathverse-angle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathverse-applied</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathverse-area</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathverse-coordinate</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathverse-expression</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathverse-functions</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathverse-length</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathverse-plane-geometry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathverse-property</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathverse-solid-geometry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathverse-volume</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-algebra</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-analytic-geometry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-arithmetic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-combinatorial-geometry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-combinatorics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-counting</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-descriptive-geometry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-graph-theory</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-logic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-metric-geometry-angle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-metric-geometry-area</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-metric-geometry-length</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-solid-geometry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-statistics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-topology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvision-transformation-geometry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvista</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvista-alg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvista-ari</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvista-fqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvista-geo</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvista-gps</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvista-log</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvista-mwp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvista-num</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvista-sci</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvista-sta</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvista-tqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-mathvista-vqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-olympiadbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-olympiadbench-cn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-olympiadbench-en</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-olympiadbench-geometry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-olympiadbench-plane-geometry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-olympiadbench-probability-and-statistics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-olympiadbench-solid-geometry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-olympiadbench-trigonometric-functions</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-angles-and-length</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-basic-transformations-of-figures</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-calculation-of-plane-figures</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-calculation-of-solid-figures</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-completemastery-loose</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-completemastery-strict</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-correspondence-of-coordinates-and-positions</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-cutting-and-combining-of-figures</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-direction</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-inadequategeneralization-loose</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-insufficientknowledge-loose</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-one-step-s1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-position</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-rotememorization-loose</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-rotememorization-strict</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-route-map</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-score-loose</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-three-step-s3</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-two-step-s2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-understanding-and-conversion-of-units</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-understanding-of-plane-figures</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-lmm-reasoning-wemath-understanding-of-solid-figures</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-medical-llm-mmlu-anatomy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-medical-llm-mmlu-clinical-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-medical-llm-mmlu-college-biology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-medical-llm-mmlu-college-medicine</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-medical-llm-mmlu-medical-genetics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-medical-llm-mmlu-professional-medicine</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-medical-llm-medmcqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-medical-llm-medqa-usmle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-medical-llm-pubmedqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-multilingual-reasoning-13b-mgsm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-multilingual-reasoning-13b-mnumgluesub</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-multilingual-reasoning-13b-msvamp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-multilingual-reasoning-7b-mgsm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-multilingual-reasoning-7b-mnumgluesub</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-multilingual-reasoning-7b-msvamp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-pl-llm-generative</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-pl-llm-multiple-choice</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-pl-llm-rag</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-persian-llm-arc-challenge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-persian-llm-arc-easy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-persian-llm-aut-multiple-choice-persian</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-persian-llm-mmlu-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-persian-llm-part-multiple-choice</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-agent-multi-turn</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-agent-tool-use</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-code-competition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-code-comprehensive</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-knowledge-common-sense</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-knowledge-engineering</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-knowledge-humanities</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-knowledge-science</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-knowledge-social-science</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-llm-agent</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-llm-code</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-llm-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-llm-language</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-llm-math</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-llm-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-language-creation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-language-dialogue</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-language-instruction-following</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-language-nlp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-math-college</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-math-competition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-multimodal-ai2d</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-multimodal-hallusionbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-multimodal-mm-vet</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-multimodal-mmbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-multimodal-mmmu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-multimodal-mmstar</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-multimodal-mathvista</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-multimodal-ocrbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-reasoning-academic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-reasoning-common</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-research-aime-2025</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-research-gpqa-diamond</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-research-hle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-research-ifeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-research-livecodebench-v6</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opencompass-research-mmlu-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-atom-structure</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-eclipses</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-faults-and-earthquakes</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-food-chains-and-webs</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-life-cycles</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-moon-phase-and-equinox</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-parts-of-a-whole</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-parts-of-the-earth</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-photosynthesis-and-respiration</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-rock-cycle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-rock-strata</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-solar-system</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-types-of</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-volcano</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ai2d-water-carbon-and-nitrogen-cycle</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-hallusionbench-aacc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-hallusionbench-facc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-hallusionbench-qacc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-llava-bench-complex</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-llava-bench-conv</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-llava-bench-detail</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-ar</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-action-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-attribute-comparison</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-attribute-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-cp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-celebrity-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-fp-c</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-fp-s</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-function-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-future-prediction</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-identity-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-image-emotion</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-image-quality</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-image-scene</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-image-style</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-image-topic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-lr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-nature-relation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-ocr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-object-localization</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-overall</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-physical-property-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-physical-relation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-rr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-social-relation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-spatial-relationship</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-cn-structuralized-imagetext-understanding</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-ar</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-action-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-attribute-comparison</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-attribute-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-cp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-celebrity-recognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-fp-c</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-fp-s</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-function-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-future-prediction</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-identity-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-image-emotion</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-image-quality</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-image-scene</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-image-style</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-image-topic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-lr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-nature-relation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-ocr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-object-localization</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-overall</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-physical-property-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-physical-relation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-rr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-social-relation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-spatial-relationship</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmbench-v1-1-en-structuralized-imagetext-understanding</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-artwork</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-celebrity</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-code-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-cognition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-color</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-commonsense-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-count</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-existence</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-landmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-numerical-calculation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-ocr</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-perception</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-position</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-posters</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-scene</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mme-text-translation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-accounting</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-agriculture</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-architecture-and-engineering</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-art</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-art-theory</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-art-and-design</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-basic-medical-science</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-biology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-business</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-chemistry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-clinical-medicine</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-computer-science</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-design</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-diagnostics-and-laboratory-medicine</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-economics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-electronics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-energy-and-power</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-finance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-geography</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-health-and-medicine</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-history</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-humanities-and-social-science</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-literature</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-management</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-marketing</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-materials</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-math</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-mechanical-engineering</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-music</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-pharmacy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-physics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-psychology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-public-health</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-science</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-sociology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmmu-tech-and-engineering</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmstar-coarse-perception</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmstar-fine-grained-perception</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmstar-instance-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmstar-logical-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmstar-math</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mmstar-science-and-technology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mathvista-alg</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mathvista-ari</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mathvista-fqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mathvista-geo</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mathvista-gps</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mathvista-log</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mathvista-mwp</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mathvista-num</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mathvista-sci</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mathvista-sta</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mathvista-tqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-mathvista-vqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openvlm-ocrbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=opper-taskbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pact-lechmazur</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-category-action-based</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-category-critical-thinking</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-category-google-ads</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-category-meta-ads</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-difficulty-easy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-difficulty-hard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-difficulty-medium</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-question-type-action-based</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-question-type-mcq</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-reasoning-type-adversarial</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-reasoning-type-creative-strategy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-reasoning-type-diagnostic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-reasoning-type-quantitative</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pm-agi-reasoning-type-recall</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-country</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-double-choco</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-firefly</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-heyawake</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-hitori</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-kurodoko</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-lits</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-light-up</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-mashu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-norinori</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-nurikabe</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-nurimaze</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-nurimisaki</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-sashigane</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-shakashaka</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-shikaku</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-slitherlink</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-sudoku</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-tapa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pencil-puzzle-bench-yajilin</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pinocchio-italian-cultura</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pinocchio-italian-diritto</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pinocchio-italian-generale</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pinocchio-italian-lingua-straniera</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pinocchio-italian-logica</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pinocchio-italian-matematica-e-scienze</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=pubmedqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=qzhou-flowchart-qa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=rbench-v</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=rpc-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ruler</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=researchcodebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=rubberduckbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=salad-bench-attack</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=salad-bench-base</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sea-llm-leaderboard-seabench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sea-llm-leaderboard-seaexam</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sea-helm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-adversarial-robustness</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-arabic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-audiomultichallenge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-audiomultichallenge-text-output</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-chinese</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-coding</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-enigmaeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-fortress</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-humanity-s-last-exam</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-instruction-following</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-japanese</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-korean</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-mask</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-mcp-atlas</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-math</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-multichallenge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-multinrc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-professional-reasoning-benchmark-finance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-professional-reasoning-benchmark-legal</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-propensitybench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-remote-labor-index-rli</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-swe-bench-pro-private-dataset</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-swe-bench-pro-public-dataset</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-scipredict</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-spanish</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-tutorbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-vista</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-visualtoolbench-vtb</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seedbench-img</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sgi-bench-deep-research</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sgi-bench-dry-experiment</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sgi-bench-experimental-reasoning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sgi-bench-idea-generation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sgi-bench-wet-experiment</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sibench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=slm-rag-arena</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=smdd-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=smdd-bench-diversity</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=squad-v1-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=squad-v2-0</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=swe-lancer-diamond</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=swe-rebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=science-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cross-lingual-consistency-cross-logiqa-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cross-lingual-consistency-cross-logiqa-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cross-lingual-consistency-cross-mmlu-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cross-lingual-consistency-cross-mmlu-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cross-lingual-consistency-cross-xquad-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cross-lingual-consistency-cross-xquad-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cultural-reasoning-cn-eval-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cultural-reasoning-cn-eval-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cultural-reasoning-ph-eval-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cultural-reasoning-ph-eval-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cultural-reasoning-sg-eval-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cultural-reasoning-sg-eval-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cultural-reasoning-sg-eval-v1-cleaned-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cultural-reasoning-sg-eval-v1-cleaned-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cultural-reasoning-sg-eval-v2-mcq-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cultural-reasoning-sg-eval-v2-open-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cultural-reasoning-us-eval-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-cultural-reasoning-us-eval-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-dialogue-dream-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-dialogue-dream-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-dialogue-dialogsum-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-dialogue-samsum-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-emotion-indoemotion-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-emotion-indoemotion-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-emotion-sst-2-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-emotion-sst-2-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-flores-translation-chinese-to-english-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-flores-translation-chinese-to-english-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-flores-translation-indonesian-to-english-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-flores-translation-indonesian-to-english-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-flores-translation-malay-to-english-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-flores-translation-malay-to-english-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-flores-translation-vietnamese-to-english-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-flores-translation-vietnamese-to-english-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-c3-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-c3-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-cola-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-cola-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-mnli-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-mnli-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-mrpc-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-mrpc-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-ocnli-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-ocnli-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-qnli-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-qnli-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-qqp-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-qqp-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-rte-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-rte-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-wnli-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-fundamental-nlp-tasks-wnli-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-multilingual-reasoning-c-eval-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-multilingual-reasoning-c-eval-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-multilingual-reasoning-cmmlu-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-multilingual-reasoning-cmmlu-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-multilingual-reasoning-indommlu-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-multilingual-reasoning-indommlu-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-multilingual-reasoning-mmlu-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-multilingual-reasoning-mmlu-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-multilingual-reasoning-zbench-few-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seaeval-multilingual-reasoning-zbench-zero-shot</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seephys</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=slopcodebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=spacevista</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=spider-2-0-dbt</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=spider-2-0-lite</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=spider-2-0-snow</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=spiral-bench-delusion</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=spreadsheetbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=sveltebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=t-eval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=tablebench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=tau-bench-airline</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=taxcalcbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=trackingai-iq-test</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=trackingai-iq-test-mensa-norway</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=trackingai-iq-test-offline</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=trackingai-iq-test-vision</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ugi-natural-intelligence</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ugi-willingness-w-10</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ugi-writing</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ugi-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-arc-challenge-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-arc-easy-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-belebele-ukr-cyrl</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-crh-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-cs-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-de-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-en-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-hu-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-pl-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-ro-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-ru-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-sk-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-uk-crh</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-uk-cs</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-uk-de</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-uk-en</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-uk-hu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-uk-pl</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-uk-ro</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-uk-ru</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-flores-uk-sk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-gsm8k-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-abstract-algebra</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-anatomy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-astronomy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-business-ethics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-clinical-knowledge</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-college-biology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-college-chemistry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-college-computer-science</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-college-mathematics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-college-medicine</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-college-physics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-computer-security</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-conceptual-physics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-econometrics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-electrical-engineering</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-elementary-mathematics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-formal-logic</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-global-facts</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-biology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-chemistry</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-computer-science</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-european-history</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-geography</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-government-and-politics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-macroeconomics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-mathematics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-microeconomics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-physics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-psychology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-statistics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-us-history</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-high-school-world-history</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-human-aging</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-human-sexuality</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-humanities</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-international-law</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-jurisprudence</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-logical-fallacies</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-machine-learning</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-management</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-marketing</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-medical-genetics</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-miscellaneous</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-moral-disputes</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-moral-scenarios</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-nutrition</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-other</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-philosophy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-prehistory</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-professional-accounting</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-professional-law</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-professional-medicine</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-professional-psychology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-public-relations</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-security-studies</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-social-sciences</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-sociology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-stem</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-us-foreign-policy</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-virology</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-global-mmlu-full-uk-world-religions</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-ifeval-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-crh-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-cs-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-de-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-en-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-hu-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-pl-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-ro-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-ru-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-sk-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-uk-crh</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-uk-cs</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-uk-de</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-uk-en</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-uk-hu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-uk-pl</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-uk-ro</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-uk-ru</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-long-flores-uk-sk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-squad-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-triviaqa-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-wmt-en-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-winogrande-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-xlsum-uk</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-zno-uk-geography</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-zno-uk-history</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-zno-uk-language-and-literature</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ukrainian-llm-zno-uk-math</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vgbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vpct</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-vals-index</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-caselaw-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-corpfin-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-finance-agent</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-ioi</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-legalbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-mgsm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-medcode</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-medqa</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-medscribe</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-mortgagetax</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-multimodal-index</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-poker-agent</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-proofbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-sage</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-taxeval-v2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-terminal-bench-2-0</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vals-ai-vibe-code-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-arc-c</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-arc-e</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-agentharm</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-agentharm-benign</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-drop</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-gaia</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-gpqa-d</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-gsm8k</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-hellaswag</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-humaneval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-ifeval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-in-house-ctf</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-intercode-ctf</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-math</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-mmlu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-mmlu-pro</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-mmmu-mc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-mmmu-oe</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-swe-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vector-eval-winogrande</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=virology-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=voxelbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=wdcd</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=wdcd-r1-understanding</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=wdcd-r2-in-document-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=wdcd-r3-pressure-integrity</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=wildbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=vellum-llm-leaderboard-multi-benchmark</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openlm-text2sql-wikisql</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openlm-text2sql-spider-em</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openlm-text2sql-spider-ex</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openlm-text2sql-bird-r-ves</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=openlm-text2sql-bird-ex</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmeb-image</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmeb-video</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mmeb-visdoc</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=super</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=swe-bench-live</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chatbot-arena-text-to-video</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chatbot-arena-image-to-video</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-humanity-s-last-exam-text-only</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=seal-audiomultichallenge-audio-output</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=open-llm-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=idea-bench-arena-full</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=inferbench-text-to-image-hps-v2-1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=inferbench-text-to-image-drawbench-imagereward</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=inferbench-text-to-image-drawbench-clipscore</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=inferbench-text-to-image-partiprompts-clipscore</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalcrafter-visual-quality</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalcrafter-text-video-alignment</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalcrafter-motion-quality</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=evalcrafter-temporal-consistency</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chronomagic-bench-umt-fvd</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chronomagic-bench-umtscore</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chronomagic-bench-mtscore</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=chronomagic-bench-chscore</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=mle-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=appbench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-avh-audio-crosscheck-explicit-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-avh-audio-refcheck-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-avh-audio-selfcheckgpt-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-avh-visual-crosscheck-explicit-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-avh-visual-refcheck-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-avh-visual-selfcheckgpt-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-vh-chair-i-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-vh-chair-s-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-vh-crosscheck-explicit-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-vh-crosscheck-implicit-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-vh-human-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-vh-pope-coco</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-vh-selfcheckgpt-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=multimodal-hallucination-vh-unihd-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-forever-rag-retrieval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-forever-rag-generation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-forever-rag-simple-retrieval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-forever-rag-simple-generation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-forever-rag-conditional-retrieval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-forever-rag-conditional-generation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-forever-rag-set-retrieval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-forever-rag-set-generation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-forever-rag-multi-hop-retrieval</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ai-forever-rag-multi-hop-generation</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=enterpriserag-bench</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=enterpriserag-bench-correctness</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=enterpriserag-bench-completeness</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=enterpriserag-bench-recall</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=enterpriserag-bench-valid-extra-docs-resistance</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=coding-agent-leaderboard</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=coding-agent-leaderboard-swe-bench-pro-ansible</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=coding-agent-leaderboard-swe-bench-verified</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ntrex-translation-bleu</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=ntrex-translation-chrf</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=piimb-pii-masking-avg-f1</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=piimb-pii-masking-openpii-f2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=piimb-pii-masking-gretel-f2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=piimb-pii-masking-nemotron-pii-f2</loc></url>
  <url><loc>https://aibenchmarks.dev/benchmark?slug=piimb-pii-masking-privy-f2</loc></url>
</urlset>
