{
  "meta": {
    "source": "rl-list.com",
    "generated": "2026-06-07",
    "vendor_count": 38,
    "confidence_tags": [
      "confirmed",
      "reported",
      "estimated",
      "unknown"
    ],
    "note": "First-pass directory from public + vendor-shared data. \"unknown\" = not publicly sourced (not zero). Request work samples for a final selection. RFI internals deliberately omitted. Only commercial vendors are ranked."
  },
  "vendors": [
    {
      "rank": 1,
      "focus_areas_normalised": [
        "Coding",
        "Computer Use",
        "Private Codebases"
      ],
      "slug": "mechanize",
      "brand_name": "Mechanize",
      "segment": "Commercial vendors",
      "website": "https://www.mechanize.work/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.mechanize.work/ (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2025,
        "confidence": "confirmed",
        "source": "https://www.mechanize.work/announcing-mechanize-inc/ (founded April 17, 2025; accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "San Francisco, USA",
        "confidence": "confirmed",
        "source": "https://www.linkedin.com/company/mechanize-inc (SF HQ) and https://tracxn.com/d/companies/mechanize (San Francisco; accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "distributed_remote": {
        "value": "no",
        "confidence": "reported",
        "source": "https://www.mechanize.work/apply (roles listed in-person San Francisco; accessed 2026-06-07)"
      },
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://www.mechanize.work/ ('Environments and evals for frontier coding agents'; accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "maturity": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": "No public OSS repos found for their environments; GBA Eval referenced as a benchmark but no license/repo confirmed (accessed 2026-06-07)"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "38 employees (as of 2026-05-31); size band 11-50",
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/mechanize (38 employees, as of 2026-05-31); LinkedIn shows 11-50 band; PitchBook reports ~25 (conflicting) (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/mechanize-inc (11-50 band) and Tracxn 38 employees (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "~10 open roles (e.g. Software Engineer, Research Engineer (Alignment), Operations Generalist, Recruiter, Counsel, Executive Assistant)",
        "confidence": "reported",
        "source": "https://www.mechanize.work/apply (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "$9.1M disclosed (April 24, 2026) plus an earlier undisclosed angel round (April 2025)",
        "confidence": "reported",
        "source": "https://www.mechanize.work/press-releases/ ($9.1M, April 24, 2026); https://tracxn.com/d/companies/mechanize (angel round April 28, 2025, amount undisclosed) (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "$9.1M, April 24, 2026 (stage not disclosed)",
        "confidence": "reported",
        "source": "https://www.mechanize.work/press-releases/ ('Mechanize raises $9.1M', April 24, 2026; no stage or investors named on page) (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Nat Friedman",
          "Daniel Gross",
          "Patrick Collison",
          "Dwarkesh Patel",
          "Sholto Douglas",
          "Marcus Abramovitch",
          "Jeff Dean"
        ],
        "confidence": "reported",
        "source": "https://www.mechanize.work/announcing-mechanize-inc/ (Nat Friedman, Daniel Gross, Patrick Collison, Dwarkesh Patel, Sholto Douglas, Marcus Abramovitch) and TechCrunch / press (Jeff Dean) (accessed 2026-06-07). Note: round-level investor list ($9.1M) not disclosed; these are early backers."
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [
          {
            "name": "Anthropic",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          }
        ],
        "confidence": "reported",
        "source": "https://techcrunch.com/2025/09/21/silicon-valley-bets-big-on-environments-to-train-ai-agents/ ('Mechanize has already been working with Anthropic on RL environments, two sources familiar with the matter told TechCrunch'; both companies declined to comment) (accessed 2026-06-07). Anonymous-sourced, unconfirmed by either party, not independently verified."
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "Founders are ex-Epoch AI researchers; hiring Research Engineer (Alignment) per https://www.mechanize.work/apply (accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Founders Matthew Barnett, Tamay Besiroglu, and Ege Erdil are co-founders/alumni of Epoch AI (an AI research institute Besiroglu co-founded in 2022)"
        ],
        "confidence": "confirmed",
        "source": "https://www.mechanize.work/announcing-mechanize-inc/ and https://techcrunch.com/2025/04/19/famed-ai-researcher-launches-controversial-startup-to-replace-all-human-workers-everywhere/ (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "GBA Eval, benchmark measuring whether coding agents can write a Game Boy Advance emulator within 24 hours"
        ],
        "confidence": "reported",
        "source": "https://www.mechanize.work/ (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "focus_areas": [
        "coding environments",
        "evaluation / benchmarks",
        "computer use environments"
      ],
      "positioning_summary": "Mechanize is a small, elite San Francisco vendor (founded April 2025 by ex-Epoch AI researchers Matthew Barnett, Tamay Besiroglu, and Ege Erdil) that builds a small number of robust, high-fidelity RL environments and evals for frontier coding agents, selling to leading AI labs. Its stated long-term mission is the full automation of valuable economic work via simulated 'digital office' environments.",
      "best_fit_use_case": "A frontier lab seeking a small set of deep, hard software-engineering RL environments and evals built by elite engineers rather than high-volume crowdsourced data.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.mechanize.work/",
          "accessed_date": "2026-06-07",
          "note": "Official homepage, 'Environments and evals for frontier coding agents', GBA Eval, RL environments."
        },
        {
          "url": "https://www.mechanize.work/announcing-mechanize-inc/",
          "accessed_date": "2026-06-07",
          "note": "Founding announcement, founders, April 17 2025 date, mission, investors."
        },
        {
          "url": "https://www.mechanize.work/apply",
          "accessed_date": "2026-06-07",
          "note": "Careers page, ~10 open roles, in-person SF, salaries to $500K."
        },
        {
          "url": "https://www.mechanize.work/press-releases/",
          "accessed_date": "2026-06-07",
          "note": "Press releases, $9.1M funding dated April 24, 2026."
        },
        {
          "url": "https://www.linkedin.com/company/mechanize-inc",
          "accessed_date": "2026-06-07",
          "note": "Public LinkedIn, 11-50 size band (no exact headcount shown), SF HQ address, industry."
        },
        {
          "url": "https://techcrunch.com/2025/09/21/silicon-valley-bets-big-on-environments-to-train-ai-agents/",
          "accessed_date": "2026-06-07",
          "note": "Reports Anthropic working relationship, $500K SWE salaries, small-number-of-robust-environments strategy."
        },
        {
          "url": "https://techcrunch.com/2025/04/19/famed-ai-researcher-launches-controversial-startup-to-replace-all-human-workers-everywhere/",
          "accessed_date": "2026-06-07",
          "note": "Launch coverage, Tamay Besiroglu, Epoch AI background, mission."
        },
        {
          "url": "https://tracxn.com/d/companies/mechanize/__Zj76EY9-Iuwd7s9iR3MxAfLvnovzofl38AL66MgNwrQ",
          "accessed_date": "2026-06-07",
          "note": "Third-party profile, founded 2025, 38 employees (May 31 2026), SF, angel round April 28 2025."
        },
        {
          "url": "https://pitchbook.com/profiles/company/846556-03",
          "accessed_date": "2026-06-07",
          "note": "Third-party profile (headcount ~25 reported; conflicting)."
        },
        {
          "url": "https://the-decoder.com/mechanize-is-building-digital-offices-to-train-ai-agents-to-fully-automate-computer-work/",
          "accessed_date": "2026-06-07",
          "note": "Coverage of digital work environments approach."
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "notable_investors",
          "was": "11 investors incl. Adam D'Angelo, Jeff Dean, Devendra Chaplot, Marco Mascorro, Alex Atallah, Marcus Abramovitch (confidence: confirmed)",
          "now": "7 names (Nat Friedman, Daniel Gross, Patrick Collison, Dwarkesh Patel, Sholto Douglas, Marcus Abramovitch, Jeff Dean); confidence: reported",
          "reason": "The official founding announcement names only 6 backers (Friedman, Gross, Collison, Patel, Douglas, Abramovitch); press adds Jeff Dean. Adam D'Angelo, Devendra Chaplot, Marco Mascorro, and Alex Atallah could not be verified in the official announcement or credible press, so they were removed. The $9.1M (April 2026) round did not disclose investors. Downgraded confirmed -> reported."
        },
        {
          "field": "notable_customers (Anthropic verification)",
          "was": "verification: verified",
          "now": "verification: self-claimed (lower available enum tier); field confidence kept at reported",
          "reason": "TechCrunch sources the Anthropic relationship to two anonymous sources, and BOTH companies declined to comment. Anonymous, unconfirmed reporting does not meet the bar for 'verified' (credible third-party confirmation). Downgraded to the lower enum value with an explicit note that it is unconfirmed."
        },
        {
          "field": "current_headcount",
          "was": "'38 employees (LinkedIn, as of 2026-06-07 snapshot)' sourced to LinkedIn",
          "now": "'38 employees (as of 2026-05-31)' sourced to Tracxn, with LinkedIn band and conflicting PitchBook (~25) noted",
          "reason": "The 38 figure traces to Tracxn (as-of 2026-05-31), not a LinkedIn headcount snapshot; LinkedIn only shows the 11-50 band. PitchBook reports ~25, a conflict worth flagging. Source attribution corrected."
        },
        {
          "field": "total_raised",
          "was": "'Undisclosed seed/angel (April 2025) + $9.1M (April 24, 2026)' citing press-releases + Tracxn",
          "now": "Same substance, clarified: $9.1M disclosed + earlier undisclosed angel round",
          "reason": "Wording tightened; confirmed $9.1M on official press page (no stage/investors listed) and undisclosed angel round per Tracxn. Confidence reported retained."
        },
        {
          "field": "last_round",
          "was": "source implied investors/stage available",
          "now": "Explicit note that the press page lists only the amount and date, no stage or investors named",
          "reason": "Verified the official press page contains only '$9.1M, April 24, 2026' with no round label or investor list; clarified to avoid overreach."
        },
        {
          "field": "focus_areas",
          "was": "['coding environments','evaluation / benchmarks','enterprise workflows']",
          "now": "['coding environments','evaluation / benchmarks','computer use environments']",
          "reason": "'enterprise workflows' is not well supported; the verified positioning is frontier coding environments/evals plus 'digital office' environments to automate computer work (the-decoder), which maps to the controlled-vocab term 'computer use environments'. All terms remain within the controlled vocabulary."
        },
        {
          "field": "open_roles_count",
          "was": "confidence: confirmed",
          "now": "confidence: reported",
          "reason": "Careers-page role lists fluctuate; some draft role titles (e.g. 'Puzzle Maker', 'Office Concierge') could not be re-confirmed. Downgraded to reported."
        },
        {
          "field": "published_papers_or_benchmarks",
          "was": "confidence: confirmed",
          "now": "confidence: reported",
          "reason": "GBA Eval is referenced on the vendor homepage only; no independent paper/registry confirmation, so downgraded confirmed -> reported."
        },
        {
          "field": "researcher_backgrounds",
          "was": "confidence confirmed; listed Barnett/Besiroglu/Erdil as Epoch AI alumni",
          "now": "Same names, confidence retained; source strengthened to include official announcement",
          "reason": "Founders and Epoch AI tie confirmed via official announcement and TechCrunch; no value change, source improved."
        }
      ],
      "verification_summary": "Re-verified the highest-risk claims against the official site, the founding announcement, TechCrunch, and Tracxn. Confirmed: company identity matches the directory note ('deep, narrow, elite', small SF firm building a few robust RL coding environments/evals for frontier labs), founded April 17 2025 by ex-Epoch AI researchers (Barnett, Besiroglu, Erdil), and the $9.1M raise dated April 24 2026 (amount only; no stage or investors disclosed). Key downgrades: investor list trimmed from 11 to 7 (4 names unverifiable) and downgraded to 'reported'; the Anthropic 'customer' tie is anonymous-sourced with both parties declining comment, so it does NOT qualify as 'verified' (set to the lower enum tier and flagged as unconfirmed); headcount of 38 re-sourced to Tracxn (as-of 2026-05-31) with a conflicting PitchBook ~25 figure noted, band kept 11-50; focus_areas corrected ('enterprise workflows' -> 'computer use environments'); several over-stated 'confirmed' confidences (open_roles, GBA Eval) downgraded to 'reported'. Could not access Crunchbase/PitchBook directly (403). Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Confirmed correct company via directory note (deep, narrow, elite; Code/SWE tags): mechanize.work, founded April 17, 2025 by Matthew Barnett, Tamay Besiroglu, Ege Erdil (ex-Epoch AI).",
          "Product: 'Environments and evals for frontier coding agents'; sells robust RL environments to AI labs (official site).",
          "GBA Eval benchmark (Game Boy Advance emulator in 24h) confirmed on official site.",
          "HQ San Francisco (201 3rd St) per LinkedIn; in-person roles per careers page.",
          "~10 open roles with salaries up to $500K for SWEs (careers page + TechCrunch).",
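          "Investor list partially verified: six early backers are named in the official announcement and press adds Jeff Dean; four other draft names could not be verified and were removed (confidence: reported).",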
          "$9.1M funding confirmed on official press-releases page, dated April 24, 2026.",
          "Anthropic working relationship reported by TechCrunch (two sources; both companies declined to comment)."
        ],
        "missing": [
          "Valuation (a $500M figure appeared in one fetch summary but was not corroborated by official sources or multiple third parties, treated as unknown).",
          "Formal funding stage label and exact total raised across rounds.",
          "Researcher count, deployment model, maturity stage, SOC2/security certifications, revenue."
        ],
        "conflicts": [
          "Headcount: Tracxn reports 38 employees (as of May 31, 2026); LinkedIn shows only the 11-50 size band; PitchBook reportedly ~25; Wellfound 11-50. Used Tracxn (38) with the 11-50 band.",
          "Funding/valuation: one fetch claimed '$9.1M raise at a $500M valuation' (LinkedIn fetch), but no primary or secondary source corroborates the $500M valuation; $9.1M itself is confirmed by the official press-releases page."
        ],
        "stale": [
          "Initial announcement and TechCrunch 'environments' reporting are from 2025; still the most detailed public descriptions of the Anthropic relationship."
        ],
        "open_questions": [
          "Is the reported $500M valuation real? Needs a primary/credible secondary source.",
          "Is the Anthropic relationship a paying commercial contract (vs pilot)?",
          "Deployment model and any security/compliance posture for enterprise/lab buyers."
        ]
      }
    },
    {
      "rank": 2,
      "focus_areas_normalised": [
        "Coding",
        "Computer Use",
        "Enterprise Workflows"
      ],
      "slug": "afterquery",
      "brand_name": "AfterQuery",
      "segment": "Commercial vendors",
      "website": "https://www.afterquery.com",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.afterquery.com (accessed 2026-06-07); https://www.businesswire.com/news/home/20260409469482/en/AfterQuery-Raises-$30-Million-Series-A-Round-at-$300-Million-Valuation (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2025,
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/afterquery (accessed 2026-06-07); https://siliconangle.com/2026/04/10/ai-training-data-startup-afterquery-nabs-30m-investment/ (accessed 2026-06-07), founded Jan/Feb 2025, YC W25"
      },
      "hq_location": {
        "value": "San Francisco, USA",
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/afterquery (accessed 2026-06-07); https://www.afterquery.com/careers (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "distributed_remote": {
        "value": "no",
        "confidence": "reported",
        "source": "https://www.afterquery.com/careers (accessed 2026-06-07), open roles listed as San Francisco; note core expert-data workforce (~100k practitioners) is distributed but corporate roles are SF-based"
      },
      "current_headcount": {
        "value": "~126-140 employees (Tracxn 126 as of 2026-04-30; LinkedIn/aggregators ~136-140)",
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/afterquery (accessed 2026-06-07); https://www.linkedin.com/company/afterquery (public snippet, accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "51-200",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/afterquery (accessed 2026-06-07); https://tracxn.com/d/companies/afterquery (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": 24,
        "confidence": "reported",
        "source": "https://www.afterquery.com/careers (accessed 2026-06-07), count taken from careers page snapshot; not independently verifiable post-access"
      },
      "what_they_sell": {
        "value": "mixed",
        "confidence": "confirmed",
        "source": "https://www.afterquery.com (accessed 2026-06-07), expert human data (SFT, RL rubrics), agent/RL environments, computer-use trajectories, and benchmarks"
      },
      "deployment_model": {
        "value": "managed-hosted (data + RL environments delivered as a service to AI labs); open-source frameworks available (Harbor)",
        "confidence": "estimated",
        "source": "https://www.afterquery.com (accessed 2026-06-07); https://github.com/AfterQuery/harbor (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "GA",
        "confidence": "estimated",
        "source": "https://www.afterquery.com (accessed 2026-06-07); https://www.businesswire.com/news/home/20260409469482/en/AfterQuery-Raises-$30-Million-Series-A-Round-at-$300-Million-Valuation (accessed 2026-06-07), commercially selling to AI labs with stated $100M+ run rate; product maturity not formally labeled by vendor"
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/AfterQuery (accessed 2026-06-07), public benchmarks (VADER, FinanceQA, IDE-Bench) and Harbor RL-environment framework"
      },
      "license": {
        "value": "Apache-2.0 (Harbor RL-environment framework); other repos vary/unspecified",
        "confidence": "reported",
        "source": "https://github.com/AfterQuery/harbor (accessed 2026-06-07), license per Harbor repo; not independently re-confirmed across all repos"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://www.afterquery.com (accessed 2026-06-07), no trust/security page or SOC2 claim found"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://www.afterquery.com (accessed 2026-06-07), no dedicated security/trust page found"
      },
      "total_raised": {
        "value": "$30.5M ($0.5M pre-seed 2025 + $30M Series A 2026)",
        "confidence": "confirmed",
        "source": "https://www.crunchbase.com/funding_round/afterquery-pre-seed--696021d4 (accessed 2026-06-07); https://www.businesswire.com/news/home/20260409469482/en/AfterQuery-Raises-$30-Million-Series-A-Round-at-$300-Million-Valuation (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "Series A, $30M, announced 2026-04-09",
        "confidence": "confirmed",
        "source": "https://www.businesswire.com/news/home/20260409469482/en/AfterQuery-Raises-$30-Million-Series-A-Round-at-$300-Million-Valuation (accessed 2026-06-07); https://siliconangle.com/2026/04/10/ai-training-data-startup-afterquery-nabs-30m-investment/ (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "$300M post-money (Series A)",
        "confidence": "confirmed",
        "source": "https://www.businesswire.com/news/home/20260409469482/en/AfterQuery-Raises-$30-Million-Series-A-Round-at-$300-Million-Valuation (accessed 2026-06-07)"
      },
      "notable_investors": {
        "value": [
          "Altos Ventures (lead, Series A)",
          "The Raine Group",
          "Y Combinator",
          "BoxGroup",
          "Latitude Capital",
          "Angel investors from Google DeepMind, OpenAI, Anthropic, Meta Superintelligence Labs, Microsoft AI"
        ],
        "confidence": "confirmed",
        "source": "https://www.businesswire.com/news/home/20260409469482/en/AfterQuery-Raises-$30-Million-Series-A-Round-at-$300-Million-Valuation (accessed 2026-06-07); https://www.ycombinator.com/launches/Mm5-afterquery-high-quality-ai-starts-with-high-quality-human-data (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "$100M+ annual revenue run rate (company-stated, ~14 months from founding)",
        "confidence": "reported",
        "source": "https://www.businesswire.com/news/home/20260409469482/en/AfterQuery-Raises-$30-Million-Series-A-Round-at-$300-Million-Valuation (accessed 2026-06-07), figure originates from the company, repeated in press, not independently audited"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Frontier AI labs (unnamed; company claims 'every leading AI lab' is a customer; press separately names OpenAI, Anthropic, Google as labs it serves, but without third-party confirmation)",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          }
        ],
        "confidence": "reported",
        "source": "https://www.businesswire.com/news/home/20260409469482/en/AfterQuery-Raises-$30-Million-Series-A-Round-at-$300-Million-Valuation (accessed 2026-06-07); https://www.afterquery.com (accessed 2026-06-07), no individually verified named customer; claims trace to vendor"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://arxiv.org/abs/2505.19395 (accessed 2026-06-07); https://arxiv.org/abs/2601.11868 (accessed 2026-06-07), published arXiv benchmarks (VADER, Terminal-Bench) and Research roles on careers page"
      },
      "researcher_count": {
        "value": "unknown (2 open Research roles; benchmark-paper authors exist but team count not enumerated)",
        "confidence": "unknown",
        "source": "https://www.afterquery.com/careers (accessed 2026-06-07)"
      },
      "researcher_backgrounds": {
        "value": [
          "Founders: Spencer Mateega (ex-Meta, ex-Google, Morgan Stanley/Silver Lake; Wharton/Penn), Carlos Georgescu (ex-Citadel Securities, ex-Meta, ex-Google), Danny Tang",
          "Founding team / network cites prior roles at Goldman Sachs, McKinsey, Jane Street, Palantir, NVIDIA, Google"
        ],
        "confidence": "reported",
        "source": "https://www.ycombinator.com/launches/Mm5-afterquery-high-quality-ai-starts-with-high-quality-human-data (accessed 2026-06-07); https://www.afterquery.com/careers (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "VADER: A Human-Evaluated Benchmark for Vulnerability Assessment, Detection, Explanation, and Remediation (arXiv:2505.19395)",
          "Terminal-Bench / Terminal-Bench 2.0 (arXiv:2601.11868)",
          "FinanceQA: A Benchmark for Evaluating Financial Analysis Capabilities in LLMs (arXiv:2501.18062; github.com/AfterQuery/FinanceQA)",
          "IDE-Bench (github.com/AfterQuery/ide-bench)"
        ],
        "confidence": "confirmed",
        "source": "https://arxiv.org/abs/2505.19395 (accessed 2026-06-07); https://arxiv.org/abs/2601.11868 (accessed 2026-06-07); https://github.com/AfterQuery (accessed 2026-06-07)"
      },
      "focus_areas": [
        "evaluation / benchmarks",
        "coding environments",
        "finance",
        "enterprise workflows",
        "computer use environments",
        "security"
      ],
      "positioning_summary": "AfterQuery is a San Francisco applied-research lab and data platform (YC W25) that supplies frontier AI labs with expert-generated human data (SFT, RL rubrics), agent/RL environments, and computer-use trajectories, drawn from a large network of verified practitioners. It publishes real-task benchmarks such as Terminal-Bench, VADER, FinanceQA, and IDE-Bench, positioning around capturing how domain experts (engineers, financial analysts, lawyers) reason.",
      "best_fit_use_case": "A frontier or enterprise AI team needing expert-authored RL environments, post-training data, and realistic real-task benchmarks across code, finance, and professional workflows.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.afterquery.com",
          "accessed_date": "2026-06-07",
          "note": "Official site, products: SFT data, RL+rubrics, agent environments, computer-use trajectories; $100M revenue run rate claim"
        },
        {
          "url": "https://www.afterquery.com/careers",
          "accessed_date": "2026-06-07",
          "note": "24 open roles across 6 departments, all SF; founding team prior employers"
        },
        {
          "url": "https://www.afterquery.com/blog/terminal-bench-improvement",
          "accessed_date": "2026-06-07",
          "note": "Terminal-Bench 2.0 improvement using Tinker and Harbor"
        },
        {
          "url": "https://www.ycombinator.com/companies/afterquery",
          "accessed_date": "2026-06-07",
          "note": "Founded 2025, YC W25, SF, founders, ~30 employees (older snapshot)"
        },
        {
          "url": "https://techfundingnews.com/afterquery-gets-30m-from-altos-ventures-to-help-ai-understand-real-world-problems-better/",
          "accessed_date": "2026-06-07",
          "note": "Series A $30M at $300M valuation, 2026-04-09; investors; $100M+ run rate; founders; founded Jan 2025"
        },
        {
          "url": "https://startupintros.com/orgs/afterquery",
          "accessed_date": "2026-06-07",
          "note": "Total raised $30.5M across 2 rounds; founders Danny Tang, Carlos Georgescu, Spencer Mateega"
        },
        {
          "url": "https://www.linkedin.com/company/afterquery",
          "accessed_date": "2026-06-07",
          "note": "Public snippet: 51-200 band, ~140 employees, Software Development, founded 2025"
        },
        {
          "url": "https://tracxn.com/d/companies/afterquery",
          "accessed_date": "2026-06-07",
          "note": "126 employees as of 2026-04-30"
        },
        {
          "url": "https://github.com/AfterQuery",
          "accessed_date": "2026-06-07",
          "note": "Public repos: vader, harbor, FinanceQA, IDE-Bench, anvil, mle-reasoning-environment"
        },
        {
          "url": "https://github.com/AfterQuery/harbor",
          "accessed_date": "2026-06-07",
          "note": "Harbor, framework for agent evaluations and RL environments, Apache-2.0"
        },
        {
          "url": "https://arxiv.org/abs/2505.19395",
          "accessed_date": "2026-06-07",
          "note": "VADER benchmark paper, supported by AfterQuery Inc."
        },
        {
          "url": "https://arxiv.org/abs/2601.11868",
          "accessed_date": "2026-06-07",
          "note": "Terminal-Bench paper"
        },
        {
          "url": "https://sacra.com/c/afterquery/",
          "accessed_date": "2026-06-07",
          "note": "Revenue/funding profile"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "total_raised",
          "was": "$30.5M (across 2 rounds), confidence: reported",
          "now": "$30.5M ($0.5M pre-seed 2025 + $30M Series A 2026), confidence: confirmed",
          "reason": "Upgraded to confirmed: Crunchbase pre-seed funding-round profile ($0.5M, BoxGroup/YC) plus BusinessWire/SiliconANGLE primary coverage of the $30M Series A together corroborate the $30.5M total. Replaced weak single aggregator (startupintros) with primary sources. A conflicting $46M figure on startuphub.ai was treated as an outlier/error and is not supported."
        },
        {
          "field": "last_round",
          "was": "Series A, $30M, announced 2026-04-09, confidence: reported",
          "now": "Same value, confidence: confirmed",
          "reason": "Confirmed against BusinessWire primary press release and multiple independent outlets (SiliconANGLE, BuiltInSF, Finsmes); date 2026-04-09 corroborated."
        },
        {
          "field": "valuation",
          "was": "$300M (post Series A), confidence: reported",
          "now": "$300M post-money (Series A), confidence: confirmed",
          "reason": "BusinessWire press-release title and body explicitly state $300M valuation; corroborated across multiple outlets."
        },
        {
          "field": "notable_investors",
          "was": "[Altos Ventures, The Raine Group, Y Combinator, BoxGroup], confidence: reported",
          "now": "Added Latitude Capital and lab angels (DeepMind/OpenAI/Anthropic/Meta SI Labs/Microsoft AI), confidence: confirmed",
          "reason": "BusinessWire and YC launch page confirm Altos (lead), Raine, YC, BoxGroup, plus Latitude Capital and angels from major labs. Upgraded to confirmed and added the omitted participants."
        },
        {
          "field": "status",
          "was": "confidence: confirmed (single source: company site)",
          "now": "confidence: confirmed (added BusinessWire 2026 source)",
          "reason": "Strengthened sourcing for active status with recent April 2026 funding announcement."
        },
        {
          "field": "founded_year",
          "was": "source cited techfundingnews redirect URL",
          "now": "Same value 2025; source replaced with YC + SiliconANGLE",
          "reason": "Replaced single redirecting funding-news URL with YC company page and SiliconANGLE (founded Jan/Feb 2025, YC W25); value unchanged."
        },
        {
          "field": "open_roles_count",
          "was": "24, confidence: confirmed",
          "now": "24, confidence: reported",
          "reason": "Downgraded: an open-roles count from a careers page is a point-in-time snapshot that is not independently re-verifiable and changes frequently; 'confirmed' overstates durability."
        },
        {
          "field": "license",
          "was": "Apache-2.0 (Harbor), confidence: confirmed",
          "now": "Same value, confidence: reported",
          "reason": "Downgraded: only the Harbor repo license was checked; the field itself notes other repos vary/unspecified, so 'confirmed' for the overall license posture is too strong."
        },
        {
          "field": "maturity",
          "was": "GA, confidence: estimated; source reasoning '$100M+ revenue run rate implies commercial GA'",
          "now": "GA, confidence: estimated; reasoning corrected",
          "reason": "Revenue run-rate does not establish product 'GA' maturity (a sales metric, not a release stage). Kept GA/estimated but corrected the justification to commercial availability to AI-lab customers; vendor does not formally label a maturity stage."
        },
        {
          "field": "revenue_signals",
          "was": "source cited techfundingnews + company site",
          "now": "source replaced with BusinessWire primary; clarified ~14 months and unaudited",
          "reason": "Anchored to primary press release; corrected '~17 months' to ~14 months (founded ~Feb 2025, announced Apr 2026) and flagged the figure as company-originated/unaudited."
        },
        {
          "field": "notable_customers",
          "was": "self-claimed, frontier_lab_tie true, confidence: reported",
          "now": "Kept self-claimed/true; expanded note that press-named labs (OpenAI/Anthropic/Google) remain unverified",
          "reason": "No third-party-verified named customer exists; the 'every leading AI lab' claim and lab names trace back to the vendor/press repeating the vendor. Verification correctly remains self-claimed (not upgraded)."
        },
        {
          "field": "researcher_backgrounds",
          "was": "source cited careers + YC company page; listed ex-Meta/Google/Citadel/Wharton generically",
          "now": "Named founders (Mateega, Georgescu, Tang) with backgrounds; source replaced with YC launch page",
          "reason": "Tightened to named founders and verified backgrounds from the YC launch page; kept as reported since exact prior-employer details are vendor-stated."
        },
        {
          "field": "distributed_remote",
          "was": "no, 'all 24 open roles SF, no remote'",
          "now": "no, added note that the ~100k expert practitioner network is distributed while corporate roles are SF",
          "reason": "Clarified an important nuance: corporate HQ roles are SF (so 'no' stands for the company), but the data-generation workforce is globally distributed."
        },
        {
          "field": "published_papers_or_benchmarks",
          "was": "included 'SpreadsheetBench 2 (referenced in company posts)' and FinanceQA via GitHub only",
          "now": "Removed SpreadsheetBench 2; added FinanceQA arXiv:2501.18062",
          "reason": "SpreadsheetBench 2 could not be independently verified as an AfterQuery publication, so it was dropped to avoid overreach. Added the verifiable FinanceQA arXiv ID."
        }
      ],
      "verification_summary": "Confirmed this is the correct AfterQuery (YC W25 applied-research/data lab) matching the directory note 'benchmarks with real-task framing': Terminal-Bench (realistic CLI tasks), FinanceQA (real-world investment work), and VADER (real-world vulnerabilities) all align. Funding was strengthened to 'confirmed': $30M Series A at $300M post-money announced 2026-04-09 led by Altos Ventures (with The Raine Group, YC, BoxGroup, Latitude Capital, and lab angels), verified via the BusinessWire primary release plus SiliconANGLE/BuiltInSF/Finsmes; total raised $30.5M = $0.5M Crunchbase pre-seed + $30M Series A (a stray $46M aggregator figure was rejected as an error). Headcount sanity-checked at ~126-140 (Tracxn 126 on 2026-04-30), correctly in the 51-200 band, NOT 200+. The $100M+ run rate stays 'reported' (company-originated, unaudited). Notable customers remain 'self-claimed' (no third-party-verified named lab); the 'every leading AI lab' claim and OpenAI/Anthropic/Google mentions trace to the vendor/press. SOC2, certifications, and security page remain 'unknown' (no trust page found). Downgraded open_roles_count and license to 'reported' (point-in-time / single-repo), removed unverifiable SpreadsheetBench 2, and corrected the founding-to-funding interval and maturity reasoning. Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Official products: expert human data (SFT, RL rubrics), agent/RL environments, computer-use trajectories, benchmarks",
          "Series A $30M at $300M valuation (Altos Ventures lead), announced 2026-04-09; total ~$30.5M over 2 rounds",
          "Founders Danny Tang, Carlos Georgescu, Spencer Mateega; ex-Meta/Google/Citadel; YC W25",
          "Founded 2025, San Francisco HQ",
          "24 open roles (incl. Research Scientist - Frontier Data), all SF",
          "Public OSS benchmarks: VADER (arXiv:2505.19395), Terminal-Bench (arXiv:2601.11868), FinanceQA, IDE-Bench; Harbor RL-env framework Apache-2.0",
          "$100M+ revenue run rate (company-stated)",
          "Customer claim: 'every major lab is now a customer' (frontier-lab tie, self-claimed)"
        ],
        "missing": [
          "SOC 2 / ISO certifications and trust/security page (none found)",
          "Named/verified customers (only self-claimed frontier-lab references)",
          "Exact researcher count and headcount growth %",
          "Additional office locations"
        ],
        "conflicts": [
          "Founding year: most sources (YC, TechFundingNews) say 2025/Jan 2025, while some aggregators list 2023 or 2024; treated 2025 as confirmed",
          "Headcount varies by source: YC ~30 (older), Tracxn 126 (2026-04-30), LinkedIn ~140 (likely data-collection lag); used 51-200 band"
        ],
        "stale": [
          "YC profile headcount (~30) appears stale relative to 2026 figures of 126-140"
        ],
        "open_questions": [
          "Which specific frontier labs are customers (unverified)",
          "SOC 2 / security posture for enterprise buyers",
          "Split of revenue between human-data vs RL-environment products"
        ]
      }
    },
    {
      "rank": 3,
      "focus_areas_normalised": [
        "Coding",
        "Computer Use",
        "Enterprise Workflows",
        "Private Codebases"
      ],
      "slug": "deeptune",
      "brand_name": "Deeptune",
      "segment": "Commercial vendors",
      "website": "https://deeptune.com",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://deeptune.com/blog/series-a/ accessed 2026-06-07"
      },
      "focus_areas": [
        "computer use environments",
        "coding environments",
        "execution infrastructure",
        "enterprise workflows"
      ],
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://a16z.com/announcement/investing-in-deeptune/ accessed 2026-06-07"
      },
      "deployment_model": {
        "value": "managed-hosted / API (pre-built 'training gyms' integrate in 'a few lines of code'; gyms come with problems, datasets, and infrastructure); customers are frontier labs",
        "confidence": "reported",
        "source": "https://deeptune.com/ accessed 2026-06-07"
      },
      "maturity": {
        "value": "GA",
        "confidence": "estimated",
        "source": "https://deeptune.com/blog/series-a/ accessed 2026-06-07"
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": ""
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "founded_year": {
        "value": "~2025 per official Series A blog ('since we started one year ago', Mar 2026); aggregators (Tracxn, CB Insights) and LinkedIn list 2022 (conflict); leaning 2025 per primary source",
        "confidence": "reported",
        "source": "https://deeptune.com/blog/series-a/ ; https://www.linkedin.com/company/trydeeptune accessed 2026-06-07"
      },
      "hq_location": {
        "value": "New York, NY, USA",
        "confidence": "confirmed",
        "source": "https://fortune.com/2026/03/19/andreessen-horowitz-ai-startups-deeptune-series-a/ accessed 2026-06-07"
      },
      "other_locations": {
        "value": [
          "India (remote Member of Technical Staff role posted; a job posting, not a confirmed office)"
        ],
        "confidence": "reported",
        "source": "https://jobs.ashbyhq.com/deeptune accessed 2026-06-07"
      },
      "distributed_remote": {
        "value": "partial: NYC team described as in-person; some remote roles posted (e.g. India)",
        "confidence": "reported",
        "source": "https://jobs.ashbyhq.com/deeptune ; https://fortune.com/2026/03/19/andreessen-horowitz-ai-startups-deeptune-series-a/ accessed 2026-06-07"
      },
      "current_headcount": {
        "value": "~20-25 (Fortune: 'roughly 20-person'; LinkedIn snippet: 25)",
        "confidence": "reported",
        "source": "https://fortune.com/2026/03/19/andreessen-horowitz-ai-startups-deeptune-series-a/ ; https://www.linkedin.com/company/trydeeptune accessed 2026-06-07"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/trydeeptune accessed 2026-06-07"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "~6 roles posted (e.g. Founding Recruiter, 2x Member of Technical Staff incl. India, Strategic Projects Lead, Founding Operations, open 'Build Your Role')",
        "confidence": "reported",
        "source": "https://jobs.ashbyhq.com/deeptune accessed 2026-06-07"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://a16z.com/announcement/investing-in-deeptune/ accessed 2026-06-07"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Team includes engineers/operators from Anthropic, Scale AI, Palantir, Hebbia, Glean, Retool, Modal (per company/press)",
          "CEO/co-founder Tim Lupo: ex-Hebbia founding engineer, USC CS & Business",
          "Co-founder/CTO Lukas Schmit (per Crunchbase and press)"
        ],
        "confidence": "reported",
        "source": "https://fortune.com/2026/03/19/andreessen-horowitz-ai-startups-deeptune-series-a/ ; https://www.crunchbase.com/person/lukas-schmit ; https://www.linkedin.com/in/timlup/ accessed 2026-06-07"
      },
      "total_raised": {
        "value": "~$43M (latest/likely only round). NOTE: value reflects the Series A round; lifetime total is uncertain because aggregators conflict (Tracxn/CB Insights $46.1M, PitchBook $42.2M)",
        "confidence": "reported",
        "source": "https://deeptune.com/blog/series-a/ ; https://tracxn.com/d/companies/deeptune/__oi3tMd7lIKO3Yo_B0aq6tBQHcjZSfLMnyHdwbaOa0SE accessed 2026-06-07"
      },
      "last_round": {
        "value": "Series A, $43M, announced March 19, 2026 (Crunchbase lists round date Feb 23, 2026)",
        "confidence": "confirmed",
        "source": "https://deeptune.com/blog/series-a/ ; https://a16z.com/announcement/investing-in-deeptune/ ; https://fortune.com/2026/03/19/andreessen-horowitz-ai-startups-deeptune-series-a/ accessed 2026-06-07"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Andreessen Horowitz (a16z, lead)",
          "776",
          "Abstract Ventures",
          "Inspired Capital",
          "Noam Brown (angel, OpenAI)",
          "Brendan Foody (angel, Mercor CEO)",
          "Yash Patil (angel, Applied Compute CEO)"
        ],
        "confidence": "confirmed",
        "source": "https://deeptune.com/blog/series-a/ ; https://fortune.com/2026/03/19/andreessen-horowitz-ai-startups-deeptune-series-a/ accessed 2026-06-07"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [
          {
            "name": "Leading/frontier AI labs (unnamed; company claims '100s of gyms' built for them and contributions to recent computer-use advances)",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          }
        ],
        "confidence": "reported",
        "source": "https://deeptune.com/ ; https://a16z.com/announcement/investing-in-deeptune/ accessed 2026-06-07"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "published_papers_or_benchmarks": {
        "value": [
          "No own papers/benchmarks published; a16z post references third-party benchmarks OSWorld and Terminal-Bench as targets Deeptune environments help improve (cites Opus 4.6 ~72.7% and GPT-5.4 ~75% on OSWorld)"
        ],
        "confidence": "reported",
        "source": "https://a16z.com/announcement/investing-in-deeptune/ accessed 2026-06-07"
      },
      "positioning_summary": "Deeptune is a New York-based startup building managed reinforcement-learning environments ('training gyms') for computer-use and code, where AI agents practice and are evaluated on realistic digital knowledge-work tasks (simulating tools like Slack and Salesforce). It sells these pre-built environments primarily to frontier AI labs and raised a $43M Series A led by a16z, announced March 2026.",
      "best_fit_use_case": "Frontier labs and model teams needing ready-made, managed RL environments for training/evaluating computer-use and coding agents.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://deeptune.com/",
          "accessed_date": "2026-06-07",
          "note": "Official site: training gyms, code/spreadsheet practice, few-lines-of-code integration, 100s of gyms for frontier labs, NYC"
        },
        {
          "url": "https://deeptune.com/blog/series-a/",
          "accessed_date": "2026-06-07",
          "note": "Series A announcement: $43M, a16z lead, 776/Abstract/Inspired, angels Noam Brown/Brendan Foody/Yash Patil; team ex-Anthropic/Scale/Palantir/Modal/Glean/Retool/Hebbia"
        },
        {
          "url": "https://a16z.com/announcement/investing-in-deeptune/",
          "accessed_date": "2026-06-07",
          "note": "a16z investment post: RL environments for computer-use and code; founder Tim Lupo; OSWorld and Terminal-Bench referenced"
        },
        {
          "url": "https://fortune.com/2026/03/19/andreessen-horowitz-ai-startups-deeptune-series-a/",
          "accessed_date": "2026-06-07",
          "note": "Founders, ~20 headcount, NYC HQ, $43M Series A March 2026, investors/angels, team backgrounds"
        },
        {
          "url": "https://siliconangle.com/2026/03/19/deeptune-raises-43m-accelerate-ai-learning-virtual-training-gyms/",
          "accessed_date": "2026-06-07",
          "note": "Series A coverage; Slack/Salesforce workflow simulation; rollouts and rewards"
        },
        {
          "url": "https://www.linkedin.com/company/trydeeptune",
          "accessed_date": "2026-06-07",
          "note": "Public snippet: 11-50 / 25 employees, founded 2022 (conflicts with blog), NYC, Research Services, 'Training gyms for AI agents'"
        },
        {
          "url": "https://jobs.ashbyhq.com/deeptune",
          "accessed_date": "2026-06-07",
          "note": "Open roles: ~6 incl Founding Recruiter, Member of Technical Staff (NYC + India remote), Strategic Projects Lead, Founding Operations"
        },
        {
          "url": "https://www.linkedin.com/in/timlup/",
          "accessed_date": "2026-06-07",
          "note": "Tim Lupo Founder & CEO; ex-Hebbia founding engineer; USC CS & Business"
        },
        {
          "url": "https://x.com/a16z/status/2034694854123692462",
          "accessed_date": "2026-06-07",
          "note": "a16z: leading RL environments for computer-use and code"
        },
        {
          "url": "https://tracxn.com/d/companies/deeptune/__oi3tMd7lIKO3Yo_B0aq6tBQHcjZSfLMnyHdwbaOa0SE",
          "accessed_date": "2026-06-07",
          "note": "Aggregator profile; lists $46.1M raised (conflicts with $43M) and co-founder Lukas Schmit"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "total_raised",
          "was": "$43M (Series A; one third-party aggregator lists $46.1M, conflict)",
          "now": "~$43M (latest/likely only round); lifetime total uncertain (Tracxn/CB Insights $46.1M, PitchBook $42.2M)",
          "reason": "The $43M is the Series A round amount, not a confirmed lifetime total. Multiple aggregators (not just one) report differing totals ($46.1M, $42.2M). Reframed to distinguish round amount from total raised and noted the multi-source conflict; confidence kept at 'reported' (no primary source for a lifetime total)."
        },
        {
          "field": "last_round",
          "was": "Series A, $43M, March 2026",
          "now": "Series A, $43M, announced March 19, 2026 (Crunchbase lists round date Feb 23, 2026)",
          "reason": "Added precise announcement date and noted Crunchbase's differing internal round date for transparency; round itself remains confirmed by official blog + a16z + Fortune, so 'confirmed' retained."
        },
        {
          "field": "researcher_backgrounds",
          "was": "Co-founder Lukas Schmit (reported)",
          "now": "Co-founder/CTO Lukas Schmit (per Crunchbase and press)",
          "reason": "Verified Lukas Schmit as co-founder/CTO via Crunchbase person profile in addition to press; added Crunchbase as a corroborating source. No confidence change (remains reported)."
        },
        {
          "field": "founded_year",
          "was": "2025 (Series A blog frames Mar 2026 as ~1 year in; LinkedIn public page lists 2022, conflict)",
          "now": "~2025 per official blog ('since we started one year ago'); Tracxn/CB Insights and LinkedIn list 2022 (conflict); leaning 2025 per primary source",
          "reason": "Verified the conflict extends beyond LinkedIn to multiple aggregators (Tracxn, CB Insights). Primary-source blog language explicitly supports ~2025; kept as reported with the conflict surfaced."
        },
        {
          "field": "notable_customers",
          "was": "Leading/frontier AI labs (unnamed; '100s of gyms' built for them)",
          "now": "Leading/frontier AI labs (unnamed; company claims '100s of gyms' built for them and contributions to recent computer-use advances), self-claimed",
          "reason": "Confirmed all customer references are vendor/a16z self-claims with no third party naming any lab; verification correctly stays 'self-claimed'. Clarified wording to emphasize the claim is unverified."
        },
        {
          "field": "other_locations",
          "was": "India (remote Member of Technical Staff role posted)",
          "now": "India (remote Member of Technical Staff role posted; a job posting, not a confirmed office)",
          "reason": "A remote job posting is not evidence of an established office; clarified to avoid overstating geographic footprint."
        }
      ],
      "verification_summary": "Independently re-verified via WebSearch and WebFetch against Fortune, the official deeptune.com Series A blog, a16z's investment post, and aggregators (Tracxn/CB Insights/PitchBook). Confirmed this is the correct company matching the directory note 'managed code and computer use environments' (RL 'training gyms' for computer-use and code sold to frontier labs). Core funding facts hold: $43M Series A led by a16z, announced March 19 2026, with 776/Abstract/Inspired and angels Noam Brown, Brendan Foody, Yash Patil. NYC HQ and ~20-person in-person team confirmed by Fortune. Founders Tim Lupo (CEO) and Lukas Schmit (CTO) verified. Two genuine conflicts surfaced: (1) founding year: official blog implies ~2025, aggregators/LinkedIn say 2022; (2) total raised: aggregators disagree ($46.1M vs $42.2M) while the round itself is $43M, so total_raised was reframed and held at 'reported.' Customer claims are vendor/investor self-claims only (no named labs, no third-party confirmation), kept 'self-claimed.' No SOC 2, security page, or other certifications found anywhere; all left 'unknown.' No valuation disclosed. focus_areas are all within the controlled vocabulary. Overall confidence: medium.",
      "research_notes": {}
    },
    {
      "rank": 4,
      "focus_areas_normalised": [
        "Long-Horizon"
      ],
      "slug": "bespoke-labs",
      "brand_name": "Bespoke Labs",
      "segment": "Commercial vendors",
      "website": "https://www.bespokelabs.ai/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.bespokelabs.ai/about-us (accessed 2026-06-07); active OSS releases and hiring on https://jobs.ashbyhq.com/bespokelabs (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2024,
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/bespokelabs (accessed 2026-06-07); SEC Form D filed 2024-06-04 https://www.formds.com/issuers/bespokelabs-ai-inc (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "Mountain View, California, USA",
        "confidence": "confirmed",
        "source": "https://www.linkedin.com/company/bespokelabsai (public snippet: 800 W El Camino Real, Mountain View, CA 94040, accessed 2026-06-07); SEC Form D lists Santa Clara, CA"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "distributed_remote": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://jobs.ashbyhq.com/bespokelabs (some roles open to remote in the US, accessed 2026-06-07)"
      },
      "current_headcount": {
        "value": "~40-48 employees (40 as of 2026-04-30 per Tracxn; ~48 profiles listed on LinkedIn page)",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/bespokelabsai (48 employees listed, accessed 2026-06-07); https://tracxn.com/d/companies/bespokelabs (40 as of 2026-04-30, accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/bespokelabsai (accessed 2026-06-07); Tracxn 40 employees as of 2026-04-30"
      },
      "headcount_growth": {
        "value": "unknown (LinkedIn growth posts reference recent hiring but no formal % disclosed)",
        "confidence": "unknown",
        "source": "https://www.linkedin.com/company/bespokelabsai (accessed 2026-06-07)"
      },
      "open_roles_count": {
        "value": "several open roles (e.g. RL Environments, Research Engineer, Data Operations Manager, DevOps Engineer, Technical Recruiter) on Ashby; exact count not reliably extractable from public page",
        "confidence": "reported",
        "source": "https://jobs.ashbyhq.com/bespokelabs (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "$7.25M (SEC Form D: $7,249,998 sold)",
        "confidence": "confirmed",
        "source": "https://www.formds.com/issuers/bespokelabs-ai-inc (Form D filed 2024-06-04, accessed 2026-06-07)"
      },
      "last_round": {
        "value": "~$7.25M raised ~May/June 2024; round stage ambiguous across sources (Luma/Form D context suggest Seed; Tracxn/Crunchbase label Series A 2024-05-22)",
        "confidence": "reported",
        "source": "https://www.formds.com/issuers/bespokelabs-ai-inc (Form D filed 2024-06-04); https://tracxn.com/d/companies/bespokelabs (Series A label); https://luma.com/36sbgxgo ('$7.25M Seed') (all accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [],
        "confidence": "unknown",
        "source": "No investor names disclosed in public sources reviewed (Crunchbase profile inaccessible; Tracxn/PitchBook do not surface named lead investors). SEC Form D lists directors Georgios Alex Dimakis, Maheswaran Sathiamoorthy, Bhaskar Ghosh, and Priyank Patel but does not name investors. https://www.formds.com/issuers/bespokelabs-ai-inc (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "what_they_sell": {
        "value": "mixed (data/RL-environment curation, open datasets and evals, and custom data delivery to enterprises and labs)",
        "confidence": "reported",
        "source": "https://www.bespokelabs.ai/ (vendor self-description, accessed 2026-06-07); https://github.com/bespokelabsai (OSS tooling, accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "mixed: open-source libraries/datasets (self-hosted) plus custom data/RL-environment delivery via direct engagement; Curator integrates with provider APIs (OpenAI/Anthropic, etc.)",
        "confidence": "reported",
        "source": "https://www.bespokelabs.ai/; https://github.com/bespokelabsai/curator (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "GA (open-source tools publicly available; commercial data delivery via direct contracts)",
        "confidence": "estimated",
        "source": "https://github.com/bespokelabsai/curator (Apache-2.0, 1.6k+ stars; PyPI package published, accessed 2026-06-07)"
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/bespokelabsai (accessed 2026-06-07)"
      },
      "license": {
        "value": "Apache-2.0 (Curator, SkyRL); MIT (Verifiers fork); OpenThinker/OpenThoughts artifacts Apache-2.0",
        "confidence": "confirmed",
        "source": "https://github.com/bespokelabsai (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://www.bespokelabs.ai/about-us (self-described 'applied AI research lab'; team from Google/DeepMind, UC Berkeley, Stanford, NYU; multiple published works, accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "unknown (research-led team; exact count not extractable from public team page)",
        "confidence": "unknown",
        "source": "https://www.bespokelabs.ai/about-us (accessed 2026-06-07)"
      },
      "researcher_backgrounds": {
        "value": [
          "Co-founder/CEO Maheswaran (Mahesh) Sathiamoorthy, ex-Google DeepMind",
          "Co-founder/Chief Scientist Georgios (Alex) Dimakis, Professor, UC Berkeley (formerly UT Austin)",
          "Team members with backgrounds from Google, UC Berkeley, Stanford, NYU, Microsoft, Scale AI, AI2 (per vendor about-us page)"
        ],
        "confidence": "reported",
        "source": "https://www.bespokelabs.ai/about-us (vendor-stated; founder roles corroborated by https://www.linkedin.com/in/alex-dimakis-b1b20320/ and Tracxn founders page, accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "OpenThoughts / OpenThoughts-114k, open reasoning dataset (collaboration with DataComp community; 190+ public models trained on it per GitHub)",
          "Bespoke Curator, synthetic data curation library",
          "Evalchemy, evaluation/benchmark tooling",
          "Bespoke-Stratos / OpenThinker reasoning models",
          "Bespoke-MiniCheck, factuality model"
        ],
        "confidence": "reported",
        "source": "https://github.com/open-thoughts/open-thoughts; https://github.com/bespokelabsai/curator; https://www.bespokelabs.ai/ (accessed 2026-06-07)"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Fortune 500 enterprises (unnamed)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Frontier labs / top labs (unnamed)",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          },
          {
            "name": "Model builders using OpenThoughts datasets (190+ public HF models; unnamed)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://www.bespokelabs.ai/ ('Fortune 500 enterprises and frontier labs trust us'); https://github.com/open-thoughts/open-thoughts (190+ public models). No specific named customers disclosed or third-party verified. (accessed 2026-06-07)"
      },
      "focus_areas": [
        "long-horizon / general reasoning",
        "evaluation / benchmarks",
        "execution infrastructure"
      ],
      "positioning_summary": "Bespoke Labs is an applied AI research lab (Mountain View, CA, founded 2024) focused on data curation and RL-environment curation for training and evaluating agents, known for open datasets and reproducible recipes (OpenThoughts) and open-source tools (Curator, Evalchemy). It pairs a public open-source/open-data presence with commercial custom data and RL-environment delivery.",
      "best_fit_use_case": "Buyers needing reasoning-focused data curation, open reproducible datasets/recipes, and custom RL-environment/eval data delivery from a research-led team.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.bespokelabs.ai/",
          "accessed_date": "2026-06-07",
          "note": "Homepage, positioning, products (Curator, OpenThoughts, Evalchemy, GEPA, TerminalBench), 'Fortune 500 and frontier labs trust us', ICLR 2026 papers"
        },
        {
          "url": "https://www.bespokelabs.ai/about-us",
          "accessed_date": "2026-06-07",
          "note": "Team/founders, backgrounds, advisors, HQ Mountain View, applied AI research lab mission"
        },
        {
          "url": "https://www.linkedin.com/company/bespokelabsai",
          "accessed_date": "2026-06-07",
          "note": "Public snippet, ~48 employees, HQ Mountain View, June hiring growth, specialties"
        },
        {
          "url": "https://tracxn.com/d/companies/bespokelabs/__TgeW4_XxZv-sKUrOh6M6QeTLr6e9xHzW26BbTJzHYbQ",
          "accessed_date": "2026-06-07",
          "note": "Founded 2024, 40 employees as of 2026-04-30, $7.25M raised, Series A label"
        },
        {
          "url": "https://www.formds.com/issuers/bespokelabs-ai-inc",
          "accessed_date": "2026-06-07",
          "note": "SEC Form D filed 2024-06-04, $7,249,998 raised, directors/execs listed (Dimakis, Sathiamoorthy, Bhaskar Ghosh, Priyank Patel)"
        },
        {
          "url": "https://luma.com/36sbgxgo",
          "accessed_date": "2026-06-07",
          "note": "Event listing labels round '$7.25M Seed'; CEO ex-Google DeepMind"
        },
        {
          "url": "https://github.com/bespokelabsai",
          "accessed_date": "2026-06-07",
          "note": "OSS repos, Curator (1.68k stars, Apache-2.0), Verifiers (MIT), SkyRL (Apache-2.0), SkyThought, Sandbox, ProRL-Agent-Server, gepa artifacts"
        },
        {
          "url": "https://jobs.ashbyhq.com/bespokelabs",
          "accessed_date": "2026-06-07",
          "note": "Careers, open roles incl. RL Environments, Research Engineer, Data Operations Manager, DevOps, recruiter; some remote-US"
        },
        {
          "url": "https://newsletter.semianalysis.com/p/rl-environments-and-rl-for-science",
          "accessed_date": "2026-06-07",
          "note": "Industry context placing Bespoke Labs among RL environment / data foundry vendors"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "what_they_sell.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "The specific scope (RL-environment curation, delivery to frontier labs/enterprises) is drawn from the vendor's own homepage positioning, which is self-description, not independently confirmed. Downgraded to reported."
        },
        {
          "field": "headcount_growth",
          "was": "13 people joining in June (7 full-time + 6 interns) per LinkedIn growth post; no formal % stated (confidence: reported)",
          "now": "unknown (confidence: unknown)",
          "reason": "The specific '13 people joining in June' figure could not be re-corroborated from the public LinkedIn snippet and no formal growth % is published; downgraded to unknown to avoid relying on an unverifiable single post."
        },
        {
          "field": "researcher_backgrounds.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "Team-composition claims (Google, Stanford, NYU, Microsoft, Scale AI, AI2; advisors) come from the vendor's own about-us page. Only founder roles are externally corroborable (LinkedIn/Tracxn). Downgraded to reported; trimmed unverifiable advisor specifics."
        },
        {
          "field": "published_papers_or_benchmarks.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "Item list mixes vendor claims with OSS artifacts; some entries (GEPA, TerminalBench contributions, 'multiple ICLR 2026 papers') are vendor-stated and not individually verified. Kept well-corroborated items (OpenThoughts, Curator, Evalchemy) and downgraded confidence to reported."
        },
        {
          "field": "notable_customers.value",
          "was": "included '200+ model builders' phrasing",
          "now": "reframed as 'Model builders using OpenThoughts datasets (190+ public HF models)'",
          "reason": "Third-party-corroborated figure is ~190+ public models on Hugging Face (per OpenThoughts GitHub), not the vendor's '200+'. Kept verification as self-claimed since it remains a vendor/community-attributed, not buyer-named, reference."
        },
        {
          "field": "focus_areas",
          "was": "[\"long-horizon / general reasoning\",\"evaluation / benchmarks\",\"science/math\"]",
          "now": "[\"long-horizon / general reasoning\",\"evaluation / benchmarks\",\"execution infrastructure\"]",
          "reason": "Replaced 'science/math' (no dedicated science/math product line; OpenThoughts reasoning data is general-reasoning oriented) with 'execution infrastructure' from the controlled vocabulary, reflecting their RL-environment / sandbox / SkyRL execution tooling on GitHub."
        },
        {
          "field": "last_round.value",
          "was": "Seed, $7.25M, ~May 2024 (Form D filed 2024-06-04); some aggregators label it Series A",
          "now": "~$7.25M raised ~May/June 2024; round stage ambiguous across sources (Seed per Luma/Form D context; Series A per Tracxn/Crunchbase)",
          "reason": "Clarified that the round-stage label is genuinely conflicting across sources rather than presenting Seed as primary; amount and date retained."
        }
      ],
      "verification_summary": "Independently re-verified via WebSearch and WebFetch (SEC Form D, LinkedIn public snippet, Tracxn, GitHub/OpenThoughts) on 2026-06-07. Confirmed this is the CORRECT company matching the 'open data and reproducible recipes' note: Bespoke Labs (Mountain View, CA), founders Alex Dimakis and Mahesh Sathiamoorthy, maker of Curator and OpenThoughts. Funding: SEC Form D confirms $7,249,998 sold (filed 2024-06-04), total_raised kept 'confirmed' on the dollar figure; round stage left 'reported' due to Seed-vs-Series-A conflict across aggregators. No named investors are publicly disclosed (Crunchbase inaccessible/403; Tracxn/PitchBook surface no lead), notable_investors stays unknown; the Bhaskar Ghosh/8VC inference was correctly NOT asserted as an investor. Headcount ~40 (Tracxn, 2026-04-30) to ~48 (LinkedIn), band 11-50, well below 200+. notable_customers remain self-claimed (no third-party-verified named customers; frontier-lab ties are vendor-asserted and unnamed). Downgraded over-confident vendor-sourced fields (what_they_sell, researcher_backgrounds, published_papers, headcount_growth) and corrected focus_areas to the controlled vocabulary. SOC2/certifications/security page remain unknown (no trust page found).",
      "research_notes": {
        "found": [
          "Confirmed identity: AI research lab for data curation + RL-environment curation, matches 'open data and reproducible recipes' directory note (OpenThoughts datasets, Curator open-source recipes)",
          "Founders Mahesh Sathiamoorthy (CEO, ex-Google DeepMind) and Alex Dimakis (CSO, UC Berkeley professor)",
          "HQ Mountain View, CA; founded 2024",
          "$7.25M raised confirmed via SEC Form D ($7,249,998, filed 2024-06-04)",
          "Strong open-source presence: Curator (Apache-2.0, ~1.68k stars), Evalchemy, SkyRL, Verifiers; OpenThoughts open reasoning dataset",
          "Self-claimed customers: Fortune 500 + frontier labs (unnamed); OpenThoughts used by 200+ model builders",
          "Multiple ICLR 2026 papers accepted"
        ],
        "missing": [
          "Named investors (none disclosed publicly; directors include Bhaskar Ghosh of 8VC and Priyank Patel but investor confirmation absent)",
          "Valuation",
          "Revenue signals",
          "SOC 2 / ISO certifications / security/trust page (none found)",
          "Exact researcher count",
          "Specific named customers",
          "Exact open-roles count"
        ],
        "conflicts": [
          "Total raised: $7.25M (SEC Form D, confirmed) vs one aggregator stating $8.25M, treating $7.25M as authoritative",
          "Round labeled 'Seed' (Luma event) vs 'Series A' (Tracxn/aggregators)",
          "Headcount: 40 (Tracxn, 2026-04-30) vs ~48 (LinkedIn page), both within 11-50 band; LinkedIn 'about' section still shows outdated '2-10 employees'",
          "HQ: Mountain View (LinkedIn) vs Santa Clara (SEC Form D / PitchBook)"
        ],
        "stale": [
          "Funding data (May/June 2024) is >12 months old; no newer round found as of 2026-06-07",
          "LinkedIn 'about' size band '2-10 employees' is outdated"
        ],
        "open_questions": [
          "Who led the $7.25M round and full investor list (possible 8VC involvement via director Bhaskar Ghosh, unconfirmed)?",
          "Is there a post-2024 funding round given headcount growth to ~40-48?",
          "Which frontier labs / Fortune 500 enterprises are actual paying customers?",
          "Does Bespoke Labs hold any security certifications (SOC 2) for its commercial data delivery?"
        ]
      }
    },
    {
      "rank": 5,
      "focus_areas_normalised": [
        "Coding",
        "Computer Use",
        "Enterprise Workflows",
        "Long-Horizon",
        "Private Codebases"
      ],
      "slug": "huzzle-labs",
      "brand_name": "Huzzle Labs",
      "segment": "Commercial vendors",
      "website": "https://labs.huzzle.com/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://labs.huzzle.com/ (accessed 2026-06-07); https://webflow.huzzle.com/aix (accessed 2026-06-07)"
      },
      "focus_areas": [
        "coding environments",
        "computer use environments",
        "enterprise workflows",
        "long-horizon / general reasoning",
        "evaluation / benchmarks",
        "human data"
      ],
      "what_they_sell": {
        "value": "mixed",
        "confidence": "confirmed",
        "source": "https://labs.huzzle.com/ (accessed 2026-06-07); https://webflow.huzzle.com/aix (accessed 2026-06-07), RL environments, expert human trajectory data, and contextual evals in one stack"
      },
      "deployment_model": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "maturity": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_source": {
        "value": "partial",
        "confidence": "reported",
        "source": "https://labs.huzzle.com/blog/openenv-visual-memory-spreadsheet (accessed 2026-06-07), OpenEnv environment published openly with Meta; core products otherwise proprietary"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "founded_year": {
        "value": 2020,
        "confidence": "reported",
        "source": "Search consensus (theorg.com, Tracxn, press) gives founding year 2020 by Ingmar Klein, Parham Rakhshanfar, and Amit Choudhary; refers to parent Huzzle. Conflicting aggregator dates (2021) exist. Labs is a later AI pivot (~2025). (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "London, United Kingdom",
        "confidence": "reported",
        "source": "https://github.com/huzzle-app (accessed 2026-06-07); Tracxn registered address 85 Great Portland Street, London (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [
          "Berlin, Germany",
          "United States (operations)"
        ],
        "confidence": "reported",
        "source": "https://webflow.huzzle.com/aix (accessed 2026-06-07), US and European (Berlin) operations; bebee.com job posting also lists San Francisco (accessed 2026-06-07)"
      },
      "current_headcount": {
        "value": "~30 (15 engineers + 15 operators) for the Labs/AI division per vendor; parent Huzzle.com aggregators report ~81-93",
        "confidence": "reported",
        "source": "https://webflow.huzzle.com/aix (accessed 2026-06-07), vendor self-description of Labs team size; parent figures from Tracxn/CBInsights aggregators"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://webflow.huzzle.com/aix (accessed 2026-06-07), vendor states 15 engineers + 15 operators for Labs. Parent-company aggregator figure of ~81-93 reflects the larger talent-platform entity, not the Labs division."
      },
      "headcount_growth": {
        "value": "Parent Huzzle reportedly ranked among Sifted 100 UK & Ireland 2026 fastest-growing startups; Labs-specific growth not separately disclosed",
        "confidence": "reported",
        "source": "https://sifted.eu/leaderboards/sifted-100-uk-ireland-2026 (accessed 2026-06-07), applies to parent Huzzle, ranking via secondary sources; downgraded as the #7 figure is from a job-posting claim, not primary confirmation"
      },
      "open_roles_count": {
        "value": "~44 open positions (parent Huzzle, 2026)",
        "confidence": "reported",
        "source": "https://apply.workable.com/huzzle/ (accessed 2026-06-07); Sifted/Glassdoor snippets, parent company, not Labs-specific"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "Confirmed by Huzzle Labs (company-provided), accessed 2026-06-07"
      },
      "researcher_count": {
        "value": "~15 engineers plus a vetted external PhD/expert network (size of dedicated research staff not separately disclosed)",
        "confidence": "reported",
        "source": "https://webflow.huzzle.com/aix (accessed 2026-06-07)"
      },
      "researcher_backgrounds": {
        "value": [
          "RL engineer, ex-Turing",
          "Researchers from IIT Kharagpur and IIT Bombay (incl. PhD)",
          "Ex post-training lead on BharatGen’s 2.9B-parameter PARAM-1 LLM"
        ],
        "confidence": "confirmed",
        "source": "Confirmed by Huzzle Labs (company-provided), accessed 2026-06-07"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Apple",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Lazard",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Financial Times",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://webflow.huzzle.com/aix (accessed 2026-06-07), listed on vendor's own site; no third-party confirmation found. Vendor also references unnamed 'frontier AI labs' as partners."
      },
      "notable_investors": {
        "value": [
          "10x Founders",
          "Angel Invest",
          "Emerge",
          "Former CTO of Hugging Face (angel)",
          "Researchers at Meta (angels)",
          "Researchers at Applied Intuition (angels)"
        ],
        "confidence": "confirmed",
        "source": "Confirmed by Huzzle Labs (company-provided), accessed 2026-06-07"
      },
      "last_round": {
        "value": "$6M",
        "confidence": "confirmed",
        "source": "Confirmed by Huzzle Labs (company-provided), accessed 2026-06-07"
      },
      "total_raised": {
        "value": "$6M",
        "confidence": "confirmed",
        "source": "Confirmed by Huzzle Labs (company-provided), accessed 2026-06-07"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "soc2": {
        "value": "Type II",
        "confidence": "confirmed",
        "source": "https://huzzle.secureframetrustuk.com/#compliance-d852f4b3-d3ac-4c7e-9e48-bb30d52d89c9, Huzzle Labs trust center (Secureframe), accessed 2026-06-07"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "https://huzzle.secureframetrustuk.com/#compliance-d852f4b3-d3ac-4c7e-9e48-bb30d52d89c9",
        "confidence": "confirmed",
        "source": "https://huzzle.secureframetrustuk.com/#compliance-d852f4b3-d3ac-4c7e-9e48-bb30d52d89c9, Huzzle Labs trust center (Secureframe), accessed 2026-06-07"
      },
      "distributed_remote": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://webflow.huzzle.com/aix (accessed 2026-06-07), operations across US and Europe running 'around the clock'; bebee.com role open to remote (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "OpenEnv, visual-memory spreadsheet environment, published with Meta"
        ],
        "confidence": "confirmed",
        "source": "https://labs.huzzle.com/blog/openenv-visual-memory-spreadsheet (accessed 2026-06-07)"
      },
      "positioning_summary": "Huzzle Labs is the AI division of London-based talent platform Huzzle (founded ~2020 by Ingmar Klein, Parham Rakhshanfar, and Amit Choudhary). It positions itself as a human-intelligence data foundry that builds RL environments (code, tool-use, computer-use, long-horizon enterprise workflows), expert trajectory data, and contextual evaluations for frontier AI labs and regulated European enterprises, leveraging Huzzle's vetted PhD/expert network. It bundles environments, human data, and evals in one stack.",
      "best_fit_use_case": "Frontier labs and regulated enterprises needing custom RL environments plus expert human trajectory data and evals for code, computer-use, and long-horizon professional workflows.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://labs.huzzle.com/blog/openenv-visual-memory-spreadsheet",
          "accessed_date": "2026-06-07",
          "note": "OpenEnv, visual-memory spreadsheet environment published openly with Meta"
        },
        {
          "url": "https://huzzle.secureframetrustuk.com/#compliance-d852f4b3-d3ac-4c7e-9e48-bb30d52d89c9",
          "accessed_date": "2026-06-07",
          "note": "Huzzle Labs trust center (Secureframe), SOC 2 / compliance attestations"
        },
        {
          "url": "https://labs.huzzle.com/",
          "accessed_date": "2026-06-07",
          "note": "Official Huzzle Labs site, human data, RL environments, contextual evals; CLI-style landing page"
        },
        {
          "url": "https://x.com/himanshustwts/status/2041877003251695733",
          "accessed_date": "2026-06-07",
          "note": "Podcast sponsor post describing Huzzle Labs as engineering/research company focused on RL environments (code, computer use, long-horizon enterprise workflows)"
        },
        {
          "url": "https://bebee.com/us/jobs/founding-engagement-manager-frontier-labs-huzzlecom--theirstack-649700226",
          "accessed_date": "2026-06-07",
          "note": "Job posting, confirms offices London/Berlin/SF, builds RL environments + expert trajectories for frontier labs, GTM with frontier AI labs, Sifted #7 claim, $150-210k base"
        },
        {
          "url": "https://github.com/huzzle-app",
          "accessed_date": "2026-06-07",
          "note": "GitHub org, only job-board list repos and forks, no public RL-environment repos; based in UK"
        },
        {
          "url": "https://www.eu-startups.com/2024/04/london-based-huzzle-secures-e1-67-million-pre-seed-to-help-students-land-their-dream-graduate-job/",
          "accessed_date": "2026-06-07",
          "note": "Pre-seed funding (£1.43M/€1.67M, April 2024) for the graduate-jobs platform; investors; London HQ"
        },
        {
          "url": "https://tech.eu/2024/04/18/meet-the-gen-z-founders-rewiring-the-graduate-job-market/",
          "accessed_date": "2026-06-07",
          "note": "Founders Ingmar Klein, Parham Rakhshanfar; London; pre-seed; original graduate-jobs product"
        },
        {
          "url": "https://tracxn.com/d/companies/huzzle/__gSmaXUdJfRlNTOIE3SEajswKR4IZuSyaJRpXk91s88g",
          "accessed_date": "2026-06-07",
          "note": "Aggregator, total funding ~$2.07M over 2 rounds, 7 investors; headcount figures for parent"
        },
        {
          "url": "https://sifted.eu/leaderboards/sifted-100-uk-ireland-2026",
          "accessed_date": "2026-06-07",
          "note": "Sifted 100 UK & Ireland 2026 fastest-growing leaderboard (Huzzle ranked #7 per secondary sources)"
        },
        {
          "url": "https://www.linkedin.com/company/huzzle-com",
          "accessed_date": "2026-06-07",
          "note": "Parent company LinkedIn (public snippet), founded 2021, London HQ; page for company not directly fetchable"
        },
        {
          "url": "https://www.hud.ai/resources/platforms-agent-evals-rl-training-data",
          "accessed_date": "2026-06-07",
          "note": "Listed among platforms turning agent evals into RL training data (429 on fetch; appears in search index)"
        },
        {
          "url": "https://alignlist.com/guides/top-40-rl-environments-startups-and-companies",
          "accessed_date": "2026-06-07",
          "note": "Third-party directory listing Huzzle Labs among RL-environment startups"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "founded_year",
          "was": "2021 (reported)",
          "now": "2020 (reported)",
          "reason": "Multiple sources (theorg.com, search consensus, press) state the parent Huzzle was founded in 2020 by Ingmar Klein, Parham Rakhshanfar, and Amit Choudhary. The draft's 2021 came from a LinkedIn snippet; 2020 is better supported. Kept as 'reported' given aggregator conflicts."
        },
        {
          "field": "headcount_band",
          "was": "51-200 (reported)",
          "now": "11-50 (reported)",
          "reason": "The 51-200 band relied on parent-company aggregator figures (~81-93). The vendor's own AIX page states the Labs/AI division is 15 engineers + 15 operators (~30), which falls in 11-50. A startup AI division at 200-scale was an overreach."
        },
        {
          "field": "current_headcount",
          "was": "~81-93 employees (parent Huzzle.com); Labs size not disclosed",
          "now": "~30 (15 engineers + 15 operators) for Labs per vendor; parent ~81-93 per aggregators",
          "reason": "Added the vendor's own Labs-division headcount, which is the decision-relevant figure for this entity. The draft only had parent-company aggregator numbers."
        },
        {
          "field": "researcher_count",
          "was": "unknown",
          "now": "~15 engineers plus vetted external PhD/expert network (reported)",
          "reason": "Vendor AIX page discloses 15 engineers and a vetted expert network; upgraded from unknown to reported based on a primary vendor source."
        },
        {
          "field": "researcher_backgrounds",
          "was": "['Leverages a vetted network of PhDs / domain experts; specific prior-org backgrounds not disclosed']",
          "now": "Added 'Founders and senior technical leaders from Meta, Hugging Face, Applied Intuition, and Magic'",
          "reason": "The vendor's AIX page explicitly names these prior-org backgrounds; the draft missed them. Kept as 'reported' since individuals are not named or independently verified."
        },
        {
          "field": "notable_customers",
          "was": "[] (unknown), no named customers",
          "now": "Apple, Lazard, Financial Times, all self-claimed (reported)",
          "reason": "The vendor's AIX page (webflow.huzzle.com/aix) names these customers. They are self-claimed (vendor's own site), not verified; no third-party confirmation found. None are frontier-lab ties."
        },
        {
          "field": "notable_investors",
          "was": "Includes 'Steffen Zoller'",
          "now": "Removed 'Steffen Zoller'",
          "reason": "Steffen Zoller could not be confirmed across funding sources (EU-Startups, Tracxn, Nordic9). Removed to avoid an unverified name; remaining investors confirmed across multiple snippets."
        },
        {
          "field": "other_locations",
          "was": "['Berlin, Germany','San Francisco, USA'] (sourced only to a job board)",
          "now": "['Berlin, Germany','United States (operations)']",
          "reason": "Vendor AIX page confirms US + European (Berlin) operations as a primary source. San Francisco appears only in a single job-board posting, so generalized US to avoid over-precision; primary source upgraded."
        },
        {
          "field": "focus_areas",
          "was": "5 areas",
          "now": "Added 'human data'",
          "reason": "Human data is a core, explicitly marketed product line (RL environments + human data + evals), matching the directory note and controlled vocabulary."
        },
        {
          "field": "status",
          "now": "source strengthened",
          "reason": "Added webflow.huzzle.com/aix as a second confirming primary source for active operations."
        },
        {
          "field": "headcount_growth",
          "was": "Sifted #7 stated fairly directly",
          "now": "Hedged; #7 attributed to job-posting/secondary claim",
          "reason": "The #7 Sifted ranking is sourced to a job posting and secondary references, not primary Sifted confirmation. Downgraded the certainty of the specific rank while retaining the general signal."
        },
        {
          "field": "total_raised / investors / team / soc2",
          "was": "reported/unknown (public proxies only)",
          "now": "confirmed via company-provided data ($6M raised; investors 10x Founders, Angel Invest, Emerge; team incl. former Hugging Face CTO, ex-Meta, ex-Applied Intuition; SOC 2 Type II)",
          "reason": "Company-provided primary data supplied 2026-06-07; tagged confirmed per methodology (primary/official source)."
        }
      ],
      "verification_summary": "Confirmed this is the CORRECT company matching the directory note ('environments + human data + evals in one stack'): Huzzle Labs, the AI division of London talent platform Huzzle, founded ~2020 by Ingmar Klein, Parham Rakhshanfar, and Amit Choudhary (the draft sources' 'Patrick Buford' CEO claim is a single-source hallucination and was disregarded; Ingmar Klein is confirmed CEO via LinkedIn/TheOrg/podcast). Highest-risk corrections: (1) headcount band lowered from 51-200 to 11-50, the 51-200 used parent-company aggregator data, but the vendor's own AIX page states the Labs division is ~30 people (15 engineers + 15 operators); (2) added self-claimed customers Apple, Lazard, Financial Times from the vendor's site, explicitly marked self-claimed with no third-party verification and no frontier-lab ties; (3) founding year corrected 2021->2020; (4) added researcher backgrounds (Meta, Hugging Face, Applied Intuition, Magic) from the vendor; (5) removed unverified investor 'Steffen Zoller'. Funding (~$2.07M total, April 2024 pre-seed ~€1.67M) is parent-company and pre-AI-pivot; no Labs-specific raise is disclosed, kept 'reported' given aggregator inconsistencies (pre-seed vs seed labeling, $1.43M vs $1.77M). SOC2/certifications/valuation/revenue remain unknown (no trust page found). Overall confidence remains LOW: most figures are vendor self-claims or parent-company aggregator data, with little independent third-party confirmation of the Labs entity specifically.",
      "research_notes": {
        "found": [
          "Confirmed correct company: Huzzle Labs = AI/RL division of London talent platform Huzzle (huzzle.com / huzzle.app). Matches directory note 'environments + human data + evals in one stack' and tags Long Horizon / RLHF.",
          "Sells RL environments (code, tool-use, computer-use, long-horizon enterprise workflows) + expert trajectory/human data + contextual evals to frontier AI labs.",
          "Offices in London (HQ), Berlin, and San Francisco.",
          "Founders of parent Huzzle: Ingmar Klein (CEO), Amit Choudhary (CTO), Parham Rakhshanfar (COO).",
          "Parent raised pre-seed ~£1.43M/€1.67M April 2024 (10x Founders lead); total ~$2.07M reported.",
          "Ranked #7 on Sifted 100 UK & Ireland 2026 fastest-growing list (parent).",
          "Actively hiring GTM/frontier-lab engagement roles (SF-preferred); ~44 open roles at parent."
        ],
        "missing": [
          "Labs-division-specific headcount and researcher count (only parent ~81-93 figures found).",
          "Named customers / verified frontier-lab ties (vendor references 'frontier labs' generically; none named).",
          "SOC 2 / ISO / any security or trust page.",
          "Deployment model, product maturity (beta/GA), valuation, Labs-specific funding/revenue.",
          "Published papers or benchmarks.",
          "Founding date of the Labs/AI division specifically (parent founded ~2021; AI pivot appears 2025)."
        ],
        "conflicts": [
          "Founding year varies across aggregators: 2020 vs 2021 vs 2024 (2024 = product launch; 2021 = LinkedIn-stated incorporation). Used 2021 (reported).",
          "Headcount: ~81 (2026 aggregator) vs ~93 (Sifted/Tracxn). Both refer to parent, not Labs.",
          "One aggregator snippet named 'Patrick Buford' as CEO, contradicts all primary/secondary sources (Ingmar Klein) and is treated as an aggregator error.",
          "webflow.huzzle.com/ai 301-redirects to labs.huzzle.com (same entity)."
        ],
        "stale": [
          "Pre-seed funding data is from April 2024 (>12 months old) and predates the AI/RL pivot, likely not representative of current Labs operations.",
          "Some headcount snapshots (e.g. '3 employees as of Mar') are clearly outdated aggregator artifacts."
        ],
        "open_questions": [
          "Is Huzzle Labs a separately funded entity or an internal division of Huzzle.com? Has the Labs side raised its own round?",
          "Which frontier labs are actual customers (none verified)?",
          "How many dedicated researchers/engineers are on the Labs team vs the parent talent platform?",
          "What is the deployment model and maturity of the RL-environment product?"
        ]
      }
    },
    {
      "rank": 6,
      "focus_areas_normalised": [
        "Computer Use",
        "Enterprise Workflows"
      ],
      "slug": "fleet-ai",
      "brand_name": "Fleet AI",
      "segment": "Commercial vendors",
      "website": "https://www.fleetai.com/",
      "focus_areas": [
        "enterprise workflows",
        "computer use environments",
        "browser environments",
        "evaluation / benchmarks"
      ],
      "positioning_summary": "Fleet AI builds high-fidelity reinforcement-learning training environments ('gyms') that replicate enterprise software such as Salesforce and Excel, plus browser/desktop workflows, so frontier AI labs and large enterprises can train and evaluate computer-use agents. It ships a Python SDK, a platform API, and the open-source 'Harbor' agent-evaluation/RL-environment tooling, pairing simulated environments with human supervision.",
      "best_fit_use_case": "A frontier lab or large enterprise that needs bespoke, high-fidelity RL environments simulating real enterprise software (CRM, spreadsheets, browser/desktop) to train and evaluate computer-use agents.",
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://www.fleetai.com/about (accessed 2026-06-07); https://sacra.com/c/fleet/ (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "managed-hosted (platform API + Python SDK); some tooling is self-hostable via OSS repos",
        "confidence": "estimated",
        "source": "https://sacra.com/c/fleet/ (accessed 2026-06-07); https://github.com/fleet-ai (accessed 2026-06-07), fleet-sdk and platform API; explicit deployment model not stated"
      },
      "maturity": {
        "value": "GA",
        "confidence": "estimated",
        "source": "https://sacra.com/c/fleet/ (accessed 2026-06-07), reported paying labs/enterprises and a published SDK/API imply commercial availability; vendor does not use a GA label"
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/fleet-ai (accessed 2026-06-07), publishes fleet-sdk (Apache-2.0), zeroboot (Apache-2.0), harbor/harbor-train (Apache-2.0), and other repos"
      },
      "license": {
        "value": "Apache-2.0 (for published OSS repos such as fleet-sdk, zeroboot, harbor); core hosted product not open source",
        "confidence": "confirmed",
        "source": "https://github.com/fleet-ai (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2024,
        "confidence": "reported",
        "source": "https://www.kucoin.com/news/flash/ai-training-firm-fleet-eyes-750m-valuation-amid-60x-revenue-surge (accessed 2026-06-07); https://www.theinformation.com/newsletters/ai-agenda/reinforcement-learning-gym-startup-buoyed-labs-appetite-training-data-reaches-750-million-valuation (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "New York, NY, USA",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/fleet-so (accessed 2026-06-07); https://sacra.com/c/fleet/ (accessed 2026-06-07), both list New York, NY; careers page conflictingly lists a San Francisco (SOMA) HQ"
      },
      "other_locations": {
        "value": [
          "San Francisco, CA (careers page lists SOMA office, in conflict with NY HQ)",
          "Delray Beach / Chelsea-area secondary office (signals conflict: LinkedIn lists Delray Beach, FL; careers page lists New York/Chelsea)"
        ],
        "confidence": "reported",
        "source": "https://www.fleetai.com/careers (accessed 2026-06-07); https://www.linkedin.com/company/fleet-so (accessed 2026-06-07), location signals are inconsistent across sources"
      },
      "current_headcount": {
        "value": "11-50 per LinkedIn public company-size band as of 2026-06-07 (RocketReach ~41 is consistent); an earlier draft figure of ~133 could not be reproduced and appears erroneous",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/fleet-so (accessed 2026-06-07); https://rocketreach.co/fleet-ai-management_b6f95050c628d05e (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/fleet-so (accessed 2026-06-07), LinkedIn shows the '11-50 employees' size band"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://www.fleetai.com/careers (accessed 2026-06-07), careers page does not show a numeric open-role count"
      },
      "total_raised": {
        "value": "~$15M disclosed (seed); a further round of at least $50M was reported in negotiation (~April 2026) but not confirmed closed",
        "confidence": "reported",
        "source": "https://sacra.com/c/fleet/ (accessed 2026-06-07); https://www.kucoin.com/news/flash/ai-training-firm-fleet-eyes-750m-valuation-amid-60x-revenue-surge (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "Reported in talks (not confirmed closed): at least $50M at ~$750M post-money valuation, with Bain Capital Ventures reported as prospective lead, ~April 2026. Prior disclosed: ~$15M seed.",
        "confidence": "reported",
        "source": "https://www.theinformation.com/newsletters/ai-agenda/reinforcement-learning-gym-startup-buoyed-labs-appetite-training-data-reaches-750-million-valuation (accessed 2026-06-07); https://www.kucoin.com/news/flash/ai-training-firm-fleet-eyes-750m-valuation-amid-60x-revenue-surge (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "~$750M reported for an in-talks round (~April 2026, not confirmed closed); prior seed valuation reported under $100M",
        "confidence": "reported",
        "source": "https://www.kucoin.com/news/flash/ai-training-firm-fleet-eyes-750m-valuation-amid-60x-revenue-surge (accessed 2026-06-07); https://www.theinformation.com/newsletters/ai-agenda/reinforcement-learning-gym-startup-buoyed-labs-appetite-training-data-reaches-750-million-valuation (accessed 2026-06-07)"
      },
      "notable_investors": {
        "value": [
          "Sequoia Capital",
          "Menlo Ventures",
          "SV Angel",
          "Bain Capital Ventures (reported prospective lead of an in-talks round; not confirmed closed)"
        ],
        "confidence": "reported",
        "source": "https://www.fleetai.com/about (accessed 2026-06-07), lists Sequoia, Menlo, SV Angel; https://www.kucoin.com/news/flash/ai-training-firm-fleet-eyes-750m-valuation-amid-60x-revenue-surge (accessed 2026-06-07), BCV reported as prospective lead"
      },
      "revenue_signals": {
        "value": "Reported ~$60M annualized run-rate (~April 2026, computed as latest quarter x4), up from ~$1M annualized end-2025",
        "confidence": "reported",
        "source": "https://sacra.com/c/fleet/ (accessed 2026-06-07); https://www.kucoin.com/news/flash/ai-training-firm-fleet-eyes-750m-valuation-amid-60x-revenue-surge (accessed 2026-06-07)"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://www.fleetai.com/about (accessed 2026-06-07), team described as including people with prior research roles at Anthropic, xAI, Meta Superintelligence, Essential AI, Contextual AI, etc."
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Team self-describes prior experience at Anthropic, xAI, Meta Superintelligence, Essential AI, Contextual AI, Mercor, Docker, Citadel, Jane Street, and Cruise",
          "Founder/CEO Nicolai (Nic) Ouporov: ex-founding engineer at Respell (acquired by Salesforce, Jan 2024); prior research at Stanford and Columbia per personal site"
        ],
        "confidence": "reported",
        "source": "https://www.fleetai.com/about (accessed 2026-06-07); https://www.nicolas.info/ (accessed 2026-06-07), backgrounds are self-described and not independently verified per person"
      },
      "notable_customers": {
        "value": [],
        "confidence": "unknown",
        "source": "No specific Fleet AI customer is named or third-party-verified. Press/profiles describe frontier labs (OpenAI, Anthropic, Meta, Google) and financial-services/insurance enterprises only as the buyer CATEGORY for RL-environment vendors, not as confirmed Fleet customers. https://sacra.com/c/fleet/ (accessed 2026-06-07); https://sapphireventures.com/blog/reinforcement-learning-environments-ai-agents/ (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No security/trust page found on fleetai.com (https://www.fleetai.com/security returned 404, accessed 2026-06-07); SOC 2 search results referred to an unrelated company (fleetdm.com)"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://www.fleetai.com/security (accessed 2026-06-07), returned 404; a status page exists but no trust/security page found"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "'Harbor', open-source framework for agent evaluations and creating/using RL environments (Apache-2.0; published under Fleet AI's GitHub org and a standalone harbor-framework repo)"
        ],
        "confidence": "reported",
        "source": "https://github.com/fleet-ai (accessed 2026-06-07); https://github.com/harbor-framework/harbor (accessed 2026-06-07); https://sacra.com/c/fleet/ (accessed 2026-06-07)"
      },
      "distributed_remote": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://www.fleetai.com/careers (accessed 2026-06-07), multiple office locations are listed but no explicit remote/distributed policy is stated; prior 'yes' was an inference, not stated"
      },
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.fleetai.com/ (accessed 2026-06-07); https://github.com/fleet-ai (accessed 2026-06-07), active site and recent OSS activity"
      },
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.fleetai.com/",
          "accessed_date": "2026-06-07",
          "note": "Official homepage, mission, status page (All Systems Operational)."
        },
        {
          "url": "https://www.fleetai.com/about",
          "accessed_date": "2026-06-07",
          "note": "Official about page, founder, team backgrounds (Anthropic, xAI, Meta Superintelligence, Docker, Jane Street, Cruise), 'training gyms for agents', investors."
        },
        {
          "url": "https://www.fleetai.com/careers",
          "accessed_date": "2026-06-07",
          "note": "Careers page, lists SF (SOMA) HQ and NYC (Chelsea) satellite office; no numeric open-role count."
        },
        {
          "url": "https://www.fleetai.com/security",
          "accessed_date": "2026-06-07",
          "note": "Returned HTTP 404, no security/trust page found."
        },
        {
          "url": "https://sacra.com/c/fleet/",
          "accessed_date": "2026-06-07",
          "note": "Third-party profile, revenue ($60M annualized Apr 2026 from $1M 2025), seed $15M, pending $50M+ at ~$750M, HQ New York, Harbor eval framework, 99.993% uptime."
        },
        {
          "url": "https://www.kucoin.com/news/flash/ai-training-firm-fleet-eyes-750m-valuation-amid-60x-revenue-surge",
          "accessed_date": "2026-06-07",
          "note": "Press, founded 2024, founder Nic Ouporov (ex-Respell), $50M+ round, $750M valuation, Bain Capital Ventures lead, seed under $100M valuation."
        },
        {
          "url": "https://www.theinformation.com/newsletters/ai-agenda/reinforcement-learning-gym-startup-buoyed-labs-appetite-training-data-reaches-750-million-valuation",
          "accessed_date": "2026-06-07",
          "note": "The Information article (paywalled, not circumvented), headline confirms $750M valuation RL-gym startup."
        },
        {
          "url": "https://www.nicolas.info/",
          "accessed_date": "2026-06-07",
          "note": "Founder personal site, background (Respell/Salesforce, Stanford Robotics, Columbia Creative Machines), 'applied AI & product lab. Simulations for testing & training agents.'"
        },
        {
          "url": "https://www.linkedin.com/company/fleet-so",
          "accessed_date": "2026-06-07",
          "note": "Public LinkedIn company snippet, ~133 employees, New York NY HQ, 'Gym Environments for Frontier Agents', Software Development industry."
        },
        {
          "url": "https://rocketreach.co/fleet-ai-management_b6f95050c628d05e",
          "accessed_date": "2026-06-07",
          "note": "Third-party, ~41 employees (conflicts with LinkedIn)."
        },
        {
          "url": "https://www.crunchbase.com/organization/fleet-6338",
          "accessed_date": "2026-06-07",
          "note": "Crunchbase profile, returned HTTP 403 (not accessible)."
        },
        {
          "url": "https://sapphireventures.com/blog/reinforcement-learning-environments-ai-agents/",
          "accessed_date": "2026-06-07",
          "note": "VC market overview, Fleet positioned alongside Habitat and Matrices building enterprise-software/web RL environments; frontier labs (Anthropic/OpenAI/Google) as buyer category."
        },
        {
          "url": "https://newsletter.semianalysis.com/p/rl-environments-and-rl-for-science",
          "accessed_date": "2026-06-07",
          "note": "Market context on RL environments / data foundries."
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "open_source",
          "was": "no (estimated)",
          "now": "yes (confirmed)",
          "reason": "Fleet AI's GitHub org (github.com/fleet-ai, matching fleetai.com / team@fleet.so / LinkedIn slug fleet-so) publishes multiple OSS repos including fleet-sdk, zeroboot, harbor, and harbor-train, mostly under Apache-2.0. The draft's claim of no public OSS was wrong."
        },
        {
          "field": "license",
          "was": "unknown",
          "now": "Apache-2.0 for published OSS repos (confirmed)",
          "reason": "Fleet's GitHub repos (fleet-sdk, zeroboot, harbor, harbor-train) are Apache-2.0; updated to reflect the discovered OSS licensing while noting the hosted product itself is not open."
        },
        {
          "field": "headcount_band",
          "was": "51-200 (estimated)",
          "now": "11-50 (reported)",
          "reason": "LinkedIn public company page shows the '11-50 employees' size band, consistent with RocketReach (~41). The 51-200 band rested on an unreproducible ~133 figure; a 200-adjacent band is implausible for a 2024-founded seed company."
        },
        {
          "field": "current_headcount",
          "was": "Conflicting ~41 (RocketReach) and ~133 (LinkedIn) (reported)",
          "now": "11-50 per LinkedIn band, ~41 RocketReach consistent; ~133 not reproduced (reported)",
          "reason": "Re-checking LinkedIn returned an 11-50 size band, not ~133 employees. The draft's 133 could not be reproduced and is treated as erroneous/stale; the figures actually converge low."
        },
        {
          "field": "notable_investors",
          "was": "BCV listed first as reported lead, then Sequoia/Menlo/SV Angel",
          "now": "Sequoia, Menlo, SV Angel listed first (confirmed on official site); BCV explicitly labeled prospective lead of an in-talks, unclosed round",
          "reason": "Only Sequoia, Menlo, and SV Angel appear as investors on the official about page. BCV is described by press only as in talks to lead an unclosed round, so it must not be presented as a confirmed investor."
        },
        {
          "field": "last_round",
          "was": "Reported $50M+ at ~$750M led by Bain Capital Ventures",
          "now": "Same figures but explicitly framed as 'in talks / not confirmed closed' with BCV as prospective lead",
          "reason": "The Information's own post says Fleet 'is in talks to raise'; Sacra and KuCoin say 'negotiating'. No source confirms the round closed, so wording was tightened to avoid implying a closed round."
        },
        {
          "field": "valuation",
          "was": "~$750M (reported, pending round)",
          "now": "~$750M for an in-talks/unclosed round (reported)",
          "reason": "Same downgrade rationale: valuation pertains to a reported, unclosed negotiation, not a completed financing."
        },
        {
          "field": "other_locations",
          "was": "SF (SOMA) HQ and New York (Chelsea) satellite",
          "now": "SF (SOMA) plus a conflicting secondary location; LinkedIn lists Delray Beach, FL rather than Chelsea NY",
          "reason": "LinkedIn's public page indicates a Delray Beach, FL office, which conflicts with the careers page's NYC/Chelsea claim. The conflict is surfaced rather than asserting one definitive satellite."
        },
        {
          "field": "focus_areas",
          "was": "['enterprise workflows','computer use environments','browser environments','coding environments']",
          "now": "['enterprise workflows','computer use environments','browser environments','evaluation / benchmarks']",
          "reason": "No evidence Fleet focuses on coding environments; their public artifacts (Harbor eval framework, enterprise-software gyms, computer-use) support 'evaluation / benchmarks' instead. All values remain within the controlled vocabulary."
        },
        {
          "field": "has_researchers",
          "was": "yes (confirmed)",
          "now": "yes (reported)",
          "reason": "Research backgrounds are self-described on the about page and not independently verified per individual; confidence downgraded from confirmed to reported."
        },
        {
          "field": "researcher_backgrounds",
          "was": "confirmed",
          "now": "reported",
          "reason": "Backgrounds (Anthropic/xAI/Meta Superintelligence, founder's Stanford/Columbia roles) are self-claimed via the vendor about page and founder's personal site; not independently corroborated, so downgraded to reported."
        },
        {
          "field": "distributed_remote",
          "was": "yes (estimated)",
          "now": "unknown (unknown)",
          "reason": "Having multiple offices does not establish a remote/distributed policy; no source states one. Per the downgrade-when-in-doubt rule, set to unknown rather than infer."
        },
        {
          "field": "status",
          "was": "confirmed via homepage only",
          "now": "confirmed via homepage plus recent GitHub OSS activity",
          "reason": "Added a second corroborating signal (active OSS commits) to support the active status."
        },
        {
          "field": "published_papers_or_benchmarks",
          "was": "'Harbor' referenced in a third-party profile; not independently verified",
          "now": "'Harbor' confirmed as a real Apache-2.0 OSS agent-eval/RL-environment framework on GitHub",
          "reason": "Harbor is verifiable on GitHub (under both Fleet's org and a standalone harbor-framework repo), upgrading it from an unverified reference to a confirmed public artifact, though still reported as Fleet's specifically."
        }
      ],
      "verification_summary": "Confirmed Fleet AI (fleetai.com / GitHub fleet-ai / team@fleet.so / LinkedIn 'fleet-so') is the correct 'enterprise agent environments' company building RL training gyms (Salesforce/Excel/browser/desktop replicas) for frontier labs, matches the directory note; no wrong same-named entity. Biggest corrections: (1) headcount is 11-50 per LinkedIn, not the draft's 133-derived 51-200 band; (2) the company IS open source (Apache-2.0 fleet-sdk, zeroboot, harbor, harbor-train), reversing the draft's 'no'; (3) the $50M+/~$750M/BCV round is only IN TALKS, not closed, only Sequoia, Menlo, and SV Angel are confirmed investors on the official site, so BCV is labeled a prospective lead. Funding/valuation/revenue figures all rest on press (KuCoin/Phemex/The Information, the latter paywalled) plus Sacra, so kept at 'reported'. No customers are third-party-verified (only frontier-lab/enterprise buyer CATEGORY is cited), kept empty/unknown. No SOC 2 or trust page exists; security page 404s, kept unknown. founded_year 2024 and active status corroborated. Self-described team backgrounds and distributed/remote inference downgraded. Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Confirmed correct company: Fleet AI (fleetai.com), builds RL training environments/'gyms' replicating enterprise software (Salesforce, Excel) and browser/desktop workflows for frontier agents, matches directory note 'enterprise agent environments'.",
          "Founder/CEO Nicolai (Nic) Ouporov; ex-Respell founding engineer (Respell acquired by Salesforce); prior Stanford/Columbia robotics research.",
          "Founded 2024 (reported).",
          "Seed round $15M (Sequoia, Menlo Ventures, SV Angel).",
          "Reported pending round $50M+ at ~$750M post-money valuation, led by Bain Capital Ventures (~April 2026).",
          "Reported revenue ~$60M annualized (April 2026) up from ~$1M (end 2025).",
          "Team self-claims alumni from Anthropic, xAI, Meta Superintelligence, Docker, Jane Street, Cruise (frontier-lab ties: Anthropic, xAI, Meta).",
          "Two offices: San Francisco (SOMA) and New York (Chelsea)."
        ],
        "missing": [
          "Verified named customers (none disclosed publicly).",
          "SOC 2 / ISO / HIPAA certifications and any security/trust page (security URL 404s).",
          "Exact headcount (conflicting figures), headcount growth, numeric open-role count.",
          "Researcher count, OSS license, deployment model specifics.",
          "Confirmation that the $50M+/$750M round has closed."
        ],
        "conflicts": [
          "HQ location: LinkedIn company page and Sacra list New York, NY; vendor careers page lists HQ in SOMA, San Francisco with New York as a satellite.",
          "Headcount: RocketReach shows ~41 employees; LinkedIn company-page snippet shows ~133. Band estimated as 51-200 but genuinely uncertain.",
          "Revenue narrative described as '60x growth' in some headlines vs $1M->$60M (~60x) figures, consistent but all third-party/reported, not vendor-confirmed."
        ],
        "stale": [],
        "open_questions": [
          "Has the reported $50M+/$750M Bain Capital Ventures round actually closed, and at what final terms?",
          "Which frontier labs / enterprises are actual paying Fleet customers (none verified)?",
          "Is the true HQ San Francisco or New York, and what is the accurate current headcount?",
          "Does Fleet AI hold any security certifications (SOC 2 etc.)?"
        ]
      }
    },
    {
      "rank": 7,
      "focus_areas_normalised": [
        "Coding",
        "Private Codebases"
      ],
      "slug": "datacurve",
      "brand_name": "Datacurve",
      "segment": "Commercial vendors",
      "website": "https://datacurve.ai/",
      "focus_areas": [
        "coding environments",
        "evaluation / benchmarks",
        "execution infrastructure"
      ],
      "what_they_sell": {
        "value": "mixed",
        "confidence": "confirmed",
        "source": "https://datacurve.ai/ (accessed 2026-06-07); https://techcrunch.com/2025/10/09/datacurve-raises-15-million-to-take-on-scaleai/ (accessed 2026-06-07)"
      },
      "positioning_summary": "Datacurve is a YC W24 commercial data vendor that supplies expert-curated frontier coding data, RLHF traces, and repository-wide reinforcement learning environments (with unit-test verifiers) to foundation model labs, sourced via its Shipd bounty platform of vetted software engineers. It also publishes DeepSWE, a long-horizon agentic coding benchmark.",
      "best_fit_use_case": "Frontier/foundation model labs needing expert-sourced coding SFT/RLHF data and code-execution RL environments with verifiable rewards (code execution, tight loops).",
      "maturity": {
        "value": "GA",
        "confidence": "estimated",
        "source": "https://datacurve.ai/ (accessed 2026-06-07), commercially operating with paying labs and active Shipd bounty platform"
      },
      "deployment_model": {
        "value": "managed-hosted (data delivered as a service; private model endpoints spun up for RLHF traces)",
        "confidence": "reported",
        "source": "https://sacra.com/c/datacurve/ (accessed 2026-06-07)"
      },
      "open_source": {
        "value": "no (company products are commercial/proprietary; only the DeepSWE benchmark repo is published publicly)",
        "confidence": "reported",
        "source": "https://github.com/datacurve-ai/deep-swe (accessed 2026-06-07); https://datacurve.ai/ (accessed 2026-06-07)"
      },
      "license": {
        "value": "unknown (DeepSWE repo has no license file shown as of access date)",
        "confidence": "unknown",
        "source": "https://github.com/datacurve-ai/deep-swe (accessed 2026-06-07), no license specified"
      },
      "founded_year": {
        "value": 2024,
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/datacurve (accessed 2026-06-07), Winter 2024 batch, founded 2024"
      },
      "hq_location": {
        "value": "San Francisco, USA",
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/datacurve (accessed 2026-06-07); https://www.linkedin.com/company/datacurveai (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "approx 36 (LinkedIn 'Discover all 36 employees', size band 11-50) as of 2026-06-07",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/datacurveai (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/datacurveai (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": 3,
        "confidence": "confirmed",
        "source": "https://datacurve.ai/careers (accessed 2026-06-07), Software Engineer, Research Engineer, Growth/Operations"
      },
      "distributed_remote": {
        "value": "no (San Francisco office-based; careers page lists in-office meals and commuter benefits)",
        "confidence": "estimated",
        "source": "https://datacurve.ai/careers (accessed 2026-06-07)"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://datacurve.ai/careers (accessed 2026-06-07), Research Engineer role; co-founders have ML/AI research backgrounds"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Serena Ge (co-founder/CEO): worked on LLM reasoning during a co-op at Cohere; University of Waterloo CS; Forbes 30 Under 30",
          "Charley Lee (co-founder): University of Waterloo CS; AI research background"
        ],
        "confidence": "reported",
        "source": "https://uwaterloo.ca/computer-science/news/cs-led-startup-secures-177m-transform-ai-training-data (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "$17.7M ($15M Series A + $2.7M seed)",
        "confidence": "confirmed",
        "source": "https://techcrunch.com/2025/10/09/datacurve-raises-15-million-to-take-on-scaleai/ (accessed 2026-06-07); https://uwaterloo.ca/computer-science/news/cs-led-startup-secures-177m-transform-ai-training-data (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "Series A, $15M, October 2025",
        "confidence": "confirmed",
        "source": "https://techcrunch.com/2025/10/09/datacurve-raises-15-million-to-take-on-scaleai/ (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Chemistry (Mark Goldberg, lead Series A)",
          "Y Combinator",
          "Balaji Srinivasan (seed)",
          "angel investors who are employees of DeepMind, Vercel, Anthropic and OpenAI (individuals, not the companies)"
        ],
        "confidence": "reported",
        "source": "https://techcrunch.com/2025/10/09/datacurve-raises-15-million-to-take-on-scaleai/ (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "unknown (over $1M paid out in bounties to contributors, a payout figure, not revenue)",
        "confidence": "unknown",
        "source": "https://techcrunch.com/2025/10/09/datacurve-raises-15-million-to-take-on-scaleai/ (accessed 2026-06-07)"
      },
      "notable_customers": {
        "value": [],
        "confidence": "unknown",
        "source": "Vendor and press describe 'frontier AI labs / foundation model labs' and 'multimillion-dollar contracts with leading AI labs' but name no specific customer; not verifiable"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://datacurve.ai/ (accessed 2026-06-07); https://techcrunch.com/2025/10/09/datacurve-raises-15-million-to-take-on-scaleai/ (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "DeepSWE, long-horizon agentic coding benchmark, 113 tasks across TypeScript/Go/Python/JavaScript/Rust with isolated test environments and program-based verifiers (github.com/datacurve-ai/deep-swe). Distinct from the Together AI/Agentica 'DeepSWE' coding agent of the same name."
        ],
        "confidence": "confirmed",
        "source": "https://github.com/datacurve-ai/deep-swe (accessed 2026-06-07)"
      },
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://datacurve.ai/",
          "accessed_date": "2026-06-07",
          "note": "Official site, 'data engine for frontier AI', DeepSWE, total funding $17.7M"
        },
        {
          "url": "https://datacurve.ai/careers",
          "accessed_date": "2026-06-07",
          "note": "3 open roles, SF location, team description"
        },
        {
          "url": "https://www.ycombinator.com/companies/datacurve",
          "accessed_date": "2026-06-07",
          "note": "Founded 2024, founders, W24 batch, SF, product description"
        },
        {
          "url": "https://www.linkedin.com/company/datacurveai",
          "accessed_date": "2026-06-07",
          "note": "Headcount ~36, size band 11-50, HQ San Francisco"
        },
        {
          "url": "https://techcrunch.com/2025/10/09/datacurve-raises-15-million-to-take-on-scaleai/",
          "accessed_date": "2026-06-07",
          "note": "$15M Series A, Chemistry lead, investor list, $1M bounties paid, frontier lab ties"
        },
        {
          "url": "https://sacra.com/c/datacurve/",
          "accessed_date": "2026-06-07",
          "note": "Funding, RLHF traces via private endpoints, repo-wide RL environments with unit tests, 14,000 vetted engineers"
        },
        {
          "url": "https://github.com/datacurve-ai/deep-swe",
          "accessed_date": "2026-06-07",
          "note": "DeepSWE benchmark, 113 tasks, 5 languages, 652 stars"
        },
        {
          "url": "https://api.github.com/repos/datacurve-ai/deep-swe",
          "accessed_date": "2026-06-07",
          "note": "Repo metadata: created 2026-05-15, pushed 2026-06-05, stars 652, license null"
        },
        {
          "url": "https://uwaterloo.ca/computer-science/news/cs-led-startup-secures-177m-transform-ai-training-data",
          "accessed_date": "2026-06-07",
          "note": "Founder backgrounds: Ge ex-Cohere, Lee ex-Google/RL research, Waterloo CS"
        },
        {
          "url": "https://www.chemistry.vc/post/staying-ahead-of-the-curve",
          "accessed_date": "2026-06-07",
          "note": "Lead investor Chemistry's Series A announcement"
        },
        {
          "url": "https://www.menlotimes.com/post/datacurve-is-taking-on-scale-ai-building-frontier-coding-data-for-foundation-model-labs",
          "accessed_date": "2026-06-07",
          "note": "Bounty model, complex RL environments, future expansion plans"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "focus_areas",
          "was": "['coding environments','evaluation / benchmarks','long-horizon / general reasoning']",
          "now": "['coding environments','evaluation / benchmarks','execution infrastructure']",
          "reason": "Datacurve's core differentiator is code-execution RL environments with unit-test verifiers (matches directory note 'code execution, tight loops'), which maps to 'execution infrastructure'. 'long-horizon / general reasoning' overstates scope; their focus is coding, not general reasoning. All values remain within the controlled vocabulary."
        },
        {
          "field": "open_source",
          "was": "value 'yes (DeepSWE benchmark repo is public; core data products are commercial/proprietary)', confidence 'confirmed'",
          "now": "value 'no (company products are commercial/proprietary; only the DeepSWE benchmark repo is published publicly)', confidence 'reported'",
          "reason": "The company is a proprietary commercial data vendor; one public benchmark repo does not make the company open-source. Reframed the headline value to 'no' to avoid misclassifying a closed-source vendor, and downgraded confidence."
        },
        {
          "field": "license",
          "was": "confidence 'unknown' with source citing api.github.com license:null",
          "now": "value clarified to 'unknown (no license file shown)', source generalized to the GitHub repo URL",
          "reason": "Could not independently confirm the api.github.com license:null assertion via the public repo; kept conservative 'unknown'."
        },
        {
          "field": "distributed_remote",
          "was": "value 'unknown', confidence 'unknown'",
          "now": "value 'no (San Francisco office-based; careers page lists in-office meals and commuter benefits)', confidence 'estimated'",
          "reason": "Careers page lists in-office meals and commuter benefits, indicating an on-site SF model; marked estimated since no explicit remote policy is stated."
        },
        {
          "field": "researcher_backgrounds",
          "was": "included 'ex-Google intern' and 'multi-modal RL and browser-use agents' specifics for Charley Lee",
          "now": "generalized to 'University of Waterloo CS; AI research background'",
          "reason": "The cited Waterloo article does not state Lee's ex-Google internship or specific multi-modal RL/browser-use research; removed unverifiable specifics to avoid overreach. Added verified detail (Serena Ge: Forbes 30 Under 30)."
        },
        {
          "field": "total_raised",
          "was": "value '$17.7M', source 'datacurve.ai; sacra.com'",
          "now": "value '$17.7M ($15M Series A + $2.7M seed)', source TechCrunch + Waterloo",
          "reason": "Strengthened sourcing to two credible independent sources whose figures reconcile ($15M + $2.7M = $17.7M). Discounted a StartupHub aggregator listing of '$34M' as an unreliable outlier."
        },
        {
          "field": "notable_investors",
          "was": "confidence 'confirmed'; value listed 'angels/employees from DeepMind, Vercel, Anthropic, OpenAI'",
          "now": "confidence 'reported'; clarified these are individual angel investors employed at those labs, not the institutions",
          "reason": "TechCrunch attributes participation to 'employees at' those companies, not the companies as institutional investors. Clarified to prevent implying frontier-lab institutional backing; downgraded to reported."
        },
        {
          "field": "notable_customers",
          "was": "confidence 'unknown', note 'no specific customers named'",
          "now": "value '[]', confidence 'unknown', note expanded to include the 'multimillion-dollar contracts with leading AI labs' aggregator claim",
          "reason": "An aggregator claims signed contracts with leading AI labs, but no specific lab is named in any credible source; correctly kept empty and unknown rather than asserting any frontier-lab tie."
        },
        {
          "field": "published_papers_or_benchmarks",
          "was": "DeepSWE description only",
          "now": "added note that this is distinct from the Together AI/Agentica 'DeepSWE' coding agent of the same name",
          "reason": "There are two unrelated artifacts named DeepSWE; disambiguated to confirm the benchmark is genuinely Datacurve's and prevent conflation."
        },
        {
          "field": "best_fit_use_case",
          "was": "...code-execution RL environments with verifiable rewards.",
          "now": "...code-execution RL environments with verifiable rewards (code execution, tight loops).",
          "reason": "Made the directory-note match ('code execution, tight loops') explicit to confirm correct company identification."
        }
      ],
      "verification_summary": "Confirmed this is the correct company: Datacurve (YC W24, datacurve.ai) sells frontier coding data and repo-wide RL environments with unit-test verifiers, matching the directory note 'code execution, tight loops'. Funding re-verified: $15M Series A (Oct 2025, led by Chemistry/Mark Goldberg) + $2.7M seed (Balaji Srinivasan) = $17.7M total, corroborated by TechCrunch and the University of Waterloo announcement; a StartupHub '$34M' figure was treated as an unreliable aggregator outlier and discounted. Valuation undisclosed (unknown). Headcount ~36 / band 11-50 confirmed via LinkedIn public snippet (consistent with a Series A startup; YC's 'team size 4' is stale). Founded 2024 confirmed. notable_customers kept empty/unknown: press and aggregators reference unnamed 'frontier/leading AI labs' but no specific customer is verifiable, and no institutional frontier-lab investor exists (lab affiliations are individual angels). DeepSWE benchmark confirmed as Datacurve's own (113 tasks, 5 languages), explicitly distinguished from the same-named Together AI/Agentica coding agent. SOC2/certifications/security page remain unknown (no trust page found). Downgrades applied to open_source, notable_investors, and researcher_backgrounds to remove overreach; distributed_remote set to 'no (estimated)' based on on-site careers benefits.",
      "research_notes": {}
    },
    {
      "rank": 8,
      "focus_areas_normalised": [
        "Coding",
        "Long-Horizon",
        "Private Codebases"
      ],
      "slug": "proximal",
      "brand_name": "Proximal",
      "segment": "Commercial vendors",
      "website": "https://www.proximal.ai",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.proximal.ai/ (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2026,
        "confidence": "reported",
        "source": "https://www.proximal.ai/blog/proximal (announced 2026-02-18; accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "San Francisco, CA, USA",
        "confidence": "confirmed",
        "source": "https://www.proximal.ai/blog/proximal (San Francisco); https://www.proximal.ai/careers (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [
          "Bangalore, India"
        ],
        "confidence": "reported",
        "source": "https://www.proximal.ai/careers (accessed 2026-06-07)"
      },
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://www.proximal.ai/blog/proximal ; https://www.proximal.ai/blog/our-problems (high-fidelity, long-horizon RL environments / coding data engine; accessed 2026-06-07)"
      },
      "focus_areas": [
        "coding environments",
        "evaluation / benchmarks",
        "execution infrastructure",
        "long-horizon / general reasoning"
      ],
      "deployment_model": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "maturity": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/Proximal-Labs/frontier-swe (FrontierSWE benchmark, 127 stars; accessed 2026-06-07)"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://github.com/Proximal-Labs/frontier-swe (license not shown in fetched content; accessed 2026-06-07)"
      },
      "current_headcount": {
        "value": "11-50 band (LinkedIn); '~25' is an unverified estimate",
        "confidence": "estimated",
        "source": "https://www.linkedin.com/company/proximalhq (public snippet shows 11-50; accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/proximalhq (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": 3,
        "confidence": "reported",
        "source": "https://www.proximal.ai/careers (3 roles per draft snapshot; titles not independently re-confirmed; accessed 2026-06-07)"
      },
      "distributed_remote": {
        "value": "no",
        "confidence": "reported",
        "source": "https://www.proximal.ai/careers (emphasizes in-person collaboration in SF and Bangalore; accessed 2026-06-07)"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://www.proximal.ai/blog/proximal (team of engineers and researchers; members published at leading conferences; accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Justus Mattern (co-founder) - led RL research & data at Prime Intellect, core contributor to its RL training framework (Intellect-2); co-founded Revideo (YC S23); early engineer at Dynamo AI (confirmed via justusmattern.com)",
          "Calvin Chen (co-founder) - works on Proximal; part of a 'second-time exited' founding team (specifics of any prior company exit, ARR or sale amount NOT corroborated by his own site)",
          "Navid Pour (co-founder) - prior Cursor experience per founder posts ('early engineers from Cursor'); specific 'second founding engineer / Cursor tab' and 'Fetchr' claims NOT corroborated",
          "Founding team alumni from Cursor, Prime Intellect, Browserbase and Jane Street; members with published papers at leading research conferences"
        ],
        "confidence": "reported",
        "source": "https://www.justusmattern.com/ ; https://www.calvinjaychen.com/ ; https://www.linkedin.com/posts/imstusmith_excited-to-finally-share-that-scribble-ventures-activity-7430007099004432384-cqbR (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "Early/seed-stage round (stage label inferred; amount undisclosed); led by Scribble Ventures",
        "confidence": "reported",
        "source": "https://www.linkedin.com/posts/imstusmith_excited-to-finally-share-that-scribble-ventures-activity-7430007099004432384-cqbR ; https://www.proximal.ai/blog/proximal (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Scribble Ventures (lead)",
          "Angels from OpenAI, Anthropic, Thinking Machines, Google DeepMind, xAI, Meta Superintelligence, Cursor and Cognition (per founders' own statements; not independently verified)"
        ],
        "confidence": "reported",
        "source": "https://www.calvinjaychen.com/ (founder site lists these angel affiliations); https://www.linkedin.com/posts/imstusmith_excited-to-finally-share-that-scribble-ventures-activity-7430007099004432384-cqbR (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [],
        "confidence": "unknown",
        "source": "Company states it works with frontier labs / AI startups but names no specific customers publicly; no third-party customer confirmation found (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "FrontierSWE - ultra long-horizon coding agent benchmark (implementation, performance engineering, ML research) - https://www.frontierswe.com ; https://github.com/Proximal-Labs/frontier-swe"
        ],
        "confidence": "confirmed",
        "source": "https://www.frontierswe.com ; https://github.com/Proximal-Labs/frontier-swe (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No security/trust page found; https://www.proximal.ai/security returned 404 (accessed 2026-06-07)"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://www.proximal.ai/security returned 404 (accessed 2026-06-07)"
      },
      "positioning_summary": "Proximal is a San Francisco-based (with a Bangalore presence) research lab for coding data, building high-fidelity, long-horizon reinforcement learning environments grounded in real codebases to train and evaluate frontier coding agents. It emphasizes scalable, software-driven data engines over human contractors, and research into reward-hacking detection and 'fuzzy verifiers' that score code quality beyond functional correctness.",
      "best_fit_use_case": "Frontier labs or AI startups needing long-horizon, real-codebase RL environments and quality-aware (fuzzy) verifiers to post-train coding agents.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.proximal.ai/",
          "accessed_date": "2026-06-07",
          "note": "Official site: research lab for coding data; SF; hiring; FrontierSWE; blog posts"
        },
        {
          "url": "https://www.proximal.ai/blog/proximal",
          "accessed_date": "2026-06-07",
          "note": "Announcing Proximal post, dated 2026-02-18; mission; team backgrounds (Cursor, Prime Intellect, Jane Street); fuzzy verifiers; reward hacking"
        },
        {
          "url": "https://www.proximal.ai/blog/our-problems",
          "accessed_date": "2026-06-07",
          "note": "Open problems: fuzzy verifiers, reward-hacking detection, long-horizon RL environments, GitHub PR indexing, taskrunner framework, multi-node snapshotting"
        },
        {
          "url": "https://www.proximal.ai/careers",
          "accessed_date": "2026-06-07",
          "note": "3 open roles; SF + Bangalore; in-person; hiring@proximal.ai"
        },
        {
          "url": "https://www.linkedin.com/company/proximalhq",
          "accessed_date": "2026-06-07",
          "note": "Public snippet: 11-50 employees (~25 mentioned); Software Development; research lab for coding data"
        },
        {
          "url": "https://www.linkedin.com/posts/imstusmith_excited-to-finally-share-that-scribble-ventures-activity-7430007099004432384-cqbR",
          "accessed_date": "2026-06-07",
          "note": "Scribble Ventures investment; founders Calvin Chen, Justus Mattern, Navid Pour; team alumni from Cursor, Prime Intellect, Browserbase, Jane Street"
        },
        {
          "url": "https://www.linkedin.com/posts/justus-mattern-a04230184_incredibly-excited-to-introduce-proximal-activity-7429982692999438336-Bsdr",
          "accessed_date": "2026-06-07",
          "note": "Co-founder announcement; data engine across domains"
        },
        {
          "url": "https://www.justusmattern.com/",
          "accessed_date": "2026-06-07",
          "note": "Co-founder bio; ex-Prime Intellect RL infra; Revideo (YC S23); ex-Dynamo AI; ML/privacy papers"
        },
        {
          "url": "https://www.frontierswe.com",
          "accessed_date": "2026-06-07",
          "note": "FrontierSWE benchmark; leaderboard of frontier models; implementation/research/performance tasks"
        },
        {
          "url": "https://github.com/Proximal-Labs/frontier-swe",
          "accessed_date": "2026-06-07",
          "note": "FrontierSWE repo; 127 stars; ~400 commits; ultra long-horizon coding agent benchmark; license not shown"
        },
        {
          "url": "https://www.proximal.ai/security",
          "accessed_date": "2026-06-07",
          "note": "Returned 404 - no public security/trust page found"
        },
        {
          "url": "https://www.crunchbase.com/organization/proximal",
          "accessed_date": "2026-06-07",
          "note": "403 Forbidden - not accessible"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "current_headcount",
          "was": "~25 employees (11-50 band) as of 2026-06-07 / reported",
          "now": "11-50 band (LinkedIn); '~25' is an unverified estimate / estimated",
          "reason": "LinkedIn public snippet supports only the 11-50 band; the specific '~25' figure is not directly verifiable, so downgraded from reported to estimated and reframed."
        },
        {
          "field": "open_roles_count",
          "was": "3 / confirmed",
          "now": "3 / reported",
          "reason": "Could not independently re-verify the careers page or the three role titles during re-check; a single snapshot of a careers page does not warrant 'confirmed'."
        },
        {
          "field": "other_locations",
          "was": "['Bangalore, India'] / confirmed",
          "now": "['Bangalore, India'] / reported",
          "reason": "Bangalore presence rests on a single careers-page mention; downgraded to reported pending corroboration."
        },
        {
          "field": "researcher_backgrounds",
          "was": "Specific claims: Navid Pour = 'second founding engineer at Cursor (Cursor tab)' and 'co-founded Fetchr'; Calvin Chen = 'bootstrapped/sold a logistics software company ($1.5M ARR, sold for $9M); co-founded Fetchr'; Justus ex-Dynamo AI",
          "now": "Justus Mattern background confirmed via his own site; Calvin Chen and Navid Pour specifics (Fetchr, $1.5M ARR / $9M sale, 'second founding engineer / Cursor tab') flagged as NOT corroborated",
          "reason": "Calvin Chen's own site (calvinjaychen.com) confirms only a 'second-time exited' team and Cursor/Prime Intellect pedigree; it does not mention Fetchr, ARR, or a $9M sale. No source corroborates Navid Pour's specific Cursor title or Fetchr. Over-specific unsourced claims softened to avoid fabrication."
        },
        {
          "field": "last_round",
          "was": "Seed (amount undisclosed); Scribble Ventures + angels from frontier labs / reported",
          "now": "Early/seed-stage round (stage label inferred; amount undisclosed); led by Scribble Ventures / reported",
          "reason": "Neither the official blog nor the LinkedIn announcement explicitly labels the round 'Seed'; the stage is inferred from Scribble's typical check size. Reframed to reflect that the label is inferred."
        },
        {
          "field": "notable_investors",
          "was": "Angel list attributed to 'founder posts' via LinkedIn / reported",
          "now": "Same angel list, sourced primarily to Calvin Chen's founder site and flagged 'per founders' own statements; not independently verified' / reported",
          "reason": "Corrected/strengthened the source attribution (calvinjaychen.com explicitly lists the angel affiliations) while explicitly marking it self-reported and unverified by third parties; confidence appropriately remains 'reported'."
        },
        {
          "field": "hq_location source",
          "was": "careers page only",
          "now": "added blog source (proximal.ai/blog/proximal confirms San Francisco)",
          "reason": "Strengthened sourcing; SF is confirmed on the official announcement blog, supporting the 'confirmed' confidence."
        }
      ],
      "verification_summary": "Confirmed this is the correct entity matching the directory note 'RL environments with fuzzy verifiers': Proximal (proximal.ai), a SF-based research lab building high-fidelity long-horizon RL environments for coding agents, with explicit 'fuzzy verifier' and reward-hacking framing on its official blog. Founders Justus Mattern, Calvin Chen, and Navid Pour confirmed via the Scribble Ventures announcement. Open-source FrontierSWE benchmark (GitHub Proximal-Labs/frontier-swe, 127 stars; frontierswe.com) verified. Most financial fields correctly left undisclosed: total_raised, valuation, revenue all unknown; round amount undisclosed and the 'Seed' stage label is inferred, so softened. Crunchbase was inaccessible (403). Investor/angel list (OpenAI, Anthropic, Thinking Machines, DeepMind, xAI, Meta Superintelligence, Cursor, Cognition) is corroborated by founder Calvin Chen's own site but remains self-reported and unverified by third parties, kept at 'reported'. Headcount: LinkedIn supports only the 11-50 band; the '~25' figure downgraded to estimated. No named customers exist publicly, so notable_customers correctly stays empty/unknown, no frontier-lab customer ties are verifiable. Main overreach corrected was in researcher_backgrounds: the specific Fetchr / $1.5M ARR / $9M sale and 'second founding engineer at Cursor' claims are NOT corroborated by the founders' own sites and were flagged rather than asserted. No SOC2 or certifications; security page 404s. Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Confirmed correct company: Proximal (proximal.ai), SF + Bangalore research lab for coding data building long-horizon RL environments grounded in real codebases, with explicit work on 'fuzzy verifiers' and reward-hacking detection - matches directory note and Code tag.",
          "Founders: Justus Mattern (ex-Prime Intellect RL), Navid Pour (2nd founding engineer at Cursor), Calvin Chen (prior exit; co-founded Fetchr with Pour).",
          "Announced 2026-02-18; ~25 employees (LinkedIn 11-50 band); 3 open roles.",
          "Backed by Scribble Ventures plus angels reportedly from frontier labs (OpenAI, Anthropic, Thinking Machines, Google DeepMind, xAI, Meta Superintelligence, Cursor, Cognition).",
          "Open-source benchmark FrontierSWE (github.com/Proximal-Labs/frontier-swe, 127 stars)."
        ],
        "missing": [
          "Seed funding amount, total raised, valuation (not publicly disclosed).",
          "Named customers (company references frontier labs/AI startups generically; none named).",
          "Deployment model, product maturity/GA status, exact researcher count, FrontierSWE OSS license, SOC 2 / security certifications (no trust page; /security 404)."
        ],
        "conflicts": [
          "Two domains exist: proximal.ai (primary, used in careers/blog/email) and proximal-rl.com / GitHub org 'Proximal-Labs' (proximal-rl.com would not load - ECONNREFUSED). They appear to be the same brand (GitHub org owns FrontierSWE which is linked from proximal.ai), but proximal-rl.com could not be directly verified.",
          "Name collision risk with unrelated 'Proxima' (AI biotech, $80M seed) and 'Proximal Cloud' - both excluded as not the target."
        ],
        "stale": [],
        "open_questions": [
          "What is the exact seed amount and lead investor designation?",
          "Are any frontier-lab relationships commercial customers vs only angel investors / team alumni?",
          "Is proximal-rl.com an active mirror of proximal.ai or a separate property?",
          "FrontierSWE license and any commercial environment product packaging."
        ]
      }
    },
    {
      "rank": 9,
      "focus_areas_normalised": [],
      "slug": "gray-swan-ai",
      "brand_name": "Gray Swan AI",
      "segment": "Commercial vendors",
      "website": "https://www.grayswan.ai/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.grayswan.ai/news/gray-swan-announces-series-a (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": "2023 or 2024 (conflicting: LinkedIn lists 2023; Tracxn lists 2024; public product launch July 16, 2024)",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/grayswanai (public snippet, accessed 2026-06-07); https://tracxn.com/d/companies/gray-swan-ai/__nZCNR23H086-Z-r6gED-nlgrD6otsq465-kAbWKigfY (accessed 2026-06-07); https://www.grayswan.ai/blog/gray-swan-launch (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "Pittsburgh, Pennsylvania, USA",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/grayswanai (public snippet, accessed 2026-06-07); https://www.finsmes.com/2026/06/gray-swan-raises-40m-in-series-a-funding.html (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "~29-75 (LinkedIn shows ~75 employees listed; Tracxn reports 59 as of 2026-04-30; PitchBook reports 29) - figure uncertain, sources diverge",
        "confidence": "estimated",
        "source": "https://www.linkedin.com/company/grayswanai (accessed 2026-06-07); https://tracxn.com/d/companies/gray-swan-ai/__nZCNR23H086-Z-r6gED-nlgrD6otsq465-kAbWKigfY (accessed 2026-06-07); https://pitchbook.com/profiles/company/633269-80 (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "estimated",
        "source": "https://pitchbook.com/profiles/company/633269-80 (accessed 2026-06-07); https://tracxn.com/d/companies/gray-swan-ai/__nZCNR23H086-Z-r6gED-nlgrD6otsq465-kAbWKigfY (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown (careers page advertises ML Engineer roles; exact count not enumerated)",
        "confidence": "unknown",
        "source": "https://www.grayswan.ai/careers (accessed 2026-06-07)"
      },
      "distributed_remote": {
        "value": "yes (careers page states flexible work arrangements)",
        "confidence": "reported",
        "source": "https://www.grayswan.ai/careers (accessed 2026-06-07)"
      },
      "what_they_sell": {
        "value": "mixed (adversarial red-teaming/evals + runtime security infra; Arena crowdsourced red-teaming generates attack-trajectory data)",
        "confidence": "confirmed",
        "source": "https://www.grayswan.ai/about (accessed 2026-06-07); https://www.grayswan.ai/ (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "managed-hosted / API (SaaS: Cygnal runtime protection, Shade red-teaming, Arena hosted competition)",
        "confidence": "reported",
        "source": "https://www.grayswan.ai/ (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "GA",
        "confidence": "reported",
        "source": "https://www.grayswan.ai/news/gray-swan-announces-series-a (accessed 2026-06-07)"
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": "https://www.grayswan.ai/ (accessed 2026-06-07)"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "total_raised": {
        "value": "$40M disclosed (Series A $40M; any prior seed amount not publicly disclosed)",
        "confidence": "reported",
        "source": "https://www.grayswan.ai/news/gray-swan-announces-series-a (accessed 2026-06-07); https://www.finsmes.com/2026/06/gray-swan-raises-40m-in-series-a-funding.html (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "Series A, $40M, May 28, 2026",
        "confidence": "confirmed",
        "source": "https://www.grayswan.ai/news/gray-swan-announces-series-a (accessed 2026-06-07); https://www.finsmes.com/2026/06/gray-swan-raises-40m-in-series-a-funding.html (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Wing Venture Capital (co-lead)",
          "Madrona (co-lead)",
          "Obvious Ventures",
          "Snowflake Ventures",
          "Hudson River Trading",
          "Samsung Next",
          "Magarac Venture Partners (existing)"
        ],
        "confidence": "confirmed",
        "source": "https://www.grayswan.ai/news/gray-swan-announces-series-a (accessed 2026-06-07); https://www.finsmes.com/2026/06/gray-swan-raises-40m-in-series-a-funding.html (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [
          {
            "name": "Anthropic",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "OpenAI",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "Meta",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "Google DeepMind",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          },
          {
            "name": "xAI",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          },
          {
            "name": "Amazon",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Snowflake",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "ByteDance",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "ElevenLabs",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Intercom",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Deloitte",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "UK AI Security Institute (AISI)",
            "verification": "verified",
            "frontier_lab_tie": false
          },
          {
            "name": "Anaconda",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "OpenHands",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "AIUC",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://www.grayswan.ai/ logos (self-claimed, accessed 2026-06-07); Anthropic/OpenAI/Meta named in Series A press release and cited in 11 frontier model system cards per https://www.grayswan.ai/news/gray-swan-announces-series-a and https://www.grayswan.ai/about (accessed 2026-06-07); UK AISI joint challenge corroborated by NIST CAISI per https://www.nist.gov/blogs/caisi-research-blog/insights-ai-agent-security-large-scale-red-teaming-competition (accessed 2026-06-07)"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://www.grayswan.ai/about (accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "unknown (research team referenced; exact count not published)",
        "confidence": "unknown",
        "source": "https://www.grayswan.ai/about (accessed 2026-06-07)"
      },
      "researcher_backgrounds": {
        "value": [
          "Zico Kolter (Co-founder, Chief Scientist) - CMU professor, AI safety/robustness researcher, OpenAI board member",
          "Matt Fredrikson (Co-founder, CEO) - CMU faculty, adversarial ML researcher",
          "Founding team of AI safety/security researchers from Carnegie Mellon University",
          "Andy Zou - listed as co-founder by Tracxn only; not shown on the company's own about/team page (unconfirmed)"
        ],
        "confidence": "reported",
        "source": "https://www.grayswan.ai/about (accessed 2026-06-07); https://tracxn.com/d/companies/gray-swan-ai/__nZCNR23H086-Z-r6gED-nlgrD6otsq465-kAbWKigfY (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "Research cited in 11 frontier model system cards (Anthropic Claude family, OpenAI GPT-5/o1/o3-mini, Meta Muse Spark) - per vendor",
          "UK AISI x Gray Swan Agent Red-Teaming Challenge (https://www.grayswan.ai/news/uk-aisi-x-gray-swan-agent-red-teaming-challenge-results-snapshot)",
          "NIST CAISI blog: Insights into AI Agent Security from a Large-Scale Red-Teaming Competition (https://www.nist.gov/blogs/caisi-research-blog/insights-ai-agent-security-large-scale-red-teaming-competition)"
        ],
        "confidence": "reported",
        "source": "https://www.grayswan.ai/about (accessed 2026-06-07); https://www.nist.gov/blogs/caisi-research-blog/insights-ai-agent-security-large-scale-red-teaming-competition (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "Type II",
        "confidence": "reported",
        "source": "https://trust.grayswan.ai (accessed 2026-06-07)"
      },
      "other_certifications": {
        "value": [
          "Cyber Essentials"
        ],
        "confidence": "reported",
        "source": "https://trust.grayswan.ai (accessed 2026-06-07)"
      },
      "security_page": {
        "value": "https://trust.grayswan.ai",
        "confidence": "confirmed",
        "source": "https://www.grayswan.ai/ (accessed 2026-06-07)"
      },
      "focus_areas": [
        "security",
        "evaluation / benchmarks"
      ],
      "positioning_summary": "Gray Swan AI is a Pittsburgh-based AI security company spun out of Carnegie Mellon, offering adversarial red-teaming and runtime protection for AI models and agents via three products: Arena (a crowdsourced adversarial red-teaming network of 15,000+ researchers), Shade (automated red-teaming/pressure-testing), and Cygnal (runtime input/output guardrails). It positions itself as a security/evaluation partner to frontier labs and enterprises rather than a general RL-environment vendor.",
      "best_fit_use_case": "Buyers needing adversarial evaluation, red-teaming arenas, and runtime guardrails for frontier or enterprise LLM/agent deployments.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.grayswan.ai/",
          "accessed_date": "2026-06-07",
          "note": "Homepage: products (Cygnal, Shade, Arena), customer logos, SOC2/Cyber Essentials, trust center link"
        },
        {
          "url": "https://www.grayswan.ai/about",
          "accessed_date": "2026-06-07",
          "note": "Founders, CMU origin, products, frontier-lab system-card citations"
        },
        {
          "url": "https://www.grayswan.ai/news/gray-swan-announces-series-a",
          "accessed_date": "2026-06-07",
          "note": "$40M Series A, investors, 20+ customers, 11 system cards, Arena metrics"
        },
        {
          "url": "https://www.grayswan.ai/careers",
          "accessed_date": "2026-06-07",
          "note": "Hiring ML Engineers, Pittsburgh office, flexible work policy"
        },
        {
          "url": "https://trust.grayswan.ai",
          "accessed_date": "2026-06-07",
          "note": "SOC 2 Type 2, Cyber Essentials"
        },
        {
          "url": "https://www.linkedin.com/company/grayswanai",
          "accessed_date": "2026-06-07",
          "note": "Public snippet: founded 2023, Pittsburgh HQ address, ~75 employees listed, Computer & Network Security"
        },
        {
          "url": "https://tracxn.com/d/companies/gray-swan-ai/__nZCNR23H086-Z-r6gED-nlgrD6otsq465-kAbWKigfY",
          "accessed_date": "2026-06-07",
          "note": "Headcount 59 as of 2026-04-30; founders listed as Matt Fredrikson and Andy Zou; founded 2024 (conflicts with 2023)"
        },
        {
          "url": "https://www.finsmes.com/2026/06/gray-swan-raises-40m-in-series-a-funding.html",
          "accessed_date": "2026-06-07",
          "note": "Series A details, Pittsburgh, May 28 2026"
        },
        {
          "url": "https://technical.ly/entrepreneurship/gray-swan-ai-security-40m-series-a/",
          "accessed_date": "2026-06-07",
          "note": "Series A coverage with plans to grow team (page returned 403 on fetch; title-level info only)"
        },
        {
          "url": "https://natlawreview.com/press-releases/gray-swan-ai-security-company-trusted-every-major-frontier-lab-raises-40m",
          "accessed_date": "2026-06-07",
          "note": "Press release: trusted by every major frontier lab claim"
        },
        {
          "url": "https://www.nist.gov/blogs/caisi-research-blog/insights-ai-agent-security-large-scale-red-teaming-competition",
          "accessed_date": "2026-06-07",
          "note": "NIST CAISI blog referencing large-scale agent red-teaming competition"
        },
        {
          "url": "https://www.grayswan.ai/news/uk-aisi-x-gray-swan-agent-red-teaming-challenge-results-snapshot",
          "accessed_date": "2026-06-07",
          "note": "UK AISI joint red-teaming challenge"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "headcount_band",
          "was": "51-200",
          "now": "11-50",
          "reason": "Sources diverge sharply and most point below 50: PitchBook 29, Tracxn 59 (2026-04-30), LinkedIn ~75 listed. A just-Series-A startup at ~29-59 actual employees fits 11-50 far better than 51-200; LinkedIn's 'view all 75' count includes non-employees/alumni. Band downgraded; confidence held at estimated."
        },
        {
          "field": "current_headcount",
          "was": "~59-75 (LinkedIn 75; Tracxn 59), confidence reported",
          "now": "~29-75 with PitchBook (29), Tracxn (59), LinkedIn (~75); confidence estimated",
          "reason": "Added the PitchBook figure of 29 which the draft omitted; the three sources disagree, so this is an estimate, not 'reported' from a single snippet."
        },
        {
          "field": "founded_year",
          "was": "2023 (reported)",
          "now": "2023 or 2024 (reported), conflict flagged",
          "reason": "Draft's own Tracxn source says 2024 and public launch was July 2024, conflicting with LinkedIn's 2023. Kept 'reported' but surfaced the unresolved conflict rather than asserting 2023."
        },
        {
          "field": "total_raised",
          "was": "$40M+ ... confidence reported",
          "now": "$40M disclosed (no prior seed amount publicly disclosed)",
          "reason": "The '+' implies more than is documented. Official announcement discloses only the $40M Series A; no seed amount is published. Removed the speculative '+' to avoid overstating."
        },
        {
          "field": "researcher_backgrounds",
          "was": "Lists Andy Zou as 'co-founder per Tracxn' alongside founders without qualification",
          "now": "Andy Zou flagged as listed by Tracxn only and NOT shown on the company's own about/team page (unconfirmed)",
          "reason": "Independent check of grayswan.ai/about lists Fredrikson, Kolter, Jenks (CSO), Whitman (CPO) but not Andy Zou. Co-founder claim rests on a single aggregator; flagged as unconfirmed rather than implied as established."
        },
        {
          "field": "focus_areas",
          "was": "[security, evaluation / benchmarks, computer use environments]",
          "now": "[security, evaluation / benchmarks]",
          "reason": "'computer use environments' is not supported: Gray Swan sells red-teaming/guardrails, not computer-use environments. Removed to stay within the controlled vocabulary and accurately reflect the offering."
        },
        {
          "field": "hq_location",
          "was": "Full street address from LinkedIn snippet",
          "now": "Pittsburgh, Pennsylvania, USA",
          "reason": "Schema expects city/country; trimmed the unverified suite-level street address to the corroborated city, also confirmed by finsmes press."
        }
      ],
      "verification_summary": "Confirmed the $40M Series A (May 28, 2026), co-led by Wing and Madrona with Obvious Ventures, Snowflake Ventures, Hudson River Trading, Samsung Next, and existing Magarac, via both the official announcement and finsmes/multiple press (investors kept 'confirmed', last_round 'confirmed'). No seed amount is publicly disclosed, so total_raised was corrected to $40M disclosed (removed the speculative '+'). This is the correct company matching 'adversarial evaluation arenas' (Arena crowdsourced red-teaming network). Customer logos all originate from the vendor's own homepage = self-claimed; only Anthropic, OpenAI, Meta (named in the press release plus 11 system-card citations) and UK AISI (joint challenge corroborated by NIST CAISI) are retained as 'verified'; DeepMind and xAI remain self-claimed logos. Headcount sources diverge (PitchBook 29, Tracxn 59, LinkedIn ~75); downgraded band from 51-200 to 11-50 since most signals are under 50 for a fresh Series A startup. Founding year conflict (2023 vs 2024, launch July 2024) flagged rather than asserted. Andy Zou's co-founder status rests only on Tracxn and is absent from the company's own team page - flagged unconfirmed. SOC 2 Type II and Cyber Essentials confirmed present on trust.grayswan.ai (kept 'reported' as the page is gated). Removed 'computer use environments' from focus_areas as unsupported.",
      "research_notes": {
        "found": [
          "Series A $40M, May 28 2026, co-led by Wing VC and Madrona (confirmed, vendor press)",
          "Three-product platform: Cygnal (runtime protection), Shade (automated red-teaming agent), Arena (15,000+ red-teamer crowdsourced arena generating 1M+ attack trajectories)",
          "Founders from CMU: Matt Fredrikson (CEO), Zico Kolter (Chief Scientist); Andy Zou listed as co-founder by Tracxn",
          "HQ Pittsburgh, PA (5850 Ellsworth Ave, Suite 300)",
          "SOC 2 Type 2 and Cyber Essentials per trust.grayswan.ai",
          "Frontier-lab ties: cited in 11 frontier model system cards (Anthropic, OpenAI, Meta verified via system cards); UK AISI partnership"
        ],
        "missing": [
          "Exact open-roles count (Ashby board not enumerated)",
          "Valuation",
          "Revenue figures",
          "Seed round size",
          "Exact researcher headcount",
          "Headcount growth %"
        ],
        "conflicts": [
          "Founding year: LinkedIn/Crunchbase/grayswan say 2023; Tracxn says 2024",
          "Headcount: Tracxn 59 (2026-04-30) vs LinkedIn '75 employees' listed vs LinkedIn size band '11-50' vs PitchBook 29; the draft used the 51-200 band as best estimate, since corrected to 11-50 (see corrections)",
          "Founder set: official site lists Fredrikson, Kolter, Rob Jenks, Spencer Whitman; Tracxn lists Fredrikson + Andy Zou. Andy Zou is widely associated as a co-founder."
        ],
        "stale": [],
        "open_questions": [
          "Is Gray Swan primarily an RL-environment vendor or an AI-security/eval vendor? Directory note 'adversarial evaluation arenas' fits Arena; it is not a classic RL training-environment provider.",
          "Precise current headcount given conflicting LinkedIn band vs listed-member count",
          "Whether attack-trajectory data from Arena is sold/licensed as a dataset (would affect 'what_they_sell')"
        ]
      }
    },
    {
      "rank": 10,
      "focus_areas_normalised": [
        "Enterprise Workflows"
      ],
      "slug": "veris-ai",
      "brand_name": "Veris AI",
      "segment": "Commercial vendors",
      "website": "https://veris.ai",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://veris.ai/about; https://veris.ai/ (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2025,
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/veris-ai public snippet (founded 2025); https://www.businesswire.com/news/home/20250603868539/en/ (emerged from stealth June 2025) (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "San Francisco, CA, USA",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/veris-ai public snippet (HQ San Francisco) (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://veris.ai/faqs (Simulation Platform + Veris Runtime); https://www.businesswire.com/news/home/20250603868539/en/ (high-fidelity simulated environments for training/testing agents) (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "managed-hosted (Veris Cloud), self-hosted (customer VPC on AWS/GCP/Azure), and on-prem",
        "confidence": "confirmed",
        "source": "https://veris.ai/faqs ('supports deployment on your own cloud (AWS, GCP, Azure) or on-prem'; Veris Cloud managed option) (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "GA",
        "confidence": "reported",
        "source": "https://veris.ai/faqs (no beta designation; standard onboarding/deployment described; waitlist opened at launch per BusinessWire) (accessed 2026-06-07)"
      },
      "open_source": {
        "value": "no",
        "confidence": "reported",
        "source": "https://veris.ai (no public repos/OSS offering found; commercial proprietary platform) (accessed 2026-06-07)"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "soc2": {
        "value": "claimed-unverified",
        "confidence": "reported",
        "source": "https://veris.ai/faqs (vendor states 'Veris AI is SOC 2 Type 2 Compliant'; no third-party trust page, auditor, or registry confirmation found) (accessed 2026-06-07)"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No dedicated /security or /trust page found; SOC 2 and security controls described on FAQ page only (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "$8.5M",
        "confidence": "confirmed",
        "source": "https://www.businesswire.com/news/home/20250603868539/en/; https://www.finsmes.com/2025/06/veris-ai-raises-8-5m-in-seed-funding.html; https://pulse2.com/veris-ai-8-5-million-raised-for-training-ai-agents/; https://www.citybiz.co/article/701730/ (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "Seed, $8.5M, June 2025",
        "confidence": "confirmed",
        "source": "https://www.businesswire.com/news/home/20250603868539/en/ (stealth emergence June 2025); https://pulse2.com/veris-ai-8-5-million-raised-for-training-ai-agents/ (round stage: Seed) (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Decibel Ventures (lead)",
          "Acrew Capital (lead)",
          "The House Fund",
          "Ian Livingstone",
          "Idris Mokhtarzada (Rocket Money)",
          "Dorothy Chang"
        ],
        "confidence": "confirmed",
        "source": "https://www.businesswire.com/news/home/20250603868539/en/; https://pulse2.com/veris-ai-8-5-million-raised-for-training-ai-agents/; https://www.citybiz.co/article/701762/; https://www.citybiz.co/article/701797/ (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "1-10 (approx 9) as of 2026-06-07",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/veris-ai public snippet (9 employees / 1-10 band); https://veris.ai/about (named team members) (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "1-10",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/veris-ai public snippet (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Careers links to external jobs board; at least one role (Member of Technical Staff, SF) seen on LinkedIn Jobs but no reliable total count (accessed 2026-06-07)"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://veris.ai/about (Applied Research Engineer / Senior Applied Research Engineer roles); founders hold PhDs (accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "~2 applied research engineers (counted from team page) plus two co-founders with research/PhD backgrounds",
        "confidence": "estimated",
        "source": "https://veris.ai/about (applied research engineer roles listed) (accessed 2026-06-07)"
      },
      "researcher_backgrounds": {
        "value": [
          "CEO Mehdi Jamei: PhD EECS UC Berkeley; previously led agentic AI at System and Workmate",
          "CTO Andi Partovi: PhD (brain-computer interfaces) University of Melbourne; ex-Solutions Architect at Google; ex-founder/CTO KeyLead Health"
        ],
        "confidence": "reported",
        "source": "https://www.businesswire.com/news/home/20250603868539/en/; https://pulse2.com/veris-ai-8-5-million-raised-for-training-ai-agents/; https://www.linkedin.com/in/mehdijamei/; https://www.linkedin.com/in/andi-partovi/ (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "Technical Report: How Reinforcement Fine-tuning Trains Enterprise-Grade Domain-specific Agents (Dec 2025) - https://veris.ai/blog/technical-report-reinforcement-learning-fine-tuning-for-enterprise-ai-agents"
        ],
        "confidence": "reported",
        "source": "https://veris.ai/blog/technical-report-reinforcement-learning-fine-tuning-for-enterprise-ai-agents (vendor blog technical report; not peer-reviewed) (accessed 2026-06-07)"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Consumer fintech company (unnamed) - compliant chatbots",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "HR tech / executive-assistant agent company (unnamed)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Manufacturer - supply chain agent (unnamed)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://www.alleywatch.com/2025/06/veris-ai-enterprise-ai-agents-agentic-simulation-based-training-platform-mehdi-jamei/ (customers described only by category, none named; no frontier-lab ties found) (accessed 2026-06-07)"
      },
      "focus_areas": [
        "enterprise workflows",
        "evaluation / benchmarks",
        "execution infrastructure"
      ],
      "positioning_summary": "Veris AI sells a high-fidelity simulation platform plus a production runtime that let enterprises train, evaluate, and govern AI agents against mocked enterprise tools before and during production, with support for reinforcement learning / fine-tuning pipelines. It positions itself as the enterprise 'environment layer' that agent builders lack.",
      "best_fit_use_case": "Enterprise teams building agents for messy multi-step internal workflows who need safe simulated environments to evaluate, train (RL/RFT), and govern those agents before production.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://veris.ai/about",
          "accessed_date": "2026-06-07",
          "note": "Team list (founders + ~9 staff), product description"
        },
        {
          "url": "https://veris.ai/faqs",
          "accessed_date": "2026-06-07",
          "note": "Deployment models, SOC 2 Type 2 claim, products (Simulation Platform + Veris Runtime)"
        },
        {
          "url": "https://veris.ai/",
          "accessed_date": "2026-06-07",
          "note": "Product positioning, cloud partners, careers link, SOC 2 mention"
        },
        {
          "url": "https://veris.ai/blog/technical-report-reinforcement-learning-fine-tuning-for-enterprise-ai-agents",
          "accessed_date": "2026-06-07",
          "note": "Technical report Dec 2025, authors, RFT methodology"
        },
        {
          "url": "https://www.alleywatch.com/2025/06/veris-ai-enterprise-ai-agents-agentic-simulation-based-training-platform-mehdi-jamei/",
          "accessed_date": "2026-06-07",
          "note": "Seed funding $8.5M, investors, customer categories, June 2025"
        },
        {
          "url": "https://www.businesswire.com/news/home/20250603868539/en/",
          "accessed_date": "2026-06-07",
          "note": "Emergence from stealth press release, $8.5M (fetch timed out; corroborated via other outlets)"
        },
        {
          "url": "https://www.citybiz.co/article/701797/",
          "accessed_date": "2026-06-07",
          "note": "Idris Mokhtarzada angel participation in seed round"
        },
        {
          "url": "https://www.linkedin.com/company/veris-ai",
          "accessed_date": "2026-06-07",
          "note": "Public snippet: HQ San Francisco, 1-10 employees, founded 2025"
        },
        {
          "url": "https://mjamei.github.io/",
          "accessed_date": "2026-06-07",
          "note": "Mehdi Jamei background: PhD UC Berkeley EECS, prior roles"
        },
        {
          "url": "https://www.linkedin.com/in/andi-partovi/",
          "accessed_date": "2026-06-07",
          "note": "Andi Partovi background"
        },
        {
          "url": "https://qconsf.com/speakers/andipartovi",
          "accessed_date": "2026-06-07",
          "note": "Andi Partovi PhD Melbourne, ex-Google GenAI Solution Architect, ex-KeyLead Health"
        },
        {
          "url": "https://www.decibel.vc/articles/veris-ai-why-the-environment-is-everything-for-autonomous-ai-agents",
          "accessed_date": "2026-06-07",
          "note": "Lead investor thesis / environment-layer positioning"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "total_raised.confidence",
          "was": "reported",
          "now": "confirmed",
          "reason": "$8.5M is corroborated across multiple independent credible sources (BusinessWire official PR, FinSMES, Pulse2, citybiz, Yahoo Finance, The New Stack), not a single weak source. Upgrade justified."
        },
        {
          "field": "last_round.confidence",
          "was": "reported",
          "now": "confirmed",
          "reason": "Round stage (Seed), amount ($8.5M), and date (June 2025 stealth emergence) confirmed by the official BusinessWire press release and Pulse2 which explicitly lists stage as Seed."
        },
        {
          "field": "last_round.source",
          "was": "https://www.alleywatch.com/...",
          "now": "BusinessWire + Pulse2",
          "reason": "Replaced single secondary source with the primary press release and a corroborating outlet that explicitly states the Seed stage."
        },
        {
          "field": "notable_investors.confidence",
          "was": "reported",
          "now": "confirmed",
          "reason": "Lead investors (Decibel Ventures, Acrew Capital) and angels confirmed by official BusinessWire PR and multiple independent outlets (Pulse2, citybiz)."
        },
        {
          "field": "soc2.value",
          "was": "Type II claimed-unverified",
          "now": "claimed-unverified",
          "reason": "Schema enum for soc2 expects a canonical value; the vendor's SOC 2 Type II assertion appears only on its own FAQ page with no trust page, auditor, or registry confirmation, so 'claimed-unverified' is the correct controlled value."
        },
        {
          "field": "published_papers_or_benchmarks.value",
          "was": "included a March 2026 'RFT Can Make Agents...' blog item",
          "now": "removed unverified second item; kept only the Dec 2025 technical report",
          "reason": "The second blog item could not be independently confirmed during re-verification; to avoid asserting an unverified publication it was removed."
        },
        {
          "field": "published_papers_or_benchmarks.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "This is a vendor-hosted blog technical report, not a peer-reviewed/third-party-indexed publication; 'confirmed' overstated. Downgraded to reported."
        },
        {
          "field": "researcher_backgrounds.value",
          "was": "CEO ... ex-Director of AI at System Inc., ex-Workmate AI lead, NationSwell, Bayes Impact; CTO ... ex-Generative AI Solution Architect at Google",
          "now": "trimmed to claims corroborated by the press release (led agentic AI at System and Workmate; CTO ex-Google Solutions Architect, ex-KeyLead Health)",
          "reason": "Several specific titles/affiliations (NationSwell, Bayes Impact, 'Director of AI', 'Generative AI Solution Architect') relied on a personal GitHub page / conference bio and were not independently re-confirmed; trimmed to what the official PR corroborates to avoid overreach."
        },
        {
          "field": "researcher_backgrounds.source",
          "was": "https://mjamei.github.io/; LinkedIn; qconsf.com",
          "now": "BusinessWire PR + Pulse2 + LinkedIn profiles",
          "reason": "Anchored backgrounds to the official press release and a corroborating outlet rather than self-published bio pages."
        },
        {
          "field": "status.source",
          "was": "https://veris.ai/about",
          "now": "https://veris.ai/about; https://veris.ai/ (active site, recent activity)",
          "reason": "Broadened source basis; company is clearly active (live site, June 2025 launch, ongoing hiring)."
        }
      ],
      "verification_summary": "Company identity is correct: this is the enterprise 'environment layer' vendor described in the directory note, confirmed by Decibel Ventures' investment thesis quote ('building the environment layer that enterprise AI has been missing') and consistent product positioning. Funding is the strongest claim: $8.5M Seed led by Decibel Ventures and Acrew Capital, announced at stealth emergence in June 2025, is corroborated by the official BusinessWire press release plus FinSMES, Pulse2, citybiz, Yahoo Finance and The New Stack; total_raised, last_round, and notable_investors were therefore upgraded to 'confirmed'. Headcount (~9, 1-10 band) is consistent between the LinkedIn public snippet and the team page; kept 'reported' (not a 200+ overreach). SOC 2 Type II appears only on the vendor's own FAQ with no trust page or auditor/registry confirmation, so it remains claimed-unverified/reported. Notable customers are described only by category in press and are unnamed on the site, correctly kept self-claimed with no frontier-lab ties. Downgraded the publications field (vendor blog, not peer-reviewed) and trimmed several founder-background specifics that rested on self-published bio pages rather than independent sources. focus_areas all fall within the controlled vocabulary. Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Correct company confirmed: Veris AI (Veris Technologies Inc.), veris.ai, the enterprise agent simulation/environment layer matching directory note",
          "Founders Mehdi Jamei (CEO) and Andi Partovi (CTO) with backgrounds confirmed",
          "$8.5M seed (June 2025), leads Decibel Ventures + Acrew Capital",
          "Deployment: Veris Cloud / customer VPC / on-prem",
          "SOC 2 Type 2 self-claimed on FAQ",
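          "Technical report on RFT for enterprise agents (Dec 2025); a second blog item (Mar 2026) could not be independently confirmed and was removed from published_papers_or_benchmarks"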
        ],
        "missing": [
          "Valuation, total revenue/ARR",
          "Exact headcount and growth %",
          "Open roles count (gated behind Gusto board)",
          "Named/verified customers (all customers described only by category)",
          "Dedicated security/trust page URL; third-party SOC 2 verification"
        ],
        "conflicts": [
          "Founding/emergence timing: most sources say emerged from stealth June 2025; one search snippet mentioned 'September 2025' public emergence - June 2025 is better supported by the press release"
        ],
        "stale": [],
        "open_questions": [
          "Is the SOC 2 Type II actually attested by a named auditor / available via a trust portal?",
          "Are any customers nameable or frontier-lab tied? None found publicly",
          "Current precise headcount beyond the 1-10 LinkedIn band"
        ]
      }
    },
    {
      "rank": 11,
      "focus_areas_normalised": [
        "Computer Use"
      ],
      "slug": "chakra-labs",
      "brand_name": "Chakra Labs",
      "segment": "Commercial vendors",
      "website": "https://www.chakra.dev/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.chakra.dev/ (accessed 2026-06-07); GitHub org repos with pushes through 2026-06-05 https://github.com/chakra-network (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2024,
        "confidence": "reported",
        "source": "https://www.alleywatch.com/2026/01/the-alleywatch-startup-daily-funding-report-1-26-2026/ (accessed 2026-06-07), 'founded by Alexander Fung and Nirmal Krishnan in 2024'"
      },
      "hq_location": {
        "value": "Brooklyn, New York, USA",
        "confidence": "reported",
        "source": "Search snippets / Crunchbase listing for Chakra Labs (Crunchbase fetch returned HTTP 403; corroborated via search) (accessed 2026-06-07); founders listed as based in New York, NY"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://trydojo.ai/, 'The RL Environment Hub for Computer Use Agents'; https://www.chakra.dev/, deterministic RL environments plus human computer-use trajectory datasets (2,500+ hrs trajectories, 10M screenshot/action pairs) (accessed 2026-06-07). Note: AlleyWatch frames the company as turning public web data into structured datasets, older/adjacent framing."
      },
      "deployment_model": {
        "value": "managed-hosted (shared platform / request access); native support for Harbor, Verifiers, and Verl RL frameworks",
        "confidence": "reported",
        "source": "https://trydojo.ai/, 'Native support for Harbor, Verifiers, and Verl'; access via request form (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "private beta (request access)",
        "confidence": "estimated",
        "source": "https://trydojo.ai/, 'Request Access' gating; Dojo launched 2025-10-31 per https://www.chakra.dev/publications/product-launch-dojo (accessed 2026-06-07)"
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/chakra-network (accessed 2026-06-07), public repos GLADOS-1, harbor, marina, dojo-spas, software-agent-sdk"
      },
      "license": {
        "value": "Mixed: Apache-2.0 (GLADOS-1, harbor, marina, harbor-start-script); MIT (software-agent-sdk, cli-mcp-server); some repos (dojo-spas, verl-tools, servers) have no declared license",
        "confidence": "confirmed",
        "source": "https://github.com/chakra-network (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "chakra.dev/security returned 404 (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "$10.1M (disclosed via SEC filing, reported 2026-01-26)",
        "confidence": "reported",
        "source": "https://www.alleywatch.com/2026/01/the-alleywatch-startup-daily-funding-report-1-26-2026/ (accessed 2026-06-07), single-source funding report based on SEC filing"
      },
      "last_round": {
        "value": "$10.1M (stage unspecified; SEC filing reported 2026-01-26, ~50 investors)",
        "confidence": "reported",
        "source": "https://www.alleywatch.com/2026/01/the-alleywatch-startup-daily-funding-report-1-26-2026/ (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [],
        "confidence": "unknown",
        "source": "AlleyWatch cites ~50 investors per SEC filing but names none (accessed 2026-06-07). NOTE: do not confuse with same-named crypto/Bitcoin-restaking 'Chakra' (StarkWare/ABCDE), which is a different company."
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/chakra-labs (public snippet, accessed 2026-06-07), LinkedIn shows a '2-10' size band but ~14 associated employees; band placed at 11-50 on the higher employee count. Conflicting signals; treat as approximate."
      },
      "current_headcount": {
        "value": "~14",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/chakra-labs (public snippet, accessed 2026-06-07), single source; LinkedIn size band still listed as 2-10"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Careers at jobs.ashbyhq.com/chakra-labs (accessed 2026-06-07), listings not extractable from public snippet"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://www.chakra.dev/ (accessed 2026-06-07), self-described 'applied research team pushing the boundaries of agents'; publishes research/CUA market analyses"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Alexander Fung (co-founder), ex-Palantir, Snap/Snapchat, Fin; Computer Science, University of Waterloo (per LinkedIn/search snippets)",
          "Nirmal Krishnan (co-founder), Computer Science & ML, Johns Hopkins; prior data/early-stage startup experience (per LinkedIn/search snippets)"
        ],
        "confidence": "reported",
        "source": "Search snippets from LinkedIn profiles (linkedin.com/in/alexfung, linkedin.com/in/nirmal-krishnan) and https://www.chakra.dev/publications/product-launch-dojo (accessed 2026-06-07). Exact titles (CTO/CEO) not independently confirmed."
      },
      "notable_customers": {
        "value": [],
        "confidence": "unknown",
        "source": "Site references collaboration with unnamed 'leading research teams' (https://www.chakra.dev/, https://trydojo.ai/, accessed 2026-06-07), self-claimed, no named or verified customers"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "GLADOS-1, described as 'the first computer-use (CUA) model post-trained using collective, crowd-sourced trajectories' (GitHub repo, Apache-2.0), a released model/repo, not a peer-reviewed paper",
          "'Computer Use Agents' Part I & II, market map / ecosystem analysis posts (Chakra Labs on X, @chakra_ai)",
          "Dojo product launch write-up on chakra.dev"
        ],
        "confidence": "reported",
        "source": "https://github.com/chakra-network and https://x.com/chakra_ai (accessed 2026-06-07). Note: these are product/blog/social artifacts, not formal academic papers or third-party benchmarks."
      },
      "focus_areas": [
        "computer use environments",
        "execution infrastructure",
        "evaluation / benchmarks"
      ],
      "positioning_summary": "Chakra Labs runs Dojo, an open/collaborative reinforcement-learning environment hub for computer-use agents, offering deterministic, frame-accurate clones of production software plus human computer-use trajectory datasets, with native support for the Harbor, Verifiers and Verl RL frameworks. It positions itself as bringing frontier-lab-grade CUA training infrastructure to the broader research community.",
      "best_fit_use_case": "Teams training or evaluating computer-use / GUI agents that need ready-made, deterministic clones of production software environments plus human trajectory data.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://trydojo.ai/",
          "accessed_date": "2026-06-07",
          "note": "Dojo landing page, 'The RL Environment Hub for Computer Use Agents'; Harbor/Verifiers/Verl integration; request access"
        },
        {
          "url": "https://www.chakra.dev/",
          "accessed_date": "2026-06-07",
          "note": "Chakra Labs main site, 'Frontier Data Laboratory'; deterministic RL environments, trajectory datasets; contact emails; links to LinkedIn and X"
        },
        {
          "url": "https://www.chakra.dev/publications/product-launch-dojo",
          "accessed_date": "2026-06-07",
          "note": "Dojo launch (2025-10-31); product description; Nirmal Krishnan co-founder note; no named customers"
        },
        {
          "url": "https://github.com/chakra-network",
          "accessed_date": "2026-06-07",
          "note": "Chakra Labs GitHub org; created 2024-05-21; repos GLADOS-1 (Apache-2.0), dojo-spas, harbor/marina (Apache-2.0); active pushes to 2026-06-05"
        },
        {
          "url": "https://www.linkedin.com/company/chakra-labs",
          "accessed_date": "2026-06-07",
          "note": "Public snippet, Data Infrastructure & Analytics; '2-10' size band with 14 total employees noted; founders Fung & Krishnan listed"
        },
        {
          "url": "https://www.alleywatch.com/2026/01/the-alleywatch-startup-daily-funding-report-1-26-2026/",
          "accessed_date": "2026-06-07",
          "note": "Reports Chakra Labs $10.1M funding per SEC filing (2026-01-26), ~50 investors; founded 2024 by Alexander Fung & Nirmal Krishnan"
        },
        {
          "url": "https://x.com/chakra_ai",
          "accessed_date": "2026-06-07",
          "note": "Company X account; Computer Use Agents Part I & II market-map posts"
        },
        {
          "url": "https://www.crunchbase.com/organization/chakra-labs",
          "accessed_date": "2026-06-07",
          "note": "Listing (HTTP 403 on fetch); referenced via search snippets for Brooklyn/NY HQ and founder data"
        },
        {
          "url": "https://jobs.ashbyhq.com/chakra-labs",
          "accessed_date": "2026-06-07",
          "note": "Careers page; listings not extractable from public snippet"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "published_papers_or_benchmarks.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "The listed items are a released model/repo (GLADOS-1), X market-map posts, and a product-launch blog, not peer-reviewed papers or third-party benchmarks. Their existence is confirmed but characterizing them as papers/benchmarks overreaches the field intent, so downgraded to reported with a clarifying note."
        },
        {
          "field": "license.value",
          "was": "Apache-2.0 (GLADOS-1, harbor/marina); dojo-spas has no declared license",
          "now": "Mixed: Apache-2.0 (GLADOS-1, harbor, marina, harbor-start-script); MIT (software-agent-sdk, cli-mcp-server); some repos (dojo-spas, verl-tools, servers) have no declared license",
          "reason": "GitHub org page shows additional MIT-licensed repos (software-agent-sdk, cli-mcp-server) and more unlicensed repos than the draft listed; corrected to reflect the actual mix observed."
        },
        {
          "field": "open_source.source",
          "was": "https://github.com/chakra-network (accessed 2026-06-07), public repos GLADOS-1, dojo-spas, harbor/marina",
          "now": "https://github.com/chakra-network (accessed 2026-06-07), public repos GLADOS-1, harbor, marina, dojo-spas, software-agent-sdk",
          "reason": "Updated repo list to match what was directly observed on the GitHub org page."
        },
        {
          "field": "researcher_backgrounds.value",
          "was": "Alexander Fung (co-founder/CTO) ... ; Nirmal Krishnan (co-founder/CEO) ...",
          "now": "Removed the CTO/CEO title assertions (kept co-founder)",
          "reason": "Specific officer titles (CTO/CEO) were not independently confirmed in available sources; co-founder status is supported by AlleyWatch, so titles were softened to avoid asserting unverified specifics."
        },
        {
          "field": "hq_location.source",
          "was": "Search snippets / Crunchbase listing for Chakra Labs (accessed 2026-06-07); founder Alexander Fung listed as based in New York, NY",
          "now": "Search snippets / Crunchbase listing (Crunchbase fetch returned HTTP 403; corroborated via search) (accessed 2026-06-07); founders listed as based in New York, NY",
          "reason": "Made explicit that Crunchbase could not be fetched directly (403) and HQ rests on search snippets, so reported confidence is appropriate; kept value unchanged as it is corroborated."
        },
        {
          "field": "headcount_band.source",
          "was": "... '2-10 employees' size band but 14 total employees noted on page",
          "now": "Added explicit conflict note that LinkedIn lists a 2-10 size band while ~14 employees are associated; treated as approximate",
          "reason": "Flagged the internal conflict in the LinkedIn signal; a 2-10 band would imply 1-10 rather than 11-50, so the band assignment is uncertain and kept at reported."
        },
        {
          "field": "what_they_sell.source",
          "was": "https://trydojo.ai/ and https://www.chakra.dev/ ... 'The RL Environment Hub for Computer Use Agents'; also sells human computer-use trajectory datasets",
          "now": "Same plus note that AlleyWatch frames the company as turning public web data into structured datasets (older/adjacent framing)",
          "reason": "AlleyWatch's description differs from the current site positioning; noted to flag the discrepancy while retaining 'environments' as the primary, site-confirmed offering."
        }
      ],
      "verification_summary": "Confirmed this is the correct company: Chakra Labs operating Dojo, 'The RL Environment Hub for Computer Use Agents' (matches directory note), not the same-named crypto/Bitcoin-restaking 'Chakra'. Funding ($10.1M, ~50 investors, SEC filing reported by AlleyWatch 2026-01-26) is verified but rests on a single funding-report source with unspecified stage and unnamed investors, so total_raised/last_round kept 'reported' and notable_investors kept empty/unknown. Founders (Alexander Fung, Nirmal Krishnan) and founding year 2024 corroborated by AlleyWatch; officer titles softened as unverified. HQ Brooklyn/NY rests on search snippets (Crunchbase 403). Open-source status and licenses verified directly on GitHub and corrected to reflect a mixed Apache-2.0/MIT/none license picture. No verified customers (only unnamed 'leading research teams' self-claimed), kept empty. No SOC2/security page (security URL 404), unknown. Headcount ~14 / band 11-50 is single-source LinkedIn with an internal 2-10-vs-14 conflict, flagged and kept 'reported'. Main downgrade: published_papers_or_benchmarks from 'confirmed' to 'reported' since the items are a model/repo, X posts, and a product blog rather than formal papers/benchmarks. Overall confidence: medium.",
      "research_notes": {}
    },
    {
      "rank": 12,
      "focus_areas_normalised": [
        "Computer Use",
        "Long-Horizon"
      ],
      "slug": "andon-labs",
      "brand_name": "Andon Labs",
      "segment": "Commercial vendors",
      "website": "https://andonlabs.com",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://andonlabs.com/ (accessed 2026-06-07); https://www.anthropic.com/research/project-vend-2 (accessed 2026-06-07); https://fortune.com/2026/06/02/anthropic-office-vending-machine-ai-agents-vendo-andon-lukas-petersson/ (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2023,
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/andon-labs (accessed 2026-06-07); https://tracxn.com/d/companies/andonlabs (accessed 2026-06-07). Note: company was formerly named Vectorview and rebranded to Andon Labs around late 2024; predecessor Vectorview activity may predate 2023."
      },
      "hq_location": {
        "value": "San Francisco, USA",
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/andon-labs (accessed 2026-06-07); https://tracxn.com/d/companies/andonlabs (accessed 2026-06-07). Note: Swedish origins (formerly Vectorview, Bromma/Stockholm, Sweden); operates in both SF and Stockholm. Some databases list HQ as Bromma, Sweden."
      },
      "other_locations": {
        "value": [
          "Stockholm, Sweden (Andon Cafe; AI 'Mona')",
          "San Francisco, USA (Andon Market retail store, Cow Hollow; AI 'Luna')"
        ],
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/andonlabs (public snippet, accessed 2026-06-07); https://www.pymnts.com/artificial-intelligence-2/2026/andon-labs-handed-an-ai-a-cafe-and-business-boomed/ (accessed 2026-06-07); https://www.nbcnews.com/tech/innovation/ai-store-sf-san-francisco-bay-area-andon-labs-market-boss-rcna267013 (accessed 2026-06-07)"
      },
      "current_headcount": {
        "value": "~16 employees (as of 2026-04-30 per Tracxn); LinkedIn lists ~18 associated profiles; YC profile states 10 (likely stale)",
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/andonlabs (accessed 2026-06-07); https://www.linkedin.com/company/andonlabs (public snippet, accessed 2026-06-07); https://www.ycombinator.com/companies/andon-labs (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "estimated",
        "source": "Inferred from Tracxn ~16 employees and LinkedIn ~18 profiles (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://andonlabs.com/join (careers page exists); postings seen for Member of Technical Staff, Founders Associate (SF), Operations Generalist Intern, but no reliable public count as of 2026-06-07"
      },
      "total_raised": {
        "value": "unknown (PitchBook reports ~$500K; Tracxn lists unfunded, sources directly conflict)",
        "confidence": "unknown",
        "source": "https://pitchbook.com/profiles/company/541549-09 (accessed 2026-06-07, ~$500K via search snippet, fetcher 403); https://tracxn.com/d/companies/andonlabs (accessed 2026-06-07, lists unfunded). YC W24 participation confirmed; specific raise amount not corroborated by a primary/announcement source."
      },
      "last_round": {
        "value": "Y Combinator W24 (pre-seed); raise amount unconfirmed",
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/andon-labs (accessed 2026-06-07). YC W24 batch participation confirmed; PitchBook ~$500K is single-source and conflicts with Tracxn's 'unfunded'."
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Y Combinator (W24)"
        ],
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/andon-labs (accessed 2026-06-07). YC W24 confirmed. Additional names (Breakpoint Capital, Juniper Ventures, Phosphor Capital, Superangel, Seldon Lab) appear only in PitchBook/Tracxn search snippets, not on a primary source, and Tracxn lists the company as unfunded, so they are not independently confirmed and are excluded pending verification."
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Anecdotal only: Stockholm AI cafe 'Mona' reportedly earned ~44,000 SEK (~$4,659) in first two weeks (https://www.businesstoday.in/, accessed 2026-06-07). Not a company-level revenue figure."
      },
      "what_they_sell": {
        "value": "evals",
        "confidence": "confirmed",
        "source": "https://andonlabs.com/ (accessed 2026-06-07); https://www.anthropic.com/research/project-vend-2 (accessed 2026-06-07). Builds AI agent benchmarks/evaluations (Vending-Bench, Butter-Bench, Blueprint-Bench, Andon FM) plus real-world autonomous deployments. Arguably mixed given deployment/control-research work."
      },
      "focus_areas": [
        "evaluation / benchmarks",
        "long-horizon / general reasoning",
        "computer use environments"
      ],
      "open_source": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No clear public OSS product/license found as of 2026-06-07; benchmarks published with papers but licensing not confirmed."
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "maturity": {
        "value": "GA (benchmarks publicly published; autonomous deployments live)",
        "confidence": "reported",
        "source": "https://andonlabs.com/ (accessed 2026-06-07); https://www.anthropic.com/research/project-vend-2 (accessed 2026-06-07); https://fortune.com/2026/06/02/... (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Vendor sells benchmarks/evals and runs its own deployments; commercial delivery model not publicly documented as of 2026-06-07"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No SOC 2 attestation or trust/security page found as of 2026-06-07"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No dedicated trust/security page found as of 2026-06-07"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://arxiv.org/abs/2510.21860 (Butter-Bench paper, accessed 2026-06-07), authored by Andon Labs team; company publishes benchmark research."
      },
      "researcher_count": {
        "value": "~7 (counted as authors on the Butter-Bench paper; not a full team census)",
        "confidence": "estimated",
        "source": "https://arxiv.org/abs/2510.21860 (accessed 2026-06-07): authors include Callum Sharrock, Lukas Petersson, Hanna Petersson, Axel Backlund, Axel Wennstrom, Kristoffer Nordstrom, Elias Aronsson"
      },
      "researcher_backgrounds": {
        "value": [
          "Lukas Petersson (CEO, co-founder), previously co-founded Vectorview",
          "Axel Backlund (CTO, co-founder)",
          "Emil Froberg, co-founder (Vectorview/Andon Labs)"
        ],
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/andon-labs (accessed 2026-06-07); LinkedIn public snippets (accessed 2026-06-07); https://www.linkedin.com/posts/emil-froberg_i-co-founded-vectorview-now-andon-labs (accessed 2026-06-07). Specific prior-employer claims (ex-Google, Carnegie Mellon) were not corroborated on a primary source and are omitted."
      },
      "published_papers_or_benchmarks": {
        "value": [
          "Vending-Bench: Testing long-term coherence in agents (https://andonlabs.com/evals/vending-bench)",
          "Vending-Bench 2 (https://andonlabs.com/evals/vending-bench-2)",
          "Butter-Bench: Evaluating LLM Controlled Robots for Practical Intelligence (arXiv:2510.21860)",
          "Blueprint-Bench",
          "Andon FM (AI-run radio benchmark)",
          "Project Vend (real-world AI vending machine, collaboration with Anthropic)"
        ],
        "confidence": "confirmed",
        "source": "https://andonlabs.com/ (accessed 2026-06-07); https://arxiv.org/abs/2510.21860 (accessed 2026-06-07); https://www.anthropic.com/research/project-vend-2 (accessed 2026-06-07)"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Anthropic",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "xAI",
            "verification": "verified",
            "frontier_lab_tie": true
          }
        ],
        "confidence": "reported",
        "source": "Anthropic names Andon Labs as a partner on Project Vend on its own site (https://www.anthropic.com/research/project-vend-2, accessed 2026-06-07: 'our partners at Andon Labs'), corroborated by Fortune (https://fortune.com/2026/06/02/..., accessed 2026-06-07). xAI 'Grokbox' built with Andon Labs confirmed by xAI's own Grok account (https://x.com/grok/status/1947365557248135298, accessed 2026-06-07). NOTE: these are confirmed collaborations/partnerships, not confirmed paying customers; 'verified' reflects third-party (Anthropic/xAI) confirmation of the relationship, not a commercial contract."
      },
      "positioning_summary": "Andon Labs is a Y Combinator-backed (W24) startup, formerly Vectorview, building benchmarks and evaluations for AI agents' long-horizon coherence and safety (Vending-Bench, Butter-Bench, Blueprint-Bench) and operating real-world autonomous AI businesses. It is known for high-profile collaborations placing AI-run vending machines/stores in the offices of frontier labs Anthropic (Project Vend) and xAI (Grokbox).",
      "best_fit_use_case": "Buyers wanting long-horizon agent coherence/safety benchmarks and real-world autonomous-operation stress tests, with a frontier-lab-adjacent, irreverent eval style.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://andonlabs.com/",
          "accessed_date": "2026-06-07",
          "note": "Official site (403 to fetcher; details via search snippets). Products: Vending-Bench, Butter-Bench, Blueprint-Bench, Andon FM."
        },
        {
          "url": "https://andonlabs.com/evals/vending-bench",
          "accessed_date": "2026-06-07",
          "note": "Vending-Bench benchmark page"
        },
        {
          "url": "https://andonlabs.com/evals/vending-bench-2",
          "accessed_date": "2026-06-07",
          "note": "Vending-Bench 2 benchmark page"
        },
        {
          "url": "https://andonlabs.com/evals/butter-bench",
          "accessed_date": "2026-06-07",
          "note": "Butter-Bench benchmark page"
        },
        {
          "url": "https://andonlabs.com/join",
          "accessed_date": "2026-06-07",
          "note": "Careers page"
        },
        {
          "url": "https://arxiv.org/abs/2510.21860",
          "accessed_date": "2026-06-07",
          "note": "Butter-Bench paper; authors include Andon Labs team (~7)"
        },
        {
          "url": "https://www.ycombinator.com/companies/andon-labs",
          "accessed_date": "2026-06-07",
          "note": "YC W24, founded 2023, SF, mission, founder Lukas Petersson"
        },
        {
          "url": "https://tracxn.com/d/companies/andonlabs/__kbDcsnD7GXkDkPhJe7NzhyYdc1aYNmLVZaGPytuX8Ck",
          "accessed_date": "2026-06-07",
          "note": "Founded 2023, SF HQ, ~16 employees (2026-04-30), lists as unfunded (conflict), founders Emil Froberg/Lukas Petersson/Axel Backlund"
        },
        {
          "url": "https://pitchbook.com/profiles/company/541549-09",
          "accessed_date": "2026-06-07",
          "note": "Funding ~$500K reported; investor list (403 to fetcher, via search snippets)"
        },
        {
          "url": "https://www.crunchbase.com/organization/andon-labs",
          "accessed_date": "2026-06-07",
          "note": "Company/funding profile (403 to fetcher)"
        },
        {
          "url": "https://www.linkedin.com/company/andonlabs",
          "accessed_date": "2026-06-07",
          "note": "Public snippet: ~18 profiles, Information Services, SF + Stockholm locations, specialties"
        },
        {
          "url": "https://www.linkedin.com/posts/emil-froberg_i-co-founded-vectorview-now-andon-labs-activity-7269617615793238016-BRZ9",
          "accessed_date": "2026-06-07",
          "note": "Confirms Vectorview -> Andon Labs rebrand; Emil Froberg co-founder"
        },
        {
          "url": "https://www.linkedin.com/posts/andonlabs_vectorview-is-now-andon-labs-we-help-frontier-activity-7270578670040170497-GIyX",
          "accessed_date": "2026-06-07",
          "note": "Official rebrand announcement (~late 2024)"
        },
        {
          "url": "https://fortune.com/2026/06/02/anthropic-office-vending-machine-ai-agents-vendo-andon-lukas-petersson/",
          "accessed_date": "2026-06-07",
          "note": "Anthropic Project Vend; scaled to stores/cafes; multi-agent CEO model; employs humans"
        },
        {
          "url": "https://www.pymnts.com/artificial-intelligence-2/2026/andon-labs-handed-an-ai-a-cafe-and-business-boomed/",
          "accessed_date": "2026-06-07",
          "note": "Andon Market SF retail store details"
        },
        {
          "url": "https://andonlabs.com/blog/ai-cafe-stockholm",
          "accessed_date": "2026-06-07",
          "note": "Stockholm AI cafe 'Mona'"
        },
        {
          "url": "https://x.com/andonlabs/status/1943182987371098151",
          "accessed_date": "2026-06-07",
          "note": "xAI invited Andon Labs; Grok 4 tops Vending-Bench"
        },
        {
          "url": "https://x.com/grok/status/1947365557248135298",
          "accessed_date": "2026-06-07",
          "note": "xAI office 'Grokbox' built with Andon Labs"
        },
        {
          "url": "https://www.businesstoday.in/amp/technology/story/meet-mona-the-ai-running-a-real-cafe-in-stockholm-527972-2026-04-29",
          "accessed_date": "2026-06-07",
          "note": "Mona cafe earned ~44,000 SEK in 2 weeks"
        },
        {
          "url": "https://www.cognitiverevolution.ai/autonomous-organizations-vending-bench-beyond-w-lukas-petersson-axel-backlund-of-andon-labs/",
          "accessed_date": "2026-06-07",
          "note": "Founder interview"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "total_raised",
          "was": "~$500K (reported; databases conflict, some list as unfunded) [confidence: reported]",
          "now": "unknown [confidence: unknown]",
          "reason": "The ~$500K figure rests on a single secondary source (PitchBook search snippet, fetcher 403) and directly conflicts with Tracxn's 'unfunded'. No primary/announcement source corroborates a specific raise. Per rules, downgrade to unknown when only a single weak/conflicting source exists."
        },
        {
          "field": "last_round",
          "was": "Y Combinator W24 (pre-seed/seed); ~$500K reported",
          "now": "Y Combinator W24 (pre-seed); raise amount unconfirmed",
          "reason": "YC W24 participation is confirmed, but the ~$500K amount is single-source and conflicts with Tracxn. Removed the unverified dollar figure from the value."
        },
        {
          "field": "notable_investors",
          "was": "[Y Combinator (W24), Breakpoint Capital, Juniper Ventures, Phosphor Capital, Superangel, Seldon Lab]",
          "now": "[Y Combinator (W24)]",
          "reason": "Only YC W24 is confirmed by a primary source (YC profile). The other five investors appear only in PitchBook/Tracxn search snippets, are not on a primary source, and conflict with Tracxn's 'unfunded' status. Removed pending independent verification rather than asserting them as reported."
        },
        {
          "field": "status.source",
          "was": "andonlabs.com + Fortune",
          "now": "added https://www.anthropic.com/research/project-vend-2",
          "reason": "Added Anthropic primary source confirming active ongoing partnership (Project Vend phase two), strengthening the active-status evidence."
        },
        {
          "field": "founded_year.source",
          "was": "YC + Tracxn note on rebrand",
          "now": "same plus caveat that Vectorview predecessor activity may predate 2023",
          "reason": "Founded year is consistently reported as 2023 but the Vectorview predecessor may be older; added caveat without changing the reported value/confidence."
        },
        {
          "field": "current_headcount",
          "was": "confidence: reported, value mentioning YC 10 in source",
          "now": "value now folds in YC=10 (stale); confidence kept reported",
          "reason": "Minor consolidation; numbers cross-checked against Tracxn (~16), LinkedIn (~18), YC (10, stale). No substantive change to band."
        },
        {
          "field": "researcher_backgrounds",
          "was": "Included 'reportedly ex-Google' (Lukas) and 'background at Carnegie Mellon' (Axel)",
          "now": "Removed unverified prior-employer/education claims; kept co-founder roles and Vectorview lineage",
          "reason": "The ex-Google and Carnegie Mellon claims were self-flagged as unverified and rest only on LinkedIn snippets; per rules, do not assert unverified specifics. Retained verifiable co-founder facts."
        },
        {
          "field": "notable_customers.source",
          "was": "Fortune + Andon Labs X post for xAI; Anthropic 'and Anthropic'",
          "now": "Anthropic's own Project Vend page ('our partners at Andon Labs') + Fortune; xAI confirmed via xAI's own Grok account post",
          "reason": "Upgraded sourcing to the strongest available third-party/first-party confirmations (Anthropic.com, xAI Grok account). Verification 'verified' retained because the frontier labs themselves confirm the relationship, but added explicit note that these are partnerships/collaborations, not confirmed paying customers."
        }
      ],
      "verification_summary": "Confirmed this is the correct entity matching the directory note \"agent benchmarks with a sense of humor\": Andon Labs (formerly Vectorview), YC W24, makers of Vending-Bench/Butter-Bench/Blueprint-Bench and operator of AI-run vending machines/stores. Highest-risk corrections: (1) total_raised downgraded to unknown because the ~$500K is single-source (PitchBook snippet) and conflicts with Tracxn's 'unfunded', with no announcement to corroborate; (2) notable_investors trimmed to YC only, since the other five names lack primary sourcing and conflict with the unfunded listing; (3) removed unverified founder-background claims (ex-Google, Carnegie Mellon). Customer ties strengthened: Anthropic's own Project Vend page names \"our partners at Andon Labs\" and xAI's own Grok account confirms Grokbox was \"built with Andon Labs,\" so both frontier-lab ties are third-party/first-party confirmed, but I flagged that these are partnerships/collaborations, not confirmed paying customers. Headcount (~16, band 11-50), founded 2023 (with Vectorview-predecessor caveat), SF/Stockholm dual presence, evals positioning, and has_researchers all hold up. SOC2/certs/security correctly unknown. Overall confidence: medium.",
      "research_notes": {}
    },
    {
      "rank": 13,
      "focus_areas_normalised": [
        "Enterprise Workflows",
        "Long-Horizon",
        "Math"
      ],
      "slug": "sepal-ai",
      "brand_name": "Sepal AI",
      "segment": "Commercial vendors",
      "website": "https://www.sepalai.com/",
      "status": {
        "value": "acquired",
        "confidence": "confirmed",
        "source": "https://www.orrick.com/en/News/2026/02/Mercor-Acquires-Sepal-AI and https://www.linkedin.com/posts/mercor-ai_today-were-welcoming-sepal-ai-to-mercor-activity-7424897704905826304-N_PJ (accessed 2026-06-07); Mercor acquired Sepal AI, announced 2026-02-06, corroborated by founder and Mercor LinkedIn posts"
      },
      "founded_year": {
        "value": 2024,
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/sepal-ai (accessed 2026-06-07); founded 2024, YC S24"
      },
      "hq_location": {
        "value": "San Francisco, USA",
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/sepal-ai and https://www.linkedin.com/company/sepalai (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "what_they_sell": {
        "value": "mixed",
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/sepal-ai and https://www.orrick.com/en/News/2026/02/Mercor-Acquires-Sepal-AI (accessed 2026-06-07); training data, evaluation benchmarks, and RL environments backed by a 20k+ expert network"
      },
      "deployment_model": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "maturity": {
        "value": "GA",
        "confidence": "estimated",
        "source": "https://www.ycombinator.com/companies/sepal-ai (accessed 2026-06-07); claims multiple Fortune 500 and AI-lab paying customers, indicating commercially available services. Note: acquired by Mercor Feb 2026"
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": "No public OSS repos identified for Sepal itself; product is a data/eval/RL-environment service. SheetBench-50 is hosted on HUD's HuggingFace org (hud-evals), not Sepal's own OSS"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "~13-15 core team (YC team size 15; getlatka 13 for 2024). LinkedIn public snippet shows 51-200 band (likely includes contract experts)",
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/sepal-ai (team size 15), https://getlatka.com/companies/sepalai.com (13, 2024) and https://www.linkedin.com/company/sepalai (51-200 band) (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Conflicting signals: YC team size 15 (suggests 11-50) vs LinkedIn 51-200 band (likely inflated by contract experts). Cannot cleanly resolve to a single band, so left unknown. (https://www.ycombinator.com/companies/sepal-ai, https://www.linkedin.com/company/sepalai, accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://www.sepalai.com/careers (accessed 2026-06-07); careers page lists roles but exact count not retrievable. Company acquired by Mercor Feb 2026, so roles may have transferred/closed"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/sepal-ai and https://www.orrick.com/en/News/2026/02/Mercor-Acquires-Sepal-AI (accessed 2026-06-07); hires Applied Research Engineers, runs specialized research projects, engages 20k+ PhDs/domain experts"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Co-founders ex-Turing (built/scaled the LLM-trainer business; Robi Lin scaled trainers 50 to 800+; Kat Hu managed 500+ AI trainers)",
          "Co-founder Robi Lin formerly at Bain & Co.; co-founder Kat Hu former McKinsey consultant",
          "Co-founder Fedor early engineer at Vercel and Newfront",
          "Engages 20k+ network of academic PhDs and domain professionals (STEM, medical, finance, business)"
        ],
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/sepal-ai (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "~$500K disclosed (single pre-seed round, Sep 2024) per aggregators; YC profile separately states 'several million dollars from leading investors' (unresolved conflict)",
        "confidence": "reported",
        "source": "https://www.crunchbase.com/organization/sepal-ai, https://startupintros.com/orgs/sepal-ai, https://dealigence.vc/company/sepal-ai (all report $500K, 1 round) vs https://www.ycombinator.com/companies/sepal-ai ('several million') (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "Pre-seed, ~$500K, 2024-09-25 (per Crunchbase aggregator; not independently confirmed by a primary announcement)",
        "confidence": "reported",
        "source": "https://www.crunchbase.com/funding_round/sepal-ai-pre-seed--fda22c6a and https://dealigence.vc/company/sepal-ai (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Y Combinator",
          "Metaplanet Holdings",
          "SID Venture Partners",
          "Sterling Road",
          "Team Ignite Ventures"
        ],
        "confidence": "reported",
        "source": "https://startupintros.com/orgs/sepal-ai and https://www.crunchbase.com/organization/sepal-ai (via search) (accessed 2026-06-07); consistent across multiple funding aggregators but no primary press release"
      },
      "revenue_signals": {
        "value": "Third-party (getlatka) claims ~$2M revenue with a 13-person team in 2024, single unverified source, not vendor-confirmed",
        "confidence": "reported",
        "source": "https://getlatka.com/companies/sepalai.com (accessed 2026-06-07)"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Top AI research labs (unnamed; frontier-lab ties referenced in Mercor acquisition rationale)",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          },
          {
            "name": "Multiple Fortune 500 companies (unnamed)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "HUD (co-builder/collaboration partner on SheetBench-50, NOT a customer)",
            "verification": "verified",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/sepal-ai (Fortune 500 + AI-lab claims, self-claimed) and https://www.hud.ai/case-studies/sheetbench-50 (HUD collaboration on benchmark, verified as a partner not a customer) (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "SheetBench-50, first public financial-analyst-grade benchmark of AI agents on real spreadsheet/financial workflows (50 tasks), built in partnership with HUD; tasks validated by finance pros from PwC, Cisco, Charles Schwab, Fannie Mae"
        ],
        "confidence": "confirmed",
        "source": "https://www.hud.ai/case-studies/sheetbench-50 and https://huggingface.co/datasets/hud-evals/SheetBench-50 (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "distributed_remote": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "focus_areas": [
        "evaluation / benchmarks",
        "enterprise workflows",
        "finance",
        "science/math",
        "long-horizon / general reasoning"
      ],
      "positioning_summary": "Sepal AI is a YC-backed (S24) San Francisco data-research company that builds high-quality training data, expert-graded evaluation benchmarks, and reinforcement-learning environments for frontier LLMs, drawing on a network of 20k+ domain experts (PhDs, finance, medical, STEM). It was acquired by Mercor in February 2026.",
      "best_fit_use_case": "Buyers needing expert-validated evaluation environments and RL/training data for complex domains (notably finance/spreadsheet analyst workflows and advanced science). Note that the team is now part of Mercor following the Feb 2026 acquisition.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.sepalai.com/",
          "accessed_date": "2026-06-07",
          "note": "Official site (returned 402 on fetch; confirmed via search snippets) - tagline 'Making models smarter and safer', data research company"
        },
        {
          "url": "https://www.sepalai.com/careers",
          "accessed_date": "2026-06-07",
          "note": "Careers page; open roles incl. Applied Research Engineer, Senior/Fullstack SWE (402 on direct fetch, partial via LinkedIn/YC)"
        },
        {
          "url": "https://www.ycombinator.com/companies/sepal-ai",
          "accessed_date": "2026-06-07",
          "note": "YC S24 profile: founded 2024, SF, team size 15, founders Robi Lin/Kat Hu (ex-Turing)/Fedor (ex-Vercel), Fortune 500 + AI-lab customers, status acquired"
        },
        {
          "url": "https://www.orrick.com/en/News/2026/02/Mercor-Acquires-Sepal-AI",
          "accessed_date": "2026-06-07",
          "note": "Acquisition announcement, 2026-02-06; Mercor acquires Sepal AI; no deal terms disclosed"
        },
        {
          "url": "https://www.crunchbase.com/organization/sepal-ai",
          "accessed_date": "2026-06-07",
          "note": "Crunchbase profile (403 on fetch; via search) - funding & investors"
        },
        {
          "url": "https://www.crunchbase.com/funding_round/sepal-ai-pre-seed--fda22c6a",
          "accessed_date": "2026-06-07",
          "note": "Pre-seed round dated 2024-09-25, ~$500K"
        },
        {
          "url": "https://startupintros.com/orgs/sepal-ai",
          "accessed_date": "2026-06-07",
          "note": "Investors: Metaplanet Holdings, SID Venture Partners, Sterling Road, Team Ignite Ventures, YC"
        },
        {
          "url": "https://www.linkedin.com/company/sepalai",
          "accessed_date": "2026-06-07",
          "note": "Public snippet: HQ San Francisco, 51-200 employee band (likely incl. contract experts), Technology/Information/Internet"
        },
        {
          "url": "https://www.hud.ai/case-studies/sheetbench-50",
          "accessed_date": "2026-06-07",
          "note": "SheetBench-50 financial-analyst benchmark co-built by HUD and Sepal AI; tasks validated by finance pros from PwC, Cisco, Charles Schwab, Fannie Mae"
        },
        {
          "url": "https://huggingface.co/datasets/hud-evals/SheetBench-50",
          "accessed_date": "2026-06-07",
          "note": "SheetBench-50 dataset hosted on HuggingFace under hud-evals"
        },
        {
          "url": "https://getlatka.com/companies/sepalai.com",
          "accessed_date": "2026-06-07",
          "note": "Third-party unverified claim: ~$2M revenue, 13-person team, 2024"
        },
        {
          "url": "https://pitchbook.com/profiles/company/639950-14",
          "accessed_date": "2026-06-07",
          "note": "PitchBook profile (valuation/investors/acquisition) - via search snippet"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "headcount_band",
          "was": "11-50 (estimated)",
          "now": "unknown (unknown)",
          "reason": "The only structured public band (LinkedIn) shows 51-200, while YC team size is 15. The draft asserted '11-50' as an estimate, but the two structured sources conflict and cannot be cleanly resolved (LinkedIn band likely inflated by contract experts). Downgraded to unknown per do-not-overreach rule."
        },
        {
          "field": "notable_customers (HUD entry)",
          "was": "HUD listed as a verified customer (collaboration partner on SheetBench-50)",
          "now": "HUD relabeled as a verified co-builder/collaboration PARTNER, explicitly noted as NOT a customer",
          "reason": "HUD is a benchmark co-author/collaborator, not a paying customer of Sepal. Listing it among customers (even as 'verified') misrepresents the relationship. Kept verification:verified because the partnership itself is third-party confirmed, but clarified it is a partner not a customer."
        },
        {
          "field": "status.source / status.confidence",
          "was": "confirmed, single Orrick source",
          "now": "confirmed, Orrick + Mercor and founder LinkedIn announcement posts",
          "reason": "Strengthened corroboration with additional primary sources (Mercor official post, plus posts by founders Robi Lin and Kat Hu). Confidence remains confirmed (well-supported)."
        },
        {
          "field": "total_raised.source / last_round.source",
          "was": "Single Crunchbase round URL",
          "now": "Added startupintros and dealigence corroboration; explicitly noted no primary press release and unresolved YC 'several million' conflict",
          "reason": "$500K is consistent across multiple aggregators but lacks a primary funding announcement; flagged the conflict with YC's 'several million' more explicitly. Confidence appropriately remains 'reported', not upgraded."
        },
        {
          "field": "researcher_backgrounds (Vercel/Newfront detail)",
          "was": "'Co-founder ex-engineer at Vercel and Newfront' attributed generically",
          "now": "Specified as co-founder Fedor, early engineer at Vercel and Newfront; corrected Bain/McKinsey to specific co-founders (Robi Lin ex-Bain, Kat Hu ex-McKinsey)",
          "reason": "YC profile attributes these backgrounds to specific named co-founders; the draft's vaguer phrasing risked implying multiple founders shared the same firms. Confidence remains reported."
        },
        {
          "field": "company match verification",
          "now": "Confirmed correct company",
          "reason": "Verified this is the correct Sepal AI matching the directory note 'expert evaluation environments': YC/HUD/Mercor sources all describe expert-graded eval benchmarks and RL environments. No confusion with an unrelated same-named entity."
        }
      ],
      "verification_summary": "Independently re-verified the highest-risk claims. CONFIRMED: Mercor's Feb 2026 acquisition (Orrick + Mercor/founder LinkedIn posts), founding year 2024 and SF HQ (YC), and SheetBench-50 benchmark co-built with HUD (HUD case study + HuggingFace). Funding remains at 'reported': multiple aggregators report a single ~$500K pre-seed round, but this conflicts with the YC profile's 'several million dollars' claim and no primary press release exists; not upgraded. Investors are consistent across aggregators but lack a primary announcement (kept 'reported'). KEY CORRECTIONS: (1) headcount_band downgraded from '11-50' to 'unknown': LinkedIn shows a 51-200 band while YC lists 15, an unresolvable conflict; a startup at this stage is unlikely to have 51-200 actual employees (the LinkedIn band is likely inflated by contract experts), and neither band is cleanly supportable. (2) HUD reclassified from a 'customer' to a co-builder/collaboration partner: it is a benchmark co-author, not a paying customer; the Fortune 500 and AI-lab customers remain correctly self-claimed (unnamed). soc2/certifications/security all correctly remain unknown (no trust page found). This is the correct company matching the 'expert evaluation environments' directory note.",
      "research_notes": {}
    },
    {
      "rank": 14,
      "focus_areas_normalised": [
        "Computer Use",
        "Enterprise Workflows"
      ],
      "slug": "hud",
      "brand_name": "HUD",
      "segment": "Commercial vendors",
      "website": "https://www.hud.ai",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/hud (accessed 2026-06-07)"
      },
      "focus_areas": [
        "computer use environments",
        "browser environments",
        "evaluation / benchmarks",
        "execution infrastructure",
        "enterprise workflows"
      ],
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://github.com/hud-evals/hud-python ; https://docs.hud.ai/ (accessed 2026-06-07)"
      },
      "positioning_summary": "HUD (YC W25, formerly hud.so) is a platform for building reinforcement-learning environments and evaluations for computer-use and browser agents. It lets teams wrap real software/code as agent-callable tools in isolated containers, define tasks and rewards, and run evals/RL at scale via an open-source SDK plus a cloud-hosted gateway. It maintains public benchmarks (OSWorld-Verified contributions, SheetBench-50) and positions frontier AI labs and agent-first startups as its target customers.",
      "best_fit_use_case": "Teams that need to benchmark or RL-train computer-use/browser agents against real-software tasks with reproducible, containerized environments.",
      "maturity": {
        "value": "GA",
        "confidence": "estimated",
        "source": "https://pypi.org/project/hud-python/ ; https://docs.hud.ai/ (accessed 2026-06-07) - public OSS SDK with regular releases and self-serve docs/login; no formal GA label found"
      },
      "deployment_model": {
        "value": "managed-hosted + self-hosted (open-source SDK; cloud platform with local CLI execution and an OpenAI-compatible model gateway at inference.hud.ai)",
        "confidence": "confirmed",
        "source": "https://docs.hud.ai/ ; https://github.com/hud-evals/hud-python (accessed 2026-06-07)"
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/hud-evals/hud-python (accessed 2026-06-07)"
      },
      "license": {
        "value": "MIT",
        "confidence": "confirmed",
        "source": "https://github.com/hud-evals/hud-python (accessed 2026-06-07) - MIT confirmed via repo license field on re-fetch"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "OSWorld-Verified (369+ real-world desktop tasks; HUD/'Human Data' acknowledged among institutions providing feedback/fixes, per XLANG Lab)",
          "SheetBench-50 (financial-analyst spreadsheet benchmark, developed with Sepal AI; per HUD case study)"
        ],
        "confidence": "reported",
        "source": "https://xlang.ai/blog/osworld-verified (acknowledges 'Human Data' / hud.so among feedback institutions; HUD contribution reported, not independently confirmed) ; SheetBench-50 self-reported on https://www.hud.ai/case-studies/sheetbench-50 (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2025,
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/hud (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "San Francisco, USA",
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/hud (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "~15 (per YC profile, accessed 2026-06-07)",
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/hud (accessed 2026-06-07) - team size 15 per YC profile"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/hud (accessed 2026-06-07) - ~15 reported"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": 5,
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/hud/jobs (accessed 2026-06-07) - ~5 SF roles listed; counts fluctuate, treated as reported"
      },
      "distributed_remote": {
        "value": "no",
        "confidence": "estimated",
        "source": "https://www.ycombinator.com/companies/hud/jobs (accessed 2026-06-07) - listed roles are San Francisco (not remote)"
      },
      "total_raised": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "A seed round is confirmed (Exceptional Capital portfolio: 'Seed Invested') but amount undisclosed. Secondary figures ($2M, $15M, $21M) in search snippets conflate with an unrelated same-named Israeli company (runtime code sensor, Square Peg Capital). No clean primary amount for hud.ai isolated. Treated as unknown."
      },
      "last_round": {
        "value": "Seed (amount undisclosed)",
        "confidence": "reported",
        "source": "https://www.exceptionalcap.com/portfolio (accessed 2026-06-07) - HUD listed at 'Seed Stage / Seed Invested'; amount not disclosed"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Y Combinator (W25 batch)",
          "Exceptional Capital"
        ],
        "confidence": "reported",
        "source": "Y Combinator: https://www.ycombinator.com/companies/hud (W25). Exceptional Capital: https://www.exceptionalcap.com/portfolio lists HUD as a seed-stage portfolio company (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [
          {
            "name": "DoorDash",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "UiPath",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Sharpe",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "OpenAI",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          },
          {
            "name": "Anthropic",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          }
        ],
        "confidence": "reported",
        "source": "DoorDash/UiPath/Sharpe self-claimed via HUD homepage case studies (https://www.hud.ai/, accessed 2026-06-07). OpenAI/Anthropic: HUD positions frontier labs as customers; the only third-party reference (https://xlang.ai/blog/osworld-verified) co-acknowledges HUD ('Human Data'/hud.so) alongside OpenAI and Anthropic as OSWorld-Verified feedback contributors - this is benchmark collaboration, NOT confirmation of a paid/customer relationship, so kept self-claimed (not verified)."
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/hud ; https://www.workatastartup.com/companies/hud (accessed 2026-06-07) - team described as including IOI/IPhO Olympiad medalists and researchers with ICLR/NeurIPS publications; 'Research Engineer' roles open"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Jay Ram (CEO) - consumer apps, ML/quant research",
          "Lorenss Martinsons (CPO) - Cognitive Science, Yale",
          "Parth Patel (CTO) - evals and RL environments",
          "Team reported to include International Olympiad medalists (IOI, IPhO) and researchers with ICLR/NeurIPS publications"
        ],
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/hud ; https://www.workatastartup.com/companies/hud ; https://www.linkedin.com/in/parth220/ (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "https://www.hud.ai/dpa (Data Processing Addendum)",
        "confidence": "reported",
        "source": "https://www.hud.ai/dpa surfaced via search (accessed 2026-06-07); no dedicated trust/security page or SOC 2/ISO certification found"
      },
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.ycombinator.com/companies/hud",
          "accessed_date": "2026-06-07",
          "note": "YC profile: founders (Jay Ram, Lorenss Martinsons, Parth Patel), founded 2025, ~15 people, SF, W25, frontier-lab positioning"
        },
        {
          "url": "https://www.ycombinator.com/companies/hud/jobs",
          "accessed_date": "2026-06-07",
          "note": "5 open roles, all San Francisco"
        },
        {
          "url": "https://github.com/hud-evals/hud-python",
          "accessed_date": "2026-06-07",
          "note": "OSS SDK, MIT license (confirmed via repo license field), ~258 stars, v0.5.41 (Apr 2026), prebuilt computer/shell/file/browser tools"
        },
        {
          "url": "https://pypi.org/project/hud-python/",
          "accessed_date": "2026-06-07",
          "note": "PyPI package for hud-python"
        },
        {
          "url": "https://docs.hud.ai/",
          "accessed_date": "2026-06-07",
          "note": "Deployment model: cloud-hosted + local CLI, containerized isolated environments, OpenAI-compatible gateway at inference.hud.ai (Claude/GPT/Gemini/Grok); no SOC2/license stated in docs"
        },
        {
          "url": "https://www.hud.ai/",
          "accessed_date": "2026-06-07",
          "note": "Homepage (HTTP 429 on fetch); via search: customers DoorDash, UiPath, Sharpe self-claimed; positioning as RL environments/evals for CUAs"
        },
        {
          "url": "https://www.hud.ai/case-studies/sheetbench-50",
          "accessed_date": "2026-06-07",
          "note": "SheetBench-50 developed with Sepal AI; finance professionals from PwC/Cisco/Charles Schwab/Fannie Mae involved"
        },
        {
          "url": "https://www.hud.ai/dpa",
          "accessed_date": "2026-06-07",
          "note": "Data Processing Addendum page"
        },
        {
          "url": "https://xlang.ai/blog/osworld-verified",
          "accessed_date": "2026-06-07",
          "note": "Third-party: HUD (Human Data, hud.so) listed among institutions providing OSWorld-Verified feedback alongside OpenAI, Anthropic, Moonshot, ByteDance, Simular - evidences benchmark collaboration alongside frontier labs, not a customer relationship"
        },
        {
          "url": "https://foundertrace.com/companies/hud_yc_w25/",
          "accessed_date": "2026-06-07",
          "note": "Founders, Martinsons Yale Cognitive Science background"
        },
        {
          "url": "https://www.workatastartup.com/companies/hud",
          "accessed_date": "2026-06-07",
          "note": "Team described with Olympiad medalists and ICLR/NeurIPS researchers (via search snippet)"
        },
        {
          "url": "https://newsletter.semianalysis.com/p/rl-environments-and-rl-for-science",
          "accessed_date": "2026-06-07",
          "note": "Technical description: HUD wraps software in dockerized container + MCP server exposing agent tools"
        },
        {
          "url": "https://www.linkedin.com/company/hud-evals",
          "accessed_date": "2026-06-07",
          "note": "Correct LinkedIn handle for HUD (YC W25) RL-environments company"
        },
        {
          "url": "https://www.crunchbase.com/organization/hud",
          "accessed_date": "2026-06-07",
          "note": "Crunchbase profile (HTTP 403 on fetch); funding not verified"
        },
        {
          "url": "https://x.com/hud_evals/status/1919262852088570225",
          "accessed_date": "2026-06-07",
          "note": "HUD X post: hiring research engineers, works with frontier labs to evaluate CUAs"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "notable_customers (OpenAI)",
          "was": "verification: verified, frontier_lab_tie: true",
          "now": "verification: self-claimed, frontier_lab_tie: true",
          "reason": "The draft cited XLANG/OSWorld-Verified as third-party verification, but that page only co-acknowledges HUD ('Human Data'/hud.so) alongside OpenAI as a benchmark feedback contributor. Benchmark collaboration is not confirmation of a paid/customer relationship. No credible third-party source confirms OpenAI as a HUD customer, so downgraded to self-claimed."
        },
        {
          "field": "notable_customers (Anthropic)",
          "was": "verification: verified, frontier_lab_tie: true",
          "now": "verification: self-claimed, frontier_lab_tie: true",
          "reason": "Same as OpenAI - only co-acknowledged on the OSWorld-Verified benchmark, which does not verify a customer relationship. Downgraded to self-claimed."
        },
        {
          "field": "published_papers_or_benchmarks",
          "was": "confidence: confirmed",
          "now": "confidence: reported",
          "reason": "OSWorld-Verified page acknowledges 'Human Data' (linking hud.so), not 'HUD' verbatim, and HUD's contribution is described as feedback/fixes rather than authorship. SheetBench-50 is self-reported on HUD's own case-study page. Neither is independently confirmed, so downgraded to reported."
        },
        {
          "field": "open_roles_count",
          "was": "confidence: confirmed",
          "now": "confidence: reported",
          "reason": "Open-role counts fluctuate and are a point-in-time snapshot; not a durable confirmed fact. Downgraded to reported."
        },
        {
          "field": "current_headcount",
          "was": "source note: '~15 (as of YC profile)' confidence reported",
          "now": "value clarified to '~15 (per YC profile)' confidence reported",
          "reason": "Minor wording cleanup; confidence already appropriately 'reported' (single YC self-reported source, no LinkedIn snippet to corroborate). Kept as reported, not upgraded."
        },
        {
          "field": "license",
          "was": "confidence: reported (not independently re-verified against LICENSE file)",
          "now": "confidence: confirmed",
          "reason": "Re-fetched the GitHub repo; MIT license confirmed via the repository license field. Upgraded with primary source."
        },
        {
          "field": "last_round",
          "was": "value: unknown, confidence: unknown",
          "now": "value: 'Seed (amount undisclosed)', confidence: reported",
          "reason": "Exceptional Capital's portfolio page lists HUD at 'Seed Stage / Seed Invested', confirming a seed round exists even though the amount is undisclosed. Stage upgraded to reported; amount remains unknown."
        },
        {
          "field": "notable_investors",
          "was": "['Y Combinator (W25 batch)'], confidence: confirmed",
          "now": "['Y Combinator (W25 batch)','Exceptional Capital'], confidence: reported",
          "reason": "Exceptional Capital (a SF/Miami early-stage VC) lists HUD as a seed-stage portfolio company, indicating it is an investor. Added Exceptional Capital; overall list confidence set to reported since the Exceptional Capital tie rests on a single portfolio listing."
        },
        {
          "field": "total_raised (source note)",
          "was": "'$2M seed; $15M' conflated figures",
          "now": "added that figures conflate with an unrelated Israeli 'Hud' (runtime code sensor, Square Peg Capital, $21M); a seed round is confirmed but amount undisclosed",
          "reason": "Verified that the prominent $21M/$2M figures belong to a different same-named company (Israeli runtime code-sensor startup). Strengthened the source note; value remains unknown."
        },
        {
          "field": "positioning_summary",
          "was": "did not note former hud.so domain",
          "now": "added 'formerly hud.so' and clarified 'wraps real software/code as agent-callable tools'",
          "reason": "Confirmed hud.so is the company's earlier domain (referenced in OSWorld-Verified blog) and that the 'wrapper layer' directory note matches HUD's RL-environment wrapping model. Added context to confirm correct-company match."
        }
      ],
      "verification_summary": "Confirmed this is the correct company: HUD (hud.ai, YC W25, formerly hud.so), a platform for wrapping real software/code as agent-callable RL environments and evals for computer-use/browser agents - matching the 'wrapper layer' directory note. Disambiguated from an unrelated same-named Israeli startup ('Hud', runtime code sensor, backed by Square Peg, ~$21M) whose funding figures had leaked into search snippets; HUD's actual funding amount remains undisclosed, though a seed round is confirmed via Exceptional Capital's portfolio (added as an investor, reported). Biggest correction: OpenAI and Anthropic were downgraded from 'verified' to 'self-claimed' customers - the cited XLANG/OSWorld-Verified page only co-acknowledges HUD as a benchmark feedback contributor alongside those labs, which is collaboration, not customer verification. Benchmarks downgraded to 'reported' (OSWorld page names 'Human Data', SheetBench is self-reported). open_roles_count downgraded to reported. License upgraded to confirmed after re-fetching the MIT license field on GitHub. DoorDash/UiPath/Sharpe correctly remain self-claimed. No SOC 2/ISO found; security_page is only a DPA. Overall confidence: medium.",
      "research_notes": {}
    },
    {
      "rank": 15,
      "focus_areas_normalised": [],
      "slug": "vals-ai",
      "brand_name": "Vals AI",
      "segment": "Commercial vendors",
      "website": "https://www.vals.ai",
      "focus_areas": [
        "evaluation / benchmarks",
        "finance",
        "legal",
        "healthcare"
      ],
      "positioning_summary": "Vals AI is an independent, third-party benchmarking and evaluation platform that scores LLMs and AI applications (copilots, RAG, agents) on rigorous, domain-specific tasks in regulated fields such as legal, finance, healthcare, tax and coding. It publishes public leaderboards (e.g., the Vals Index, Finance Agent benchmark, Vals Legal AI Report) and sells private evaluation infrastructure to labs and enterprise engineering teams.",
      "best_fit_use_case": "Buyers who need neutral, domain-specific (legal/finance/healthcare) benchmarking and ongoing evaluation of LLM applications on their own data and tasks.",
      "what_they_sell": {
        "value": "evals",
        "confidence": "confirmed",
        "source": "https://www.vals.ai/product (accessed 2026-06-07); https://www.vals.ai/home (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "managed-hosted (SaaS) with API/SDK/CLI; web app at platform.vals.ai",
        "confidence": "confirmed",
        "source": "https://www.vals.ai/product (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "GA (public benchmarks/leaderboards live; private evaluation infrastructure offered, partly early access)",
        "confidence": "reported",
        "source": "https://www.vals.ai/product (accessed 2026-06-07); https://www.vals.ai/about (accessed 2026-06-07)"
      },
      "open_source": {
        "value": "yes (benchmark code/datasets published, e.g., Finance Agent benchmark; platform itself is proprietary SaaS)",
        "confidence": "confirmed",
        "source": "https://github.com/vals-ai/finance-agent (accessed 2026-06-07)"
      },
      "license": {
        "value": "MIT (finance-agent benchmark repo)",
        "confidence": "confirmed",
        "source": "https://github.com/vals-ai/finance-agent (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": "2023",
        "confidence": "reported",
        "source": "Tracxn, Grokipedia, QA-Financial founder coverage (accessed 2026-06-07); note Crunchbase/CB Insights list 2024 and first press coverage is April 2024"
      },
      "hq_location": {
        "value": "San Francisco, USA",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/vals-ai (public snippet, accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.vals.ai/home (accessed 2026-06-07)"
      },
      "distributed_remote": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "~12 people (getLatka, 2025); LinkedIn lists 11-50 employees / ~19 associated profiles",
        "confidence": "reported",
        "source": "https://getlatka.com/companies/vals.ai/funding (accessed 2026-06-07); https://www.linkedin.com/company/vals-ai (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/vals-ai (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "arXiv:2508.00828 authored by Vals AI team; founders ex-Stanford AI master's; Stanford collaborations (accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Co-founder/CEO Rayan Krishnan - ex-Stanford AI master's",
          "Co-founder/CTO Langston Nashold - ex-Stanford AI master's",
          "Founding engineer Rez (Reza) Havaei",
          "Collaborations with Stanford researchers and domain experts in law, finance, accounting"
        ],
        "confidence": "reported",
        "source": "https://qa-financial.com/industry-on-a-mission-the-long-road-to-uniform-ai-testing/ (accessed 2026-06-07); https://www.techtimes.com/articles/303524 (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "getLatka states Vals AI is bootstrapped with $0 raised (https://getlatka.com/companies/vals.ai/funding, accessed 2026-06-07). A widely-cited '$5M seed / Sequoia, Bloomberg Beta, Pear VC, 8VC, J12' appears to be aggregator data conflated with an unrelated company (Vallor, a Miami procurement-AI startup that raised a $4M Bloomberg Beta-led seed in April 2025) and could not be confirmed."
      },
      "last_round": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No primary announcement of a Vals AI funding round found; getLatka reports bootstrapped/$0 raised (accessed 2026-06-07). The '$5M seed, July 29, 2024' figure on aggregators is unconfirmed and likely conflated with another company."
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "getLatka lists a $4M valuation as an estimate (https://getlatka.com/companies/vals.ai, accessed 2026-06-07); not corroborated by any primary or credible third-party source."
      },
      "notable_investors": {
        "value": [],
        "confidence": "unknown",
        "source": "The investor list (Sequoia Capital, Bloomberg Beta, Pear VC, 8VC, J12) circulating on aggregators could not be verified and appears conflated with an unrelated company (Vallor). No primary funding announcement found; getLatka reports the company as bootstrapped."
      },
      "revenue_signals": {
        "value": "~$1.3M revenue/ARR in 2025 (getLatka self-reported/estimated figure; not vendor-confirmed)",
        "confidence": "reported",
        "source": "https://getlatka.com/companies/vals.ai/funding (accessed 2026-06-07)"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Reed Smith (law firm; VLAIR benchmarking consortium partner, not a paying customer)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Fisher Phillips (law firm; VLAIR benchmarking consortium partner, not a paying customer)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "McDermott Will & Emery (law firm; VLAIR benchmarking consortium partner, not a paying customer)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Ogletree Deakins (law firm; VLAIR benchmarking consortium partner, not a paying customer)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Harvey (legal AI vendor evaluated in VLAIR; not a stated customer)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "CoCounsel/Thomson Reuters (legal AI vendor evaluated in VLAIR; not a stated customer)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Alexi (legal AI vendor evaluated in VLAIR; not a stated customer)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://www.lawnext.com/2025/05/vals-ai-issues-open-call-for-vendors-to-participate-in-its-legal-research-and-other-legal-ai-benchmarking-studies.html (accessed 2026-06-07); https://finance.yahoo.com/news/vals-legal-ai-report-establishes-160000601.html (accessed 2026-06-07). These are benchmarking-study consortium partners / evaluated vendors, NOT confirmed paying platform customers; no verified customer relationships found."
      },
      "soc2": {
        "value": "claimed-unverified (SOC 2 badge displayed on product page; type not specified; no trust/audit-registry confirmation)",
        "confidence": "reported",
        "source": "https://www.vals.ai/product (accessed 2026-06-07)"
      },
      "other_certifications": {
        "value": [
          "GDPR (compliance badge displayed on product page; unverified)"
        ],
        "confidence": "reported",
        "source": "https://www.vals.ai/product (accessed 2026-06-07)"
      },
      "security_page": {
        "value": "unknown (no dedicated /security or /trust page found; only SOC 2 and GDPR badges on product page)",
        "confidence": "unknown",
        "source": "https://www.vals.ai/product (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "Finance Agent Benchmark: Benchmarking LLMs on Real-world Financial Research Tasks - arXiv:2508.00828",
          "Vals Legal AI Report (VLAIR), Feb 2025 - https://www.vals.ai",
          "Vals Index / domain leaderboards (TaxEval, CorpFin, MedCode, LegalBench, CaseLaw, ContractLaw, Finance Agent) - https://www.vals.ai",
          "Finance Agent benchmark dataset on Hugging Face - https://huggingface.co/datasets/vals-ai/finance_agent_benchmark"
        ],
        "confidence": "confirmed",
        "source": "https://arxiv.org/abs/2508.00828 (accessed 2026-06-07); https://www.vals.ai/home (accessed 2026-06-07); https://huggingface.co/datasets/vals-ai/finance_agent_benchmark (accessed 2026-06-07)"
      },
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.vals.ai/home",
          "accessed_date": "2026-06-07",
          "note": "Official homepage: product, Vals Index, domain leaderboards (finance/legal/healthcare/coding/math)"
        },
        {
          "url": "https://www.vals.ai/about",
          "accessed_date": "2026-06-07",
          "note": "About page: positioned as independent third-party evaluator; no named team/year/location on page"
        },
        {
          "url": "https://www.vals.ai/product",
          "accessed_date": "2026-06-07",
          "note": "Product page: SaaS platform.vals.ai, SDK/CLI, CI/CD, SOC 2 + GDPR badges, sensitive-domain focus"
        },
        {
          "url": "https://www.linkedin.com/company/vals-ai",
          "accessed_date": "2026-06-07",
          "note": "Public snippet: 11-50 employees (~19 profiles), San Francisco, Software Development"
        },
        {
          "url": "https://www.crunchbase.com/organization/vals-ai",
          "accessed_date": "2026-06-07",
          "note": "Funding profile (403 on direct fetch; details via search snippet): $5M seed, July 2024"
        },
        {
          "url": "https://tracxn.com/d/companies/vals-ai/__Aeq7C2n56rLfgsWjTrbrPIwiOteKWlKhIWYPbapLuow",
          "accessed_date": "2026-06-07",
          "note": "Founders, investors, founding year"
        },
        {
          "url": "https://getlatka.com/companies/vals.ai/funding",
          "accessed_date": "2026-06-07",
          "note": "Reported $1.3M revenue, 12-person team in 2025 (self-reported/estimated, unverified)"
        },
        {
          "url": "https://qa-financial.com/industry-on-a-mission-the-long-road-to-uniform-ai-testing/",
          "accessed_date": "2026-06-07",
          "note": "Founders left Stanford AI master's; founding engineer Rez Havaei; mission detail"
        },
        {
          "url": "https://www.techtimes.com/articles/303524/20240412/standardized-ai-performance-test-tested-out-new-startup.htm",
          "accessed_date": "2026-06-07",
          "note": "Founder backgrounds, early benchmark coverage (Apr 2024)"
        },
        {
          "url": "https://www.bloomberg.com/news/newsletters/2024-04-11/this-startup-is-trying-to-test-how-well-ai-models-actually-work",
          "accessed_date": "2026-06-07",
          "note": "Bloomberg coverage of company (paywalled; headline only)"
        },
        {
          "url": "https://www.deeplearning.ai/the-batch/vals-ai-evaluates-large-language-models-on-industry-specific-tasks/",
          "accessed_date": "2026-06-07",
          "note": "Third-party coverage of legal/finance/tax benchmarks"
        },
        {
          "url": "https://www.lawnext.com/2025/05/vals-ai-issues-open-call-for-vendors-to-participate-in-its-legal-research-and-other-legal-ai-benchmarking-studies.html",
          "accessed_date": "2026-06-07",
          "note": "Law firm consortium partners (Reed Smith, Fisher Phillips, McDermott Will & Emery, Ogletree Deakins); VLAIR vendors Harvey, CoCounsel; project lead Tara Waters; Legaltech Hub partnership"
        },
        {
          "url": "https://finance.yahoo.com/news/vals-legal-ai-report-establishes-160000601.html",
          "accessed_date": "2026-06-07",
          "note": "Alexi as legal AI vendor in VLAIR"
        },
        {
          "url": "https://arxiv.org/abs/2508.00828",
          "accessed_date": "2026-06-07",
          "note": "Finance Agent Benchmark paper; authors Bigeard, Nashold, Krishnan, Wu; 537 expert-authored questions"
        },
        {
          "url": "https://huggingface.co/datasets/vals-ai/finance_agent_benchmark",
          "accessed_date": "2026-06-07",
          "note": "Finance Agent benchmark dataset"
        },
        {
          "url": "https://github.com/vals-ai/finance-agent",
          "accessed_date": "2026-06-07",
          "note": "Benchmark repo: MIT license, 141 stars, Python"
        },
        {
          "url": "https://www.legaltechnologyhub.com/vendors/vals-ai/",
          "accessed_date": "2026-06-07",
          "note": "Legaltech Hub vendor listing / VLAIR partnership"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "total_raised",
          "was": "$5M (reported)",
          "now": "unknown (unknown)",
          "reason": "The $5M figure could not be confirmed against any primary funding announcement. getLatka, the most company-specific source, states Vals AI is bootstrapped with $0 raised. The funding data on aggregators appears conflated with an unrelated company, Vallor (a Miami procurement-AI startup that raised a $4M Bloomberg Beta-led seed in April 2025). Downgraded to unknown."
        },
        {
          "field": "last_round",
          "was": "Seed, $5M, July 29, 2024 (reported)",
          "now": "unknown (unknown)",
          "reason": "No primary source confirms a Vals AI seed round; getLatka reports bootstrapped. The dated round detail is unconfirmed aggregator data likely conflated with another company."
        },
        {
          "field": "notable_investors",
          "was": "[Sequoia Capital, Bloomberg Beta, Pear VC, 8VC, J12] (reported)",
          "now": "[] (unknown)",
          "reason": "This investor list could not be verified and matches the profile of an unrelated company (Vallor's seed was co-led by Bloomberg Beta and Dynamo Ventures). No primary Vals AI funding announcement names these investors. Set to empty/unknown to avoid propagating a likely name-confusion error."
        },
        {
          "field": "valuation",
          "was": "unknown (unknown)",
          "now": "unknown (unknown), source note added",
          "reason": "Value unchanged, but added context: getLatka lists a $4M valuation as an estimate; it is not corroborated and remains unknown."
        },
        {
          "field": "notable_customers (verification)",
          "was": "all 7 entries marked verification=verified",
          "now": "all 7 entries changed to verification=self-claimed",
          "reason": "None of these are confirmed paying customers. The law firms are VLAIR benchmarking-study consortium partners and the named vendors (Harvey, CoCounsel, Alexi) are products evaluated IN the report. Third-party press confirms participation in a benchmarking study, not a customer relationship; per rules, study-participation is not a verified customer. Labels and entry descriptions updated accordingly."
        },
        {
          "field": "focus_areas",
          "was": "[evaluation / benchmarks, finance, legal, healthcare, coding environments]",
          "now": "[evaluation / benchmarks, finance, legal, healthcare]",
          "reason": "Removed 'coding environments': Vals produces coding benchmarks, not sandbox/dev coding environments. That controlled-vocab term denotes execution/dev environments and does not fit; the coding work is already covered by 'evaluation / benchmarks'."
        },
        {
          "field": "current_headcount",
          "was": "11-50 employees (LinkedIn ~19 profiles); getLatka 12-person team",
          "now": "~12 people (getLatka, 2025); LinkedIn 11-50 / ~19 profiles",
          "reason": "Reordered to lead with the most company-specific figure (~12) and clarified that 11-50 is the LinkedIn band, not a confirmed count. Confidence kept at reported."
        },
        {
          "field": "founded_year (source)",
          "was": "Bloomberg/QA Financial + Tracxn/Crunchbase; note some profiles list 2024",
          "now": "Tracxn, Grokipedia, QA-Financial; note Crunchbase/CB Insights list 2024 and first press is April 2024",
          "reason": "Value (2023) and confidence (reported) unchanged; tightened the source note to reflect the genuine 2023-vs-2024 conflict across sources."
        },
        {
          "field": "researcher_backgrounds",
          "was": "included 'ex-Stanford AI master's, formerly Palantir and Microsoft, also Oxford' for Rayan Krishnan",
          "now": "'Co-founder/CEO Rayan Krishnan - ex-Stanford AI master's'",
          "reason": "The Palantir/Microsoft/Oxford details for Krishnan could not be independently verified in the cited sources; removed the unverifiable specifics while retaining the corroborated ex-Stanford background."
        },
        {
          "field": "other_certifications",
          "was": "['GDPR (compliance badge displayed on product page)']",
          "now": "['GDPR (compliance badge displayed on product page; unverified)']",
          "reason": "Clarified that the GDPR claim is a self-displayed badge without independent verification, consistent with treating compliance badges as claimed-unverified."
        }
      ],
      "verification_summary": "Re-verified the highest-risk claims for Vals AI (vals-ai). Confirmed this is the correct company matching the directory note 'domain benchmarks for regulated work', an independent third-party LLM/AI-application benchmarking platform for legal, finance, healthcare and tax (VLAIR, Vals Index, Finance Agent benchmark arXiv:2508.00828). Biggest correction: the draft's funding profile (total_raised $5M, Seed July 2024, investors Sequoia/Bloomberg Beta/Pear/8VC/J12) appears to be aggregator data conflated with an unrelated company, Vallor (a Miami procurement-AI startup that raised a $4M Bloomberg Beta/Dynamo-led seed in April 2025). getLatka, the most company-specific source, states Vals AI is bootstrapped with $0 raised. I downgraded total_raised, last_round, valuation, and notable_investors to unknown. Notable customers were all marked 'verified' in the draft, but third-party press shows they are VLAIR benchmarking-study consortium partners (law firms) and evaluated vendors (Harvey, CoCounsel, Alexi), not confirmed paying customers, changed all to 'self-claimed'. Removed 'coding environments' from focus_areas (Vals does coding benchmarks, not dev environments). SOC 2 and GDPR remain claimed-unverified (badges only on product page; no trust page). Founded year kept at 2023 (reported) with the 2024 conflict noted. Headcount ~12 / 11-50 band kept as reported. Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Clear identity match: Vals AI = independent benchmarking/evaluation platform for LLMs in regulated domains (legal, finance, healthcare, tax, coding) - matches directory note 'domain benchmarks for regulated work' and tags Finance/Legal/Evaluation.",
          "Founders: Rayan Krishnan (CEO) and Langston Nashold (CTO), both ex-Stanford AI master's; founding engineer Rez Havaei.",
          "Funding: $5M seed, ~July 2024; investors Sequoia, Bloomberg Beta, Pear VC, 8VC, J12.",
          "HQ San Francisco; LinkedIn band 11-50 (~19 profiles); getLatka reports 12-person team / $1.3M revenue 2025.",
          "Product: SaaS at platform.vals.ai with SDK/CLI/CI-CD; SOC 2 and GDPR badges on product page.",
          "Open published benchmarks: Finance Agent Benchmark (arXiv:2508.00828, MIT-licensed GitHub repo, HF dataset), Vals Legal AI Report (VLAIR), Vals Index leaderboards.",
          "Law firm consortium partners named publicly: Reed Smith, Fisher Phillips, McDermott Will & Emery, Ogletree Deakins. Legal AI vendors benchmarked: Harvey, CoCounsel, Alexi."
        ],
        "missing": [
          "Exact current headcount and headcount growth %.",
          "Open roles count (careers page returned 404 on guessed URL).",
          "Researcher count and full team backgrounds.",
          "Valuation; revenue is only a third-party self-reported estimate.",
          "SOC 2 type (I vs II) - badge shown but type unspecified; no dedicated trust/security page found.",
          "Whether named law firms/AI vendors are paying platform customers vs benchmarking-study participants."
        ],
        "conflicts": [
          "Founding year: founder bios / Bloomberg coverage imply 2023 (left Stanford to start it; first benchmarks Apr 2024); some profiles (Crunchbase/Tracxn search snippet) list 2024. Marked 2023 reported.",
          "Team size: LinkedIn shows ~19 profiles / 11-50 band; getLatka says 12-person team in 2025."
        ],
        "stale": [
          "Early benchmark coverage (TechTimes/Bloomberg) from April 2024 is >12 months old.",
          "VLAIR (Feb 2025) and the May 2025 open call are ~12-16 months old; current customer/partner roster may have changed.",
          "Funding data (July 2024) is ~23 months old; a newer round may not be reflected."
        ],
        "open_questions": [
          "Has Vals AI raised a Series A since the July 2024 seed?",
          "What is the current SOC 2 type and is there a formal trust/security page?",
          "Which organizations are paying platform customers (private evaluation infrastructure) vs public-benchmark participants?",
          "Current headcount and hiring velocity."
        ]
      }
    },
    {
      "rank": 16,
      "focus_areas_normalised": [
        "Computer Use",
        "Enterprise Workflows"
      ],
      "slug": "halluminate",
      "brand_name": "Halluminate",
      "segment": "Commercial vendors",
      "website": "https://www.halluminate.ai/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.halluminate.ai/ (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2024,
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/halluminate (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "San Francisco, CA, USA",
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/halluminate ; https://www.linkedin.com/company/halluminate (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "~5-17 (YC profile lists team size 5; LinkedIn lists company size 2-10 with ~17 associated members, accessed 2026-06-07). getlatka's '12 as of 2025-09-14' is a third-party estimate and conflicts with YC/LinkedIn.",
        "confidence": "estimated",
        "source": "https://www.ycombinator.com/companies/halluminate ; https://www.linkedin.com/company/halluminate (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "1-10",
        "confidence": "estimated",
        "source": "https://www.ycombinator.com/companies/halluminate (team size 5) ; https://www.linkedin.com/company/halluminate (2-10 employees) (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://www.paraform.com/company/halluminate/cmo0vioko000e0djsvyvu9n6m (accessed 2026-06-07), hiring 'Founding Member of Technical Staff (Research/Post-Training)'; CEO led research at Capital One Labs per YC profile"
      },
      "researcher_backgrounds": {
        "value": [
          "Jerry Wu (co-founder/CEO): ex-Capital One Labs (led product and research; launched an early AI agent in banking); Cornell CS & Economics",
          "Wyatt Marshall (co-founder): Cornell Milstein Scholar; large-scale data engineering at two early-stage NYC startups"
        ],
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/halluminate (accessed 2026-06-07)"
      },
      "what_they_sell": {
        "value": "mixed",
        "confidence": "confirmed",
        "source": "https://www.halluminate.ai/ ; https://www.ycombinator.com/companies/halluminate ; https://news.ycombinator.com/item?id=44865290 (accessed 2026-06-07), RL/sandbox environments for computer-use agents plus human/annotation data and evaluation benchmarks"
      },
      "deployment_model": {
        "value": "managed-hosted (fully managed, parallelizable sandbox environments); also bespoke/custom environments for enterprise clients",
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/halluminate ; https://news.ycombinator.com/item?id=44865290 (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "GA",
        "confidence": "estimated",
        "source": "https://www.ycombinator.com/companies/halluminate (accessed 2026-06-07), states paying customers, indicating commercial availability"
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/Halluminate (accessed 2026-06-07)"
      },
      "license": {
        "value": "Mixed: WebBench (MIT), westworld (Apache-2.0), noodle-flights (MIT), browserbench (no license shown)",
        "confidence": "confirmed",
        "source": "https://github.com/Halluminate (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "undisclosed/conflicting (YC S25-backed; PitchBook reports ~$160K; some aggregators report ~$500K; getlatka claims $0/bootstrapped, figures conflict and none confirmed by an official announcement)",
        "confidence": "reported",
        "source": "https://pitchbook.com/profiles/company/616571-92 ; https://www.ycombinator.com/companies/halluminate (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "Y Combinator S25 (Summer 2025); seed/pre-seed amount and date not officially disclosed",
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/halluminate (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown (getlatka lists a $4M estimate, but it is an unverified third-party estimate from a source with internally inconsistent data for this company)",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Y Combinator (S25)",
          "Orange Collective",
          "Antigravity Capital",
          "Batch Ventures",
          "Team Ignite Ventures",
          "Transpose Platform Management"
        ],
        "confidence": "reported",
        "source": "https://pitchbook.com/profiles/company/616571-92 (via search summary) ; https://www.ycombinator.com/companies/halluminate ; https://www.orangecollective.vc/portfolio/halluminate ; https://antigravity.capital/portfolio/halluminate (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "unknown (getlatka estimates ~$1.3M ARR for 2025, but this is an unverified third-party estimate from a source with internally inconsistent data, e.g. it also claims $0 raised/bootstrapped despite YC and angel backing)",
        "confidence": "estimated",
        "source": "https://getlatka.com/companies/halluminate.ai (accessed 2026-06-07)"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Leading computer-use model labs (unnamed)",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          },
          {
            "name": "The two largest browser agent companies (unnamed)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Frontier labs e.g. OpenAI, Anthropic (per company copy, unnamed/unconfirmed)",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          }
        ],
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/halluminate ; https://www.halluminate.ai/blog/westworld (accessed 2026-06-07), vendor's own claims only; no named, third-party-verified customers found"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "published_papers_or_benchmarks": {
        "value": [
          "WebBench, browser agent benchmark (~5,750 READ/ACTION tasks across 500+ websites): https://github.com/Halluminate/WebBench ; https://www.halluminate.ai/blog/benchmark",
          "BrowserBench, benchmark for browser infrastructure stealth (292 tasks/292 sites): https://github.com/Halluminate/browserbench ; https://www.halluminate.ai/blog/browserbench",
          "Westworld, simulated-internet web agent benchmark/RL environment: https://github.com/Halluminate/westworld ; https://www.halluminate.ai/blog/westworld"
        ],
        "confidence": "confirmed",
        "source": "https://github.com/Halluminate (accessed 2026-06-07)"
      },
      "focus_areas": [
        "computer use environments",
        "browser environments",
        "finance",
        "evaluation / benchmarks",
        "enterprise workflows"
      ],
      "positioning_summary": "Halluminate (YC S25, founded 2024, San Francisco) builds managed reinforcement-learning sandbox environments, simulated applications, and human/annotation data plus evaluation benchmarks (WebBench, BrowserBench, Westworld) to train and test computer-use and browser AI agents. Its 2026 site positioning has narrowed toward 'RL environments for financial services' (investment banking, private equity, consulting).",
      "best_fit_use_case": "Foundation-model labs and browser/computer-use agent teams needing deterministic, managed RL sandboxes plus expert eval/annotation data, increasingly for finance workflows.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.halluminate.ai/",
          "accessed_date": "2026-06-07",
          "note": "Official site; current positioning 'RL Environments for Financial Services'; lists Westworld, BrowserBench, Web Bench"
        },
        {
          "url": "https://www.ycombinator.com/companies/halluminate",
          "accessed_date": "2026-06-07",
          "note": "YC profile: founded 2024, team size 5 (early), HQ SF, S25 batch, founders, customer claims"
        },
        {
          "url": "https://news.ycombinator.com/item?id=44865290",
          "accessed_date": "2026-06-07",
          "note": "Launch HN: deterministic/offline sandboxes, RLVR, Westworld, human data services, ~20% customer improvement claim"
        },
        {
          "url": "https://github.com/Halluminate",
          "accessed_date": "2026-06-07",
          "note": "Org repos: WebBench (MIT, 96 stars), westworld (Apache-2.0, 17), browserbench (16), noodle-flights (MIT), sample_sft, harbor fork"
        },
        {
          "url": "https://getlatka.com/companies/halluminate.ai",
          "accessed_date": "2026-06-07",
          "note": "Reported $1.3M ARR, 12 employees (2025-09-14), $4M valuation, claims $0 raised/bootstrapped, conflicts with YC/angel backing"
        },
        {
          "url": "https://www.orangecollective.vc/portfolio/halluminate",
          "accessed_date": "2026-06-07",
          "note": "Orange Collective portfolio listing"
        },
        {
          "url": "https://antigravity.capital/portfolio/halluminate",
          "accessed_date": "2026-06-07",
          "note": "Antigravity Capital portfolio listing"
        },
        {
          "url": "https://www.halluminate.ai/blog/browserbench",
          "accessed_date": "2026-06-07",
          "note": "BrowserBench blog, Nov 22 2025; evaluates Browserbase, Steel, Hyperbrowser, Anchor Browser"
        },
        {
          "url": "https://www.halluminate.ai/blog/benchmark",
          "accessed_date": "2026-06-07",
          "note": "Web Bench: ~5,750 tasks across 500+ sites; Anthropic CUA best at 66.0%"
        },
        {
          "url": "https://www.halluminate.ai/blog/westworld",
          "accessed_date": "2026-06-07",
          "note": "Westworld blog; 'working with frontier Labs (ex. OpenAI, Anthropic)' for financial services"
        },
        {
          "url": "https://www.paraform.com/company/halluminate/cmo0vioko000e0djsvyvu9n6m",
          "accessed_date": "2026-06-07",
          "note": "Job listing: Founding Member of Technical Staff (Research/Post-Training)"
        },
        {
          "url": "https://www.linkedin.com/in/jerry-wu-7814b0100/",
          "accessed_date": "2026-06-07",
          "note": "Jerry Wu LinkedIn (public snippet), co-founder/CEO"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "current_headcount",
          "was": "12 (as of 2025-09-14), confidence reported, source getlatka",
          "now": "~5-17 range, confidence estimated, sourced to YC (team size 5) and LinkedIn (2-10 employees, ~17 associated members)",
          "reason": "The single getlatka source ('12') conflicts with YC's 'team size 5' and LinkedIn's '2-10 employees'. getlatka also carries internally inconsistent data for this company, so it cannot anchor a 'reported' headcount. Downgraded to estimated with the primary YC/LinkedIn figures."
        },
        {
          "field": "headcount_band",
          "was": "11-50, confidence reported",
          "now": "1-10, confidence estimated",
          "reason": "YC lists team size 5 and LinkedIn lists company size 2-10. The 11-50 band relied solely on the unreliable getlatka '12'. A YC S25 startup at this stage is almost certainly 1-10."
        },
        {
          "field": "valuation",
          "was": "$4M (getlatka estimate, 2025), confidence estimated",
          "now": "unknown, confidence unknown",
          "reason": "$4M is an unverified third-party estimate from getlatka, a source with demonstrably inconsistent data for this company (it also claims $0 raised). No primary/credible valuation source exists; set to unknown per downgrade-when-in-doubt rule."
        },
        {
          "field": "revenue_signals",
          "was": "~$1.3M ARR (2025, getlatka estimate), confidence reported",
          "now": "unknown/estimate caveated, confidence estimated",
          "reason": "ARR figure is a single-source getlatka estimate, not a vendor- or press-stated figure. Per rules, revenue is 'unknown' unless vendor/credible press states it; downgraded from reported to estimated and flagged as unverified."
        },
        {
          "field": "notable_investors",
          "was": "[YC, Orange Collective, Antigravity Capital]",
          "now": "added Batch Ventures, Team Ignite Ventures, Transpose Platform Management",
          "reason": "PitchBook lists 7 investors including Batch Ventures, Team Ignite Ventures, and Transpose Platform Management, which the draft omitted. Kept confidence at reported (aggregator-sourced, no official announcement)."
        },
        {
          "field": "total_raised",
          "was": "unknown, no disclosed round amount, confidence reported",
          "now": "undisclosed/conflicting; notes PitchBook ~$160K and ~$500K aggregator figures vs getlatka $0, confidence reported",
          "reason": "Newly surfaced PitchBook (~$160K) and other aggregator (~$500K) figures conflict with each other and with getlatka's bootstrapped claim. None is an official announcement; recorded the conflict explicitly rather than implying truly unknown."
        },
        {
          "field": "published_papers_or_benchmarks",
          "was": "WebBench described as '~2.5k-5.75k tasks'",
          "now": "WebBench ~5,750 tasks across 500+ sites",
          "reason": "Aligned the task count with the vendor benchmark blog (~5,750 tasks); minor accuracy fix."
        }
      ],
      "verification_summary": "Confirmed this is the correct company matching the directory note 'computer use, done deterministically': YC and LinkedIn explicitly describe Halluminate as infrastructure (managed sandboxes + data + benchmarks) for training computer-use AI agents, with offline/deterministic sandboxes referenced in the Launch HN. Founded 2024, SF, YC S25, founders Jerry Wu (ex-Capital One Labs) and Wyatt Marshall, all confirmed via YC. Open-source org and licenses (WebBench MIT, westworld Apache-2.0, noodle-flights MIT, browserbench no license) confirmed on GitHub. Key downgrades: headcount (getlatka '12' conflicts with YC team size 5 and LinkedIn 2-10; band corrected to 1-10), valuation ($4M getlatka estimate -> unknown), and revenue (~$1.3M ARR -> unverified estimate) because getlatka is a single, internally inconsistent source (it claims $0 raised despite YC/angel backing). Investor list expanded to 7 via PitchBook (added Batch Ventures, Team Ignite Ventures, Transpose Platform Management). Funding figures conflict across sources ($160K PitchBook vs ~$500K vs $0) with no official announcement, so kept at 'reported' with the conflict documented. All customer claims remain self-claimed (unnamed labs/browser-agent companies, OpenAI/Anthropic per company copy), no third-party verification found, so none upgraded to verified. SOC2/certifications/security page not found; left unknown. Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Confirmed correct company: Halluminate (YC S25), computer-use/browser RL environments, deterministic sandboxes, finance tilt, matches directory note and tags.",
          "Founders Jerry Wu (ex-Capital One Labs) and Wyatt Marshall (ex-Cornell Milstein Scholar).",
          "Open-source benchmarks: WebBench, BrowserBench, Westworld with visible licenses/star counts.",
          "Products: managed/parallelizable sandbox environments, RLVR-based training, human data + evaluation services.",
          "Reported metrics: ~$1.3M ARR, 12 employees, $4M valuation (getlatka, Sept 2025).",
          "Investors: YC, Orange Collective, Antigravity Capital (portfolio listings)."
        ],
        "missing": [
          "No disclosed funding round amount or date.",
          "No SOC 2 / ISO / security or trust page found.",
          "No named, third-party-verified customers (only vendor self-claims of frontier labs and 'two largest browser agent companies').",
          "Exact researcher count and current open-roles count not confirmed.",
          "Headcount growth % not available."
        ],
        "conflicts": [
          "getlatka lists '$0 raised / bootstrapped' while YC S25 (standard YC investment) and Orange Collective + Antigravity Capital portfolio listings indicate outside/angel backing. Treated total_raised as unknown amount but investor-backed."
        ],
        "stale": [
          "getlatka headcount/revenue/valuation data is as of 2025-09-14 (>8 months old; nearing staleness).",
          "YC profile team size of 5 appears outdated vs getlatka's 12."
        ],
        "open_questions": [
          "Has Halluminate raised a priced seed round, and how much?",
          "Who are the named customers (which model labs / which two browser-agent companies)?",
          "Does the company have any security/compliance certifications?",
          "Has the company fully pivoted to finance, or does it retain general computer-use/browser offerings?"
        ]
      }
    },
    {
      "rank": 17,
      "focus_areas_normalised": [
        "Computer Use"
      ],
      "slug": "matrices",
      "brand_name": "Matrices",
      "segment": "Commercial vendors",
      "website": "https://matrices.ai",
      "status": {
        "value": "active",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/matricesapp (accessed 2026-06-07); https://theinflectionpoint.ai/p/building-a-fake-internet-for-ai-agents (accessed 2026-06-07)"
      },
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://matrices.ai/ (title: 'Matrices - Training Environments for LLM Agents'); https://www.linkedin.com/company/matricesapp (tagline 'Towards self-driving computers'); https://theinflectionpoint.ai/p/building-a-fake-internet-for-ai-agents (accessed 2026-06-07)"
      },
      "focus_areas": [
        "browser environments",
        "computer use environments"
      ],
      "positioning_summary": "Matrices builds reinforcement-learning training environments for frontier AI labs to train agents that use computers and browsers like humans, described as a 'gamified replica of the internet' where thousands of agents learn via RL. The company frames its mission as 'towards self-driving computers' and says it helps labs train computer-use agents (Operator-class systems). Note: this is the correct browser-native entity (matrices.ai / LinkedIn 'matricesapp'), distinct from the similarly named 'Matrice.ai' computer-vision company and 'Matrix AI Network' blockchain project.",
      "best_fit_use_case": "A frontier lab needing large-scale, realistic browser/computer-use RL environments to train and evaluate web-navigating agents.",
      "maturity": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "deployment_model": {
        "value": "managed-hosted",
        "confidence": "estimated",
        "source": "https://theinflectionpoint.ai/p/building-a-fake-internet-for-ai-agents (accessed 2026-06-07), inferred from hosted 'replica of the internet' running thousands of agents; not explicitly stated"
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": "No public GitHub org or OSS product found as of 2026-06-07"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "founded_year": {
        "value": 2023,
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/matricesapp ; https://tracxn.com/d/companies/matrices/ (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "San Francisco, California, USA",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/matricesapp (accessed 2026-06-07); https://tracxn.com/d/companies/matrices/ (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "~19-21 employees (LinkedIn band 11-50; Tracxn '21 employees as of Apr 30, 2026')",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/matricesapp ; https://tracxn.com/d/companies/matrices/ (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/matricesapp (size band 11-50); https://tracxn.com/d/companies/matrices/ (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Note: theinflectionpoint.ai article described team as 'still just 3 people' at an earlier date; current LinkedIn/Tracxn show ~19-21, implying growth, but no quantified period available"
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://matrices.ai/careers (accessed 2026-06-07), actively hiring noted but exact count not retrievable (JS-rendered page)"
      },
      "has_researchers": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Co-founder Leonardo Axel Setyanto (Co-Founder/CTO): UT Austin; prior startup engineering (Loku), no frontier-lab pedigree found",
          "Co-founder John Qian: University of Illinois Urbana-Champaign"
        ],
        "confidence": "reported",
        "source": "https://www.linkedin.com/in/axelsetyanto/ ; https://www.linkedin.com/in/qianjohn/ (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "$5M (single-source; exact figure reported only by one third-party blog and a careers-page snippet)",
        "confidence": "reported",
        "source": "https://theinflectionpoint.ai/p/building-a-fake-internet-for-ai-agents ; matrices.ai/careers snippet (accessed 2026-06-07). Crunchbase (matrices-f9d0) confirms a Seed round but obfuscates the amount."
      },
      "last_round": {
        "value": "Seed (amount reported as $5M; date unconfirmed)",
        "confidence": "reported",
        "source": "https://www.crunchbase.com/organization/matrices-f9d0 (confirms Seed stage); https://theinflectionpoint.ai/p/building-a-fake-internet-for-ai-agents (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Index Ventures",
          "AI Grant (Nat Friedman & Daniel Gross)",
          "Naval Ravikant"
        ],
        "confidence": "reported",
        "source": "https://www.crunchbase.com/organization/matrices-f9d0 (independently lists Index Ventures); https://theinflectionpoint.ai/p/building-a-fake-internet-for-ai-agents (Index, AI Grant, Naval) (accessed 2026-06-07). AI Grant and Naval Ravikant are single-source (blog only)."
      },
      "revenue_signals": {
        "value": "Self-claimed '7-figure contracts with multiple/top AI labs'; no verified figure",
        "confidence": "reported",
        "source": "https://theinflectionpoint.ai/p/building-a-fake-internet-for-ai-agents ; matrices.ai/careers snippet (accessed 2026-06-07), vendor self-claim, labs unnamed"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Unnamed frontier AI labs (described as signing 7-figure contracts; agents like OpenAI 'Operator' referenced as the type they help train)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://matrices.ai/careers (snippet) ; https://theinflectionpoint.ai/p/building-a-fake-internet-for-ai-agents (accessed 2026-06-07), labs not named; no specific frontier lab confirmed as a customer"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "published_papers_or_benchmarks": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://matrices.ai/",
          "accessed_date": "2026-06-07",
          "note": "Official site; title 'Matrices - Training Environments for LLM Agents'. JS-rendered, body not extractable via fetch."
        },
        {
          "url": "https://matrices.ai/careers",
          "accessed_date": "2026-06-07",
          "note": "Careers description (via search snippet): computer-use environments, gamified replica of internet, Operator, 7-figure lab contracts, $5M from Index Ventures/AI Grant/Naval Ravikant."
        },
        {
          "url": "https://www.linkedin.com/company/matricesapp",
          "accessed_date": "2026-06-07",
          "note": "LinkedIn company page: 'Building training environments for frontier AI labs to train agents that use computers like us. Towards self-driving computers.' 11-50 (19) employees; founded 2023; SF HQ 200 Ritch St; website matrices.ai."
        },
        {
          "url": "https://theinflectionpoint.ai/p/building-a-fake-internet-for-ai-agents",
          "accessed_date": "2026-06-07",
          "note": "Third-party article: gamified replica of internet, RL, 7-figure contracts with leading AI labs, $5M, Index/AI Grant/Naval, team described as 3 people hiring founding engineer."
        },
        {
          "url": "https://www.crunchbase.com/organization/matrices-f9d0",
          "accessed_date": "2026-06-07",
          "note": "Crunchbase profile (403 on direct fetch); referenced via search for funding/HQ."
        },
        {
          "url": "https://www.linkedin.com/in/axelsetyanto/",
          "accessed_date": "2026-06-07",
          "note": "Co-founder Leonardo Axel Setyanto; UT Austin; prior startup Loku; describes Matrices (originally 'AI-native spreadsheet')."
        },
        {
          "url": "https://www.linkedin.com/in/qianjohn/",
          "accessed_date": "2026-06-07",
          "note": "Co-founder John Qian; UIUC; 'Building Matrices'."
        },
        {
          "url": "https://tracxn.com/d/companies/matrices/",
          "accessed_date": "2026-06-07",
          "note": "Tracxn profile: founded 2023, SF, founders John Qian & Leonardo Axel Setyanto; lists earlier spreadsheet product (likely stale)."
        },
        {
          "url": "https://rocketreach.co/matrices-management_b6cf945ec78254fc",
          "accessed_date": "2026-06-07",
          "note": "Management: John Qian (Co-Founder), Leonardo Setyanto (Co-Founder/CTO), Ramit Goolry (Founding Engineer); ~20 employees; website matrices.app."
        },
        {
          "url": "https://techcrunch.com/2025/09/21/silicon-valley-bets-big-on-environments-to-train-ai-agents/",
          "accessed_date": "2026-06-07",
          "note": "Market context on RL environments vendors; Matrices not named in this piece."
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "status.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "'Confirmed' rested only on the LinkedIn company page. Active status is well-supported by an active site, LinkedIn, and a 2026 third-party article, but a single self-controlled page does not justify 'confirmed'. Downgraded to reported."
        },
        {
          "field": "hq_location.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "Sourced to a single LinkedIn snippet. SF HQ is corroborated by Tracxn, but neither is a primary registry; downgraded from confirmed to reported."
        },
        {
          "field": "current_headcount.value",
          "was": "~19 employees (LinkedIn), as of 2026-06-07",
          "now": "~19-21 employees (LinkedIn band 11-50; Tracxn '21 employees as of Apr 30, 2026')",
          "reason": "Added independent Tracxn corroboration (21 as of Apr 30, 2026) and widened to a range; sanity-checked against LinkedIn 11-50 band. Not a 200+ company."
        },
        {
          "field": "headcount_growth.source",
          "was": "(empty)",
          "now": "Note added re: blog described team as 'just 3 people' at earlier date vs ~19-21 now",
          "reason": "Documented the qualitative growth signal found in the third-party article, while keeping value 'unknown' because no quantified period exists."
        },
        {
          "field": "total_raised.value/source",
          "was": "$5M (sources: careers snippet, blog)",
          "now": "$5M flagged single-source; added Crunchbase note that it confirms Seed stage but obfuscates the amount",
          "reason": "The exact $5M figure appears only in one third-party blog and a vendor careers-page snippet. Crunchbase independently confirms a Seed round but not the amount. Kept 'reported' and flagged weak sourcing."
        },
        {
          "field": "last_round.source",
          "was": "LinkedIn + blog",
          "now": "Crunchbase (confirms Seed) + blog",
          "reason": "Replaced unverifiable LinkedIn April-2024 snippet with Crunchbase, which independently confirms the Seed stage. Date remains unconfirmed."
        },
        {
          "field": "notable_investors.source",
          "was": "careers snippet + blog",
          "now": "Crunchbase (independently lists Index Ventures) + blog; flagged AI Grant and Naval as single-source",
          "reason": "Crunchbase corroborates only Index Ventures. AI Grant and Naval Ravikant rest solely on the single blog; flagged accordingly while keeping 'reported'."
        },
        {
          "field": "notable_customers.value[0].frontier_lab_tie",
          "was": "true",
          "now": "false",
          "reason": "The labs are explicitly unnamed and the relationship is entirely self-claimed; no specific frontier lab is confirmed as a customer. A frontier-lab tie should not be asserted as true on a self-claim with no named party. The customer entry remains self-claimed/reported."
        }
      ],
      "verification_summary": "Confirmed the draft researched the CORRECT entity: Matrices (matrices.ai, LinkedIn 'matricesapp', SF, founders John Qian & Leonardo Axel Setyanto), a browser/computer-use RL-environments vendor with tagline 'Towards self-driving computers', matching the directory note 'browser native'. Critically distinguished it from two same-named distractors that surfaced in search: 'Matrice.ai' (computer-vision, Voltage Park-led seed) and 'Matrix AI Network' (blockchain), neither is this company. Funding: $5M Seed amount is single-sourced (one blog + careers snippet); Crunchbase (matrices-f9d0) independently confirms a Seed round and Index Ventures but obfuscates the amount and does not corroborate AI Grant or Naval Ravikant, kept all funding fields at 'reported' and flagged the weak sourcing. Headcount ~19-21 (LinkedIn 11-50 band; Tracxn 21 as of Apr 30 2026) is plausible for an early-stage startup; downgraded over-stated 'confirmed' confidences on status and HQ to 'reported'. Set notable_customers frontier_lab_tie to false because the labs are unnamed and the claim is purely self-asserted. SOC2/certs/security page remain unknown (no trust page found). Overall confidence: medium.",
      "research_notes": {}
    },
    {
      "rank": 18,
      "focus_areas_normalised": [
        "Coding",
        "Computer Use",
        "Enterprise Workflows"
      ],
      "slug": "benchflow",
      "brand_name": "BenchFlow",
      "segment": "Commercial vendors",
      "website": "https://www.benchflow.ai/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.benchflow.ai/ (accessed 2026-06-07); https://github.com/benchflow-ai/benchflow (accessed 2026-06-07), release 0.5.2 dated 2026-06-05"
      },
      "founded_year": {
        "value": 2024,
        "confidence": "reported",
        "source": "https://startupintros.com/orgs/benchflow (accessed 2026-06-07); https://www.linkedin.com/company/benchflow-ai (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "New Castle, DE, USA (incorporation); Bay Area / San Francisco operating presence",
        "confidence": "reported",
        "source": "https://startupintros.com/orgs/benchflow (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "distributed_remote": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://www.benchflow.ai/ (accessed 2026-06-07), 'A frontier environment lab for AI agents'; environments + evaluation infrastructure (SkillsBench, ClawsBench, runtime)"
      },
      "deployment_model": {
        "value": "API + self-hosted (open-source runtime; sandboxes via Docker/Daytona/Modal); Benchmark Hub managed-hosted",
        "confidence": "reported",
        "source": "https://github.com/benchflow-ai/benchflow (accessed 2026-06-07); https://docs.benchflow.ai/introduction (accessed 2026-06-07); https://news.ycombinator.com/item?id=43440893"
      },
      "maturity": {
        "value": "research preview / early-stage (open-source runtime and Benchmark Hub live and actively released; RFT framework still in development; pre-Series-A YC startup)",
        "confidence": "estimated",
        "source": "https://github.com/benchflow-ai/benchflow (accessed 2026-06-07), release 0.5.2 on 2026-06-05; https://docs.benchflow.ai/introduction (accessed 2026-06-07)"
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/benchflow-ai/benchflow (accessed 2026-06-07)"
      },
      "license": {
        "value": "Apache-2.0",
        "confidence": "confirmed",
        "source": "https://github.com/benchflow-ai/benchflow (accessed 2026-06-07)"
      },
      "current_headcount": {
        "value": "1-10 employees (LinkedIn shows 2-10 band; ~3 identified on LinkedIn as of 2026-06-07)",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/benchflow-ai (accessed 2026-06-07); https://startupintros.com/orgs/benchflow (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "1-10",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/benchflow-ai (accessed 2026-06-07); https://startupintros.com/orgs/benchflow (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://www.benchflow.ai/ (accessed 2026-06-07); SkillsBench has an arXiv paper (arXiv:2602.12670) authored under BenchFlow, eval/research-oriented work"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Xiangyi Li (founder/CEO), creator of SkillsBench; prior engineering roles per founder interview",
          "Moritz Wallawitsch, early co-founder, reported departure ~Feb 2025"
        ],
        "confidence": "reported",
        "source": "https://www.inverse.com/tech/building-ais-testing-ground-benchflows-mission-as-explained-by-xiangyi-li (accessed 2026-06-07); https://startupintros.com/orgs/benchflow (accessed 2026-06-07); https://www.linkedin.com/in/l1xiangyi/ (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "$1.0M",
        "confidence": "reported",
        "source": "https://startupintros.com/orgs/benchflow (accessed 2026-06-07), single aggregator; no primary press release or official funding announcement located; Crunchbase/PitchBook not directly accessible"
      },
      "last_round": {
        "value": "Seed, $1M, January 2025",
        "confidence": "reported",
        "source": "https://startupintros.com/orgs/benchflow (accessed 2026-06-07), single aggregator; not independently confirmed by primary announcement"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Y Combinator",
          "Pear VC",
          "Construct Capital",
          "FAST by GETTYLAB",
          "Ankit Jain (angel)"
        ],
        "confidence": "reported",
        "source": "https://startupintros.com/orgs/benchflow (accessed 2026-06-07), single aggregator; not confirmed against an investor's own portfolio page or primary announcement"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [],
        "confidence": "unknown",
        "source": "No named customers on the official site or any credible third party. Search-summary claim that BenchFlow 'testing infrastructure was featured during the launch of Google's Gemini model' appears only in AI-generated summaries with no primary source and is NOT verified; official site references Gemini only as a supported model, not a partner/customer."
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No SOC2 mention or trust page found on official site (accessed 2026-06-07)"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No dedicated trust/security page found (https://www.benchflow.ai/ accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "SkillsBench, 'Benchmarking How Well Agent Skills Work Across Diverse Tasks' (arXiv:2602.12670); 86 tasks across 11 domains with curated Skills and deterministic verifiers",
          "ClawsBench (mock workplace environments: Gmail, Calendar, Drive, Docs, Slack)",
          "Benchmark Hub (community ports incl. OS-World, WebArena)"
        ],
        "confidence": "confirmed",
        "source": "https://www.benchflow.ai/ (accessed 2026-06-07); https://github.com/benchflow-ai/skillsbench (accessed 2026-06-07); https://arxiv.org/abs/2602.12670 (accessed 2026-06-07)"
      },
      "focus_areas": [
        "evaluation / benchmarks",
        "execution infrastructure",
        "enterprise workflows",
        "computer use environments",
        "browser environments",
        "coding environments"
      ],
      "positioning_summary": "BenchFlow is an early-stage, YC-backed open-source 'environment lab' building evaluation infrastructure and a community Benchmark Hub for AI agents, with products including SkillsBench, ClawsBench (mock workplace environments) and a sandboxed agent runtime. It positions environments as 'the new data' for training and evaluating agents across domains like enterprise workflows, coding, computer use and browser tasks.",
      "best_fit_use_case": "Teams needing open-source, reproducible agent evaluation environments and a runtime to benchmark coding/computer-use/workplace agents at low setup cost.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.benchflow.ai/",
          "accessed_date": "2026-06-07",
          "note": "Official site, 'frontier environment lab for AI agents'; products SkillsBench, ClawsBench, Runtime"
        },
        {
          "url": "https://docs.benchflow.ai/introduction",
          "accessed_date": "2026-06-07",
          "note": "Official docs, Benchmark Hub + eval infra; RFT framework in development"
        },
        {
          "url": "https://github.com/benchflow-ai/benchflow",
          "accessed_date": "2026-06-07",
          "note": "Main repo, Apache-2.0, ~249 stars, release 0.5.2 (2026-06-05); RL environments framework, sandboxes via Docker/Daytona/Modal"
        },
        {
          "url": "https://github.com/benchflow-ai/skillsbench",
          "accessed_date": "2026-06-07",
          "note": "SkillsBench repo"
        },
        {
          "url": "https://startupintros.com/orgs/benchflow",
          "accessed_date": "2026-06-07",
          "note": "Founded 2024, New Castle DE, $1M seed Jan 2025, investors (YC, Pear, Construct, FAST/GETTYLAB, Ankit Jain), 1-10 employees, co-founder departure"
        },
        {
          "url": "https://www.linkedin.com/company/benchflow-ai",
          "accessed_date": "2026-06-07",
          "note": "Public snippet, 2-10 employees, founded 2024, Software Development; specialties 'Data and environments for agents to learn'"
        },
        {
          "url": "https://www.linkedin.com/in/l1xiangyi/",
          "accessed_date": "2026-06-07",
          "note": "Founder Xiangyi Li, creator of SkillsBench"
        },
        {
          "url": "https://www.inverse.com/tech/building-ais-testing-ground-benchflows-mission-as-explained-by-xiangyi-li",
          "accessed_date": "2026-06-07",
          "note": "Founder interview, founding story, background, no funding/customers named"
        },
        {
          "url": "https://news.ycombinator.com/item?id=43440893",
          "accessed_date": "2026-06-07",
          "note": "Show HN: BenchFlow – run AI benchmarks as an API"
        },
        {
          "url": "https://www.crunchbase.com/organization/benchflow",
          "accessed_date": "2026-06-07",
          "note": "Crunchbase profile, returned HTTP 403 (not directly accessible)"
        },
        {
          "url": "https://pitchbook.com/profiles/company/711737-02",
          "accessed_date": "2026-06-07",
          "note": "PitchBook profile (not fetched)"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "maturity",
          "was": "GA (open-source, actively released; runtime/Benchmark Hub live, RFT framework in development)",
          "now": "research preview / early-stage (open-source runtime and Benchmark Hub live and actively released; RFT framework still in development; pre-Series-A YC startup)",
          "reason": "'GA' overreaches for a pre-Series-A YC startup whose own draft notes the RFT framework is still in development. From the controlled set (research preview/private beta/GA/unknown), an early-stage lab with a live-but-evolving OSS runtime is best described as research preview. Confidence kept 'estimated'."
        },
        {
          "field": "has_researchers",
          "was": "confidence: estimated",
          "now": "confidence: reported",
          "reason": "Upgraded one notch because SkillsBench is now published on arXiv (arXiv:2602.12670) under BenchFlow, providing a concrete primary artifact of research output rather than pure inference."
        },
        {
          "field": "published_papers_or_benchmarks",
          "was": "SkillsBench (benchmark...~84-86 tasks across 11 domains); source: site/github/hub.benchflow.ai",
          "now": "SkillsBench described via its arXiv paper (arXiv:2602.12670, 86 tasks/11 domains); source updated to include arXiv",
          "reason": "Located the primary peer-style source (arXiv:2602.12670); replaced the unverifiable hub.benchflow.ai citation with the arXiv link and harmonized task count to the paper's 86. Confidence remains 'confirmed'."
        },
        {
          "field": "total_raised",
          "was": "source: startupintros (reported)",
          "now": "source annotated as single aggregator; no primary announcement found",
          "reason": "Value/confidence unchanged ('reported') but flagged that the only source is one aggregator (startupintros); Crunchbase/PitchBook not accessible and no official press release located, so it should not be upgraded."
        },
        {
          "field": "last_round",
          "was": "source: startupintros (reported)",
          "now": "source annotated as single aggregator, not independently confirmed",
          "reason": "Same single-source limitation; kept 'reported' and made the weakness explicit per anti-overreach rules."
        },
        {
          "field": "notable_investors",
          "was": "source: startupintros (reported)",
          "now": "source annotated as single aggregator, not confirmed against investor portfolio pages",
          "reason": "Investor list traces only to the same aggregator; kept 'reported' and noted lack of corroboration from any investor's own site."
        },
        {
          "field": "notable_customers",
          "was": "source mentions 'unverified claim of involvement during a Google Gemini launch'",
          "now": "explicitly states the Gemini-launch claim is from AI-generated summaries only, has no primary source, and the official site references Gemini only as a supported model",
          "reason": "Strengthened the disclaimer after confirming the Gemini-launch claim recurs only in AI search summaries and is contradicted by the official site (Gemini listed as a supported model, not a partner/customer). Remains unknown/empty."
        },
        {
          "field": "soc2",
          "was": "source: ''",
          "now": "source: noted no SOC2/trust page on official site",
          "reason": "Documented the negative finding from re-checking the official site; value stays 'unknown'."
        },
        {
          "field": "security_page",
          "was": "source: ''",
          "now": "source: noted no trust/security page found",
          "reason": "Documented the negative finding; value stays 'unknown'."
        },
        {
          "field": "hq_location",
          "was": "source: startupintros + benchflow.ai",
          "now": "source: startupintros only",
          "reason": "Removed benchflow.ai from the source list as the homepage does not state HQ location; only the aggregator does. Value/confidence ('reported') unchanged."
        },
        {
          "field": "current_headcount / headcount_band",
          "was": "value '2-10 employees (3 identified...)' / band 1-10",
          "now": "reconciled to '1-10' framing noting LinkedIn 2-10 band and startupintros 1-10",
          "reason": "Both sources corroborate a sub-10 headcount (a 200+ figure would have been a red flag; none present). Minor wording cleanup for consistency; confidence stays 'reported'."
        }
      ],
      "verification_summary": "Re-verified BenchFlow independently. Company identity is CORRECT and matches the directory note 'dev workflow benchmarking', BenchFlow (benchflow-ai) is an early-stage YC-backed open-source 'environment lab' for AI-agent benchmarking/evaluation founded by Xiangyi Li; not a same-named unrelated entity. Confirmed via official site, GitHub (Apache-2.0, 249 stars, release 0.5.2 on 2026-06-05), and the SkillsBench arXiv paper (2602.12670). Funding ($1M seed, Jan 2025; YC/Pear/Construct/FAST-GETTYLAB/Ankit Jain) traces to a SINGLE aggregator (startupintros) with no primary press release; Crunchbase/PitchBook were inaccessible, kept all funding fields at 'reported' and flagged the single-source weakness rather than upgrading. Headcount 1-10 corroborated by LinkedIn and the aggregator (no inflated 200+ figure). No named customers exist on the official site or any third party; the 'featured during Google Gemini launch' claim appears only in AI-generated search summaries with no primary source and is explicitly marked unverified (the site lists Gemini merely as a supported model). No SOC2, certifications, or trust/security page found, all left 'unknown'. Main correction: downgraded maturity from 'GA' to 'research preview/early-stage' as GA overreaches for a pre-Series-A startup with an in-development RFT framework; modest upgrade of has_researchers to 'reported' given the arXiv publication. Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Company identity confirmed: BenchFlow (benchflow.ai), open-source environment/eval lab for AI agents, matches directory note 'dev workflow benchmarking' + Code/Enterprise tags",
          "Founded 2024 by Xiangyi Li; YC-backed; $1M seed (Jan 2025) from Pear VC, Construct Capital, FAST/GETTYLAB, Ankit Jain",
          "Products: SkillsBench, ClawsBench (mock workplace envs), BenchFlow Runtime, Benchmark Hub ('Hugging Face for benchmarks')",
          "Open source, Apache-2.0, ~249 stars, actively maintained (release 0.5.2 on 2026-06-05)",
          "Headcount 1-10 (2-10 on LinkedIn, ~3 people identified)"
        ],
        "missing": [
          "Named/verified customers",
          "SOC 2 / ISO / security or trust page",
          "Valuation, revenue signals, headcount growth, open-roles count",
          "Researcher count and detailed backgrounds beyond founder",
          "Confirmed HQ city (DE incorporation vs Bay Area operations)"
        ],
        "conflicts": [
          "HQ: startupintros lists New Castle, DE; some search summaries say San Francisco/San Jose; LinkedIn does not specify city",
          "Headcount: LinkedIn '2-10' vs one search summary '2 employees'",
          "SkillsBench task count varies across sources (84 vs 86) and one summary dates it 'March 2025' while official site implies later, treat task counts as Tier B / not load-bearing",
          "Founding/launch framing: 'emerged 2024' / 'launched Sept 2024' vs incorporation timing"
        ],
        "stale": [
          "$1M seed dated January 2025 (>12 months old as of 2026-06-07), funding status may have changed"
        ],
        "open_questions": [
          "Is BenchFlow officially a YC batch company (listed as investor everywhere, but not confirmed on ycombinator.com/companies directory)?",
          "Unverified claim that BenchFlow infra was 'featured during a Google Gemini launch', potential frontier-lab (Google DeepMind) tie, NOT confirmed by primary source; treat as self-claimed/unverified",
          "Whether any commercial/managed offering exists beyond the open-source runtime and hosted Benchmark Hub",
          "Deployment specifics and whether enterprise SLAs/security posture exist"
        ]
      }
    },
    {
      "rank": 19,
      "focus_areas_normalised": [
        "Coding",
        "Computer Use",
        "Enterprise Workflows",
        "Long-Horizon"
      ],
      "slug": "collinear",
      "brand_name": "Collinear",
      "segment": "Commercial vendors",
      "website": "https://www.collinear.ai/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.collinear.ai/ (accessed 2026-06-07)"
      },
      "focus_areas": [
        "enterprise workflows",
        "long-horizon / general reasoning",
        "coding environments",
        "computer use environments",
        "evaluation / benchmarks",
        "execution infrastructure"
      ],
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://www.collinear.ai/ ; https://blog.collinear.ai/p/rl-env-as-a-service (accessed 2026-06-07)"
      },
      "positioning_summary": "Collinear AI operates a 'Simulation Lab' (SimLab) that builds sandboxed, stateful RL environments simulating enterprise users, tools (Jira, ServiceNow, Shopify, EMR, airline/hotel systems) and multi-step workflows, producing training-ready trajectories, reward signals and evals for agentic models. It also offers synthetic post-training data and LLM-judge evaluation, positioning itself around 'environment-as-a-service' for enterprise long-horizon agents.",
      "best_fit_use_case": "Buyers training or evaluating enterprise agents that need realistic, stateful long-horizon simulated workflows (IT support, customer service, finance, HR) with verifiable rewards.",
      "maturity": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Product pages and blog do not state beta/GA status (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "managed-hosted / API (environment endpoints)",
        "confidence": "reported",
        "source": "https://blog.collinear.ai/p/rl-env-as-a-service - 'point your trainer and sampler at Collinear's environment endpoints' (accessed 2026-06-07)"
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": "No public OSS repos surfaced; product is a hosted simulation platform (accessed 2026-06-07)"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "founded_year": {
        "value": "2023",
        "confidence": "reported",
        "source": "https://www.technologyreview.com/innovator/nazneen-rajani/ and https://www.innovatorsunder35.com/the-list/nazneen-rajani/ (founded after leaving Hugging Face); https://www.crunchbase.com/person/nazneen-rajani-9727 (CEO since Jan 2023); Tracxn (2023). LinkedIn/prospeo list 2024, conflict; 2023 better supported (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "Mountain View / Sunnyvale, California, USA",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/collinearai (HQ Mountain View); prospeo lists Mountain View CA; some databases list San Francisco (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "~16-23 (About page lists 16 team members; Tracxn ~23 as of 2026-04-30; Crunchbase 1-10; LinkedIn band 11-50; prospeo 11-20)",
        "confidence": "reported",
        "source": "https://www.collinear.ai/about-us (16 named); https://tracxn.com/d/companies/collinearai (~23, 2026-04-30); https://www.crunchbase.com/organization/collinear-ai (1-10); https://www.linkedin.com/company/collinearai (11-50) (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/collinearai (11-50); corroborated by About page (16 members) and Tracxn (~23); note Crunchbase lists 1-10 (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": 10,
        "confidence": "confirmed",
        "source": "https://www.collinear.ai/careers - 10 roles (Head of People and Talent, Technical Product Lead, ML SWE, Senior Backend SWE, Research Scientist/Engineer, Research Internship, Marketing Lead, Head of Research, Engineering Leader, Account Executive) (accessed 2026-06-07)"
      },
      "distributed_remote": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://www.collinear.ai/careers states no remote/distributed policy; physical CA office implied (accessed 2026-06-07)"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://www.collinear.ai/careers (Research Scientist/Engineer, Head of Research roles); https://www.collinear.ai/about-us (team from Hugging Face, Salesforce, Google, Amazon, Stanford) (accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "About page lists 16 team members but does not split research vs engineering (accessed 2026-06-07)"
      },
      "researcher_backgrounds": {
        "value": [
          "Founder/CEO Nazneen Rajani: ex-Robustness Research Lead at Hugging Face, ex-Research Scientist at Salesforce, PhD University of Texas at Austin (MIT TR Innovators Under 35)",
          "Team described as researchers/engineers from Hugging Face, Salesforce, Google, Amazon, Stanford (per company About page)"
        ],
        "confidence": "reported",
        "source": "https://www.collinear.ai/about-us ; https://www.technologyreview.com/innovator/nazneen-rajani/ ; https://www.linkedin.com/in/nazneenrajani (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No disclosed funding amount in any source. Investors named on About page but Tracxn/Crunchbase/prospeo list company as 'unfunded' / no disclosed round (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No round stage/amount/date found in public sources (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "prospeo lists ~$5.3M but explicitly labels it an industry-average estimate, not a reported figure (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Engineering Capital",
          "Firestreak Ventures",
          "112 Capital (11.2 Capital)"
        ],
        "confidence": "reported",
        "source": "https://www.collinear.ai/about-us (names Engineering Capital, Firestreak, 112 Capital). PitchBook search snippet additionally lists Khasm Labs and ISV Startup Springboard, single weak source, not included (accessed 2026-06-07)"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Amazon",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "ServiceNow",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Kore.ai",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Matillion",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "MasterClass",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Zoho",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "HUMAIN",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Commonwealth Bank",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "LaHaus",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "ParseAI",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://www.collinear.ai/ (customer logos); https://www.collinear.ai/case-studies (Kore.ai and ServiceNow case studies), all published by the vendor, hence self-claimed (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No trust/security page found; /security returned 404 (accessed 2026-06-07)"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://www.collinear.ai/security returned 404 (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [],
        "confidence": "unknown",
        "source": "Company blog publishes thought-leadership posts; no formal paper or named public benchmark confirmed (accessed 2026-06-07)"
      },
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.collinear.ai/",
          "accessed_date": "2026-06-07",
          "note": "Official homepage: SimLab products, customer logos, claimed metrics (100+ domains, 500B+ tokens, 40%+ improvement)"
        },
        {
          "url": "https://www.collinear.ai/about-us",
          "accessed_date": "2026-06-07",
          "note": "Team list (16 members), investors (Engineering Capital, Firestreak, 112 Capital), advisors James Zou & Ritesh Agarwal"
        },
        {
          "url": "https://www.collinear.ai/careers",
          "accessed_date": "2026-06-07",
          "note": "10 open roles incl. Head of Research, Research Scientist/Engineer; relocation bonus mention"
        },
        {
          "url": "https://blog.collinear.ai/p/rl-env-as-a-service",
          "accessed_date": "2026-06-07",
          "note": "Environment-as-a-service positioning; API endpoints; domains (product dev, SWE, computer use, web)"
        },
        {
          "url": "https://www.linkedin.com/company/collinearai",
          "accessed_date": "2026-06-07",
          "note": "Public page: 11-50 employees (~28), founded 2024, HQ Mountain View / Sunnyvale CA, Software Development"
        },
        {
          "url": "https://tracxn.com/d/companies/collinearai/__afhJh0xW8gZpWYbw5rCoeqG-KD4fTP3wJr3zxaPZ6Y0",
          "accessed_date": "2026-06-07",
          "note": "Founded 2023, ~18-23 employees (Jan-Apr 2026), lists as unfunded, conflicts with investor lists"
        },
        {
          "url": "https://pitchbook.com/profiles/company/550057-87",
          "accessed_date": "2026-06-07",
          "note": "Investors: Khasm Labs, 11.2 Capital, Engineering Capital, Firestreak Ventures, ISV Startup Springboard (via search snippet)"
        },
        {
          "url": "https://www.linkedin.com/in/nazneenrajani",
          "accessed_date": "2026-06-07",
          "note": "Founder/CEO background: ex-Hugging Face Research Lead, ex-Salesforce, PhD UT Austin"
        },
        {
          "url": "https://www.crunchbase.com/organization/collinear-ai",
          "accessed_date": "2026-06-07",
          "note": "Crunchbase profile exists (HTTP 403, not directly accessible)"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "current_headcount",
          "was": "11-50 (LinkedIn band); ~18-28 reported across databases",
          "now": "~16-23 (About page 16; Tracxn ~23; Crunchbase 1-10; LinkedIn 11-50; prospeo 11-20)",
          "reason": "The '~28 employees' figure was not supported by any reviewed source. About page lists 16 named members, Tracxn ~23 (2026-04-30), and Crunchbase actually lists 1-10. Replaced with the triangulated, sourced range and surfaced the source conflict."
        },
        {
          "field": "headcount_band",
          "was": "11-50 (confidence reported)",
          "now": "11-50 (confidence reported, with note that Crunchbase lists 1-10)",
          "reason": "Band retained as best estimate (16 named members + Tracxn ~23 support 11-50), but added that Crunchbase reports 1-10 to flag the conflict."
        },
        {
          "field": "founded_year",
          "was": "2023 (source cited Tracxn + LinkedIn conflict)",
          "now": "2023 (strengthened sourcing)",
          "reason": "Same value but added stronger primary-ish corroboration (MIT Tech Review Innovators Under 35, Crunchbase person profile showing CEO since Jan 2023) so the 2023 vs 2024 conflict is resolved in favor of 2023."
        },
        {
          "field": "notable_customers (Amazon)",
          "was": "name 'Amazon (AGI Labs)', frontier_lab_tie true",
          "now": "name 'Amazon', frontier_lab_tie false",
          "reason": "The homepage shows only an 'Amazon' logo. The 'AGI Labs' qualifier and a frontier-lab tie could not be verified; downgraded the name and set frontier_lab_tie to false to avoid an unverified frontier-lab claim."
        },
        {
          "field": "notable_investors",
          "was": "Engineering Capital, Firestreak Ventures, 112 Capital (with PitchBook adding Khasm Labs and ISV Startup Springboard in source note)",
          "now": "Engineering Capital, Firestreak Ventures, 112 Capital only",
          "reason": "Only the three on the company's About page are corroborated. Khasm Labs and ISV Startup Springboard appear solely in a PitchBook search snippet (single weak source) and were removed from the value list, noted in source."
        },
        {
          "field": "valuation",
          "was": "unknown (empty source)",
          "now": "unknown (source notes prospeo $5.3M is an estimate)",
          "reason": "Documented that the only valuation figure found (prospeo ~$5.3M) is an explicit industry-average estimate, not a reported figure, so 'unknown' is retained with justification."
        },
        {
          "field": "total_raised",
          "was": "unknown (source noted Tracxn unfunded vs investors)",
          "now": "unknown (clarified)",
          "reason": "Re-confirmed no disclosed amount exists; Tracxn/Crunchbase/prospeo all show no funded round while About page names investors, consistent with an undisclosed pre-seed/seed. Value unchanged, source clarified."
        }
      ],
      "verification_summary": "Re-verified the high-risk fields against the company's own pages (homepage, about-us, careers, case-studies) plus Tracxn, Crunchbase snippets, PitchBook snippet, prospeo, and MIT Tech Review. This is the correct company: Collinear AI (Nazneen Rajani, ex-Hugging Face), whose SimLab builds sandboxed enterprise long-horizon RL environments, matches the directory note. Funding fields confirmed correct as 'unknown' (no disclosed amount/round/valuation anywhere; the only valuation figure is an explicit estimate). Investor list trimmed to the three corroborated on the About page. Headcount draft overreached ('~28'); replaced with a sourced ~16-23 range and flagged that Crunchbase lists 1-10. Founding year resolved to 2023 with stronger sourcing. All customers are self-claimed (vendor site only); removed the unverified 'Amazon (AGI Labs)' frontier-lab tie. SOC2/security remain unknown (no trust page; /security 404). Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Confirmed correct company: Collinear AI, enterprise RL environments / Simulation Lab, founder Nazneen Rajani (ex-Hugging Face)",
          "Products: SimLab for agent hillclimbing (RL training data/rewards), evaluation, user research; LLM judges; synthetic post-training data",
          "Deployment via API/environment endpoints (environment-as-a-service)",
          "10 open roles confirmed on careers page",
          "Self-claimed enterprise customers incl. Amazon AGI Labs, ServiceNow, Kore.ai",
          "Named investors: Engineering Capital, Firestreak, 112 Capital (per own About page)"
        ],
        "missing": [
          "Exact total funding raised, round stage/amount/date, valuation",
          "SOC 2 / ISO / security certifications (no trust page found; /security 404)",
          "Researcher headcount split",
          "Formal published papers or named public benchmarks",
          "Confirmed maturity stage (beta vs GA)"
        ],
        "conflicts": [
          "Founded year: Tracxn 2023 vs LinkedIn 2024",
          "HQ: LinkedIn Mountain View/Sunnyvale vs some databases San Francisco",
          "Funding status: Tracxn lists 'unfunded' while About page and PitchBook name multiple investors",
          "Headcount: LinkedIn ~28 vs Tracxn ~18-23"
        ],
        "stale": [
          "Some funding-database snapshots ('unfunded') appear outdated vs investor lists"
        ],
        "open_questions": [
          "What is the actual disclosed funding total and round? Investors are named but no amount/date is public.",
          "Are any customer relationships (e.g., Amazon AGI Labs, ServiceNow) independently verified beyond vendor case studies?",
          "Does Collinear hold any security/compliance certifications?"
        ]
      }
    },
    {
      "rank": 20,
      "focus_areas_normalised": [
        "Coding",
        "Computer Use",
        "Private Codebases"
      ],
      "slug": "refresh",
      "brand_name": "Refresh",
      "segment": "Commercial vendors",
      "website": "https://www.refresh.dev",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.refresh.dev/ accessed 2026-06-07; https://www.ycombinator.com/companies/refresh accessed 2026-06-07"
      },
      "founded_year": {
        "value": 2025,
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/refresh accessed 2026-06-07 (YC Spring 2025 / X25, founded 2025); https://www.linkedin.com/company/refresh-dot-dev accessed 2026-06-07"
      },
      "hq_location": {
        "value": "San Francisco, CA, USA",
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/refresh accessed 2026-06-07; https://www.linkedin.com/company/refresh-dot-dev accessed 2026-06-07"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "~8 employees (as of 2026-06-07)",
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/refresh accessed 2026-06-07 (lists 8 employees); https://www.linkedin.com/company/refresh-dot-dev accessed 2026-06-07 (public snippet ~7)"
      },
      "headcount_band": {
        "value": "1-10",
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/refresh accessed 2026-06-07 (8 employees); https://www.linkedin.com/company/refresh-dot-dev accessed 2026-06-07"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": 3,
        "confidence": "reported",
        "source": "https://www.refresh.dev/careers accessed 2026-06-07 (3 SF full-time roles listed)"
      },
      "distributed_remote": {
        "value": "no",
        "confidence": "estimated",
        "source": "https://www.refresh.dev/careers accessed 2026-06-07 (roles listed San Francisco, in-person)"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://www.ycombinator.com/launches/Ncy-refresh-turning-real-work-into-rl-training-grounds accessed 2026-06-07 (mechanistic interpretability work); https://www.refresh.dev/careers accessed 2026-06-07 (Research Engineer, Benchmarking role)"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Christopher Settles (CEO), ex-Uber AI ML tech lead; CS degree from UIUC",
          "Erik Quintanilla (CTO), ex-Capital One, ex-Amazon (computer vision / data scraping)",
          "Team described as ex-Uber ML and ex-Amazon scraping; reportedly turned down Scale AI offers"
        ],
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/refresh accessed 2026-06-07; https://www.ycombinator.com/launches/Ncy-refresh-turning-real-work-into-rl-training-grounds accessed 2026-06-07"
      },
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://www.refresh.dev/ accessed 2026-06-07 (simulation engines for coding and computer use); https://www.ycombinator.com/companies/refresh accessed 2026-06-07"
      },
      "focus_areas": [
        "coding environments",
        "computer use environments",
        "evaluation / benchmarks",
        "execution infrastructure"
      ],
      "deployment_model": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "maturity": {
        "value": "private beta",
        "confidence": "estimated",
        "source": "https://www.ycombinator.com/companies/refresh accessed 2026-06-07 (early-stage YC X25; partners directly with frontier labs; no self-serve GA product visible)"
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": "No public GitHub org found for refresh.dev as of 2026-06-07; no open-source product on https://www.refresh.dev/ accessed 2026-06-07"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No security/trust page found; https://www.refresh.dev/security returns 404 (accessed 2026-06-07)"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://www.refresh.dev/security returns 404 (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "YC-backed (X25). No total disclosed in any primary source; an unverified ~$500K pre-seed figure surfaced in a LinkedIn snippet only, not confirmed."
      },
      "last_round": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "YC investment implied by batch participation; specific round stage/amount/date not in any primary source as of 2026-06-07."
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Y Combinator"
        ],
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/refresh accessed 2026-06-07 (YC X25 batch participation). Reported angel/Weekend Fund participation could not be confirmed against any primary source."
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [
          {
            "name": "Frontier AI labs (unnamed)",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          }
        ],
        "confidence": "reported",
        "source": "https://www.refresh.dev/ accessed 2026-06-07 (self-described partnerships with 'frontier labs and enterprises'); https://www.ycombinator.com/launches/Ncy-refresh-turning-real-work-into-rl-training-grounds accessed 2026-06-07 (self-claimed). No specific customer named or third-party verified."
      },
      "published_papers_or_benchmarks": {
        "value": [],
        "confidence": "unknown",
        "source": "SWE-Bench, Terminal-Bench, OS-World on YC launch page are external benchmarks they target, not their own publications. https://www.ycombinator.com/launches/Ncy-refresh-turning-real-work-into-rl-training-grounds accessed 2026-06-07"
      },
      "positioning_summary": "Refresh (YC X25) builds simulation engines / RL environments with verifiable rewards for coding and computer use, partnering with frontier labs and enterprises to train AI software-engineering and computer-use 'coworker' capabilities across terminal and GUI.",
      "best_fit_use_case": "Frontier labs needing custom RL training environments and datasets for software-engineering and computer-use agent capabilities.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.refresh.dev/",
          "accessed_date": "2026-06-07",
          "note": "Official homepage, simulation engines for coding and computer use, verifiable rewards, frontier lab/enterprise partnerships"
        },
        {
          "url": "https://www.withrefresh.com/",
          "accessed_date": "2026-06-07",
          "note": "Mirror/alternate official site with same positioning"
        },
        {
          "url": "https://www.refresh.dev/careers",
          "accessed_date": "2026-06-07",
          "note": "3 open roles, all San Francisco full-time; stack Vercel/Supabase/Render"
        },
        {
          "url": "https://www.ycombinator.com/companies/refresh",
          "accessed_date": "2026-06-07",
          "note": "YC profile, Spring 2025 (X25), founders Christopher Settles & Erik Quintanilla, ~8 employees, SF"
        },
        {
          "url": "https://www.ycombinator.com/launches/Ncy-refresh-turning-real-work-into-rl-training-grounds",
          "accessed_date": "2026-06-07",
          "note": "YC launch, archived web for computer-use environments, mechanistic interpretability, targets SWE-Bench/Terminal-Bench/OS-World"
        },
        {
          "url": "https://www.linkedin.com/company/refresh-dot-dev",
          "accessed_date": "2026-06-07",
          "note": "LinkedIn public snippet, ~7 employees, founded 2025, San Francisco, Software Development"
        },
        {
          "url": "https://www.linkedin.com/in/erikquintanilla/",
          "accessed_date": "2026-06-07",
          "note": "Co-founder & CTO profile"
        },
        {
          "url": "https://www.linkedin.com/in/christopher-settles",
          "accessed_date": "2026-06-07",
          "note": "Co-founder & CEO profile"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "notable_investors.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "Only YC batch participation is verifiable; the value list also references YC alone. Confidence downgraded to 'reported' because the broader investor picture (angels / Weekend Fund) is unverified, and to avoid implying a fully confirmed cap table from a single batch-listing source."
        },
        {
          "field": "current_headcount.value",
          "was": "~7 employees (as of 2026-06-07)",
          "now": "~8 employees (as of 2026-06-07)",
          "reason": "YC profile (primary) explicitly lists 8 employees; LinkedIn snippet shows ~7. Aligned headline value to the primary YC figure while noting the LinkedIn range in source."
        },
        {
          "field": "headcount_band.confidence",
          "was": "reported",
          "now": "confirmed",
          "reason": "Both YC (8) and LinkedIn (~7) place the company firmly in the 1-10 band; the band itself is corroborated by a primary source (YC), so 'confirmed' is justified for the band even though exact count is 'reported'."
        },
        {
          "field": "open_roles_count.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "Careers-page counts are point-in-time and self-published; not independently verifiable, so downgraded from 'confirmed' to 'reported'."
        },
        {
          "field": "founded_year.source",
          "was": "https://www.linkedin.com/company/refresh-dot-dev accessed 2026-06-07; YC Spring 2025 (X25) batch https://www.ycombinator.com/companies/refresh accessed 2026-06-07",
          "now": "https://www.ycombinator.com/companies/refresh accessed 2026-06-07 (YC Spring 2025 / X25, founded 2025); https://www.linkedin.com/company/refresh-dot-dev accessed 2026-06-07",
          "reason": "Re-ordered to lead with the primary YC source, which I independently confirmed shows founded 2025 and Spring 2025 (X25) batch."
        },
        {
          "field": "open_source.source",
          "was": "No public GitHub org found for refresh.dev as of 2026-06-07 (github.com/refresh-dev belongs to unrelated refresh.cv)",
          "now": "No public GitHub org found for refresh.dev as of 2026-06-07; no open-source product on https://www.refresh.dev/ accessed 2026-06-07",
          "reason": "Removed the unverifiable claim that github.com/refresh-dev belongs to refresh.cv (could not confirm this attribution); kept the defensible 'no OSS product visible' basis."
        }
      ],
      "verification_summary": "Re-verified against primary sources (YC company profile, YC launch page, refresh.dev homepage) plus web search. Confirmed this is the CORRECT company matching the directory note ('code and computer use environments'): Refresh (YC X25, Spring 2025), founders Christopher Settles and Erik Quintanilla, building RL simulation environments for coding and computer use, partnering with frontier labs. Founded year (2025), HQ (San Francisco), and small headcount (YC lists 8; LinkedIn ~7, 1-10 band) all confirmed. Funding remains unknown: no primary source confirms total raised, round stage, or valuation; the ~$500K pre-seed figure is a single weak LinkedIn snippet, correctly left unknown. Downgraded notable_investors to 'reported' (only YC batch participation verifiable; angel/Weekend Fund unconfirmed). Customers are self-claimed 'frontier labs', not third-party verified. No trust page / SOC2 (security URL 404). Corrected headcount headline to ~8 (primary YC), upgraded headcount_band to confirmed, downgraded open_roles_count to reported, and removed an unverifiable GitHub-org attribution. Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Confirmed correct company: Refresh (refresh.dev), YC X25 Spring 2025, builds RL/simulation environments for coding and computer use, matches directory note and tags.",
          "Founders: Christopher Settles (CEO, ex-Uber AI ML tech lead, UIUC CS), Erik Quintanilla (CTO, ex-Capital One/Amazon).",
          "HQ San Francisco; founded 2025; ~7 employees (LinkedIn), 8 (YC).",
          "3 open roles, all SF full-time (Ex-Founder, GTM Engineer, Research Engineer Benchmarking).",
          "Self-described partnerships with frontier labs and enterprises; YC-backed.",
          "Has research/benchmarking function (mechanistic interpretability, benchmark engineering)."
        ],
        "missing": [
          "Confirmed funding total / round stage / amount / date / valuation.",
          "Named or verified customers.",
          "Deployment model, SOC 2 / ISO / security page (none found; /security 404).",
          "Researcher headcount and full backgrounds; revenue signals.",
          "Any open-source repos (none found)."
        ],
        "conflicts": [
          "Headcount: LinkedIn ~7 vs YC profile 8.",
          "Multiple unrelated 'Refresh' companies exist: refresh.cv (AI resume builder; the draft tied github.com/refresh-dev to it, but that attribution could not be confirmed), and a 2011 Refresh acquired by LinkedIn in 2015 (Tracxn/PitchBook). Neither is the target; excluded.",
          "An unverified $500K pre-seed (July 2025) figure appeared in a LinkedIn-derived snippet; not corroborated by a primary source, left as unknown for total_raised."
        ],
        "stale": [],
        "open_questions": [
          "What is the actual disclosed funding amount and lead investor beyond YC?",
          "Which frontier labs (OpenAI/Anthropic/etc.) are actual customers, any verifiable?",
          "Is any product self-serve/GA, or fully bespoke lab engagements only?",
          "Are the reported angels (David Cramer, Spencer Kimball, Weekend Fund) confirmed?"
        ]
      }
    },
    {
      "rank": 21,
      "focus_areas_normalised": [
        "Coding",
        "Long-Horizon"
      ],
      "slug": "vmax",
      "brand_name": "Vmax",
      "segment": "Commercial vendors",
      "website": "https://vmax.ai/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://vmax.ai/ (accessed 2026-06-07); https://job-boards.greenhouse.io/vmax actively hiring (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2025,
        "confidence": "reported",
        "source": "https://www.southparkcommons.com/companies/vmax/ (accessed 2026-06-07); corroborated by PitchBook (https://pitchbook.com/profiles/company/907262-38) and search aggregators, no primary incorporation record reviewed"
      },
      "hq_location": {
        "value": "San Francisco, USA",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/vmax-ai (public snippet: Brannan St, San Francisco, CA 94107; accessed 2026-06-07); https://www.southparkcommons.com/companies/vmax/"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "distributed_remote": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "All 8 Greenhouse roles are listed in San Francisco (https://job-boards.greenhouse.io/vmax), which suggests an onsite/SF-based team, but the remote policy is not stated"
      },
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://vmax.ai/ (accessed 2026-06-07), reinforcement learning company converting proprietary data and evals into RL environments for long-horizon LLM-agent tasks"
      },
      "deployment_model": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "maturity": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Early-stage (founded 2025); no GA product page or pricing found as of 2026-06-07"
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": "https://vmax.ai/ (accessed 2026-06-07), no public OSS product repo identified; unix-ctf released as a research paper (arXiv:2605.29115), not a maintained product repo"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "~11 (LinkedIn 'all 11 employees' link, 2026-06-07); official LinkedIn size band 2-10",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/vmax-ai (accessed 2026-06-07). Note: one search source cited ~5 employees, so exact count is uncertain"
      },
      "headcount_band": {
        "value": "1-10",
        "confidence": "estimated",
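        "source": "https://www.linkedin.com/company/vmax-ai (accessed 2026-06-07), official size band '2-10 employees'; ~11 profiles surfaced via the 'all employees' link, marginally above the band's upper bound, so the band is estimated from the official LinkedIn size band"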
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": 8,
        "confidence": "confirmed",
        "source": "https://job-boards.greenhouse.io/vmax (accessed 2026-06-07), 8 roles, all San Francisco: MTS Applied RL, MTS Mechanistic Interpretability, MTS Open Endedness, MTS RL Algorithms, MTS RL Infrastructure, Open Application, Research Fellowship Mechanistic Interpretability, Research Fellowship Open Endedness"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://vmax.ai/ (accessed 2026-06-07), founders are RL PhDs; unix-ctf paper (arXiv:2605.29115) authored by Vmax-affiliated researchers; multiple research/MTS roles open"
      },
      "researcher_count": {
        "value": "Small research-led team (~11 total). 3 co-founders are RL/robotics PhDs; named researchers from unix-ctf paper: Geoffrey Bradway, Roger Creus Castanyer, Lorenz Wolf",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/vmax-ai (accessed 2026-06-07); https://arxiv.org/abs/2605.29115 (author list)"
      },
      "researcher_backgrounds": {
        "value": [
          "Matthew Sargent, RL PhD, University College London (2019-2024); co-founder",
          "Augustine Mavor-Parker, RL PhD, University College London; CTO; previously Redwood Research (AI safety), Cold Spring Harbor Laboratory (NeuroAI), Illumina (AI for genomics)",
          "Heejin Jeong, PhD in ESE/robotics, University of Pennsylvania (GRASP Lab, 2020); co-founder; off-policy TD learning for robotics/autonomous systems",
          "Founding team described on vmax.ai as 3 RL PhDs from UCL and UPenn with publications at NeurIPS, ICML, AAAI"
        ],
        "confidence": "reported",
        "source": "https://uk.linkedin.com/in/matthewjsargent ; https://www.linkedin.com/in/augustine-mavor-parker/ ; https://www.grasp.upenn.edu/people/heejin-jeong/ ; https://repository.upenn.edu/edissertations/3836/ ; https://vmax.ai/ ; https://www.southparkcommons.com/companies/vmax/ (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No disclosed amount on PitchBook (https://pitchbook.com/profiles/company/907262-38), Crunchbase, or press as of 2026-06-07, funding unannounced/undisclosed"
      },
      "last_round": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No round stage/date disclosed publicly as of 2026-06-07"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Race Capital",
          "South Park Commons"
        ],
        "confidence": "reported",
        "source": "https://www.southparkcommons.com/companies/vmax/ (SPC self-lists as backer); PitchBook lists Race Capital and South Park Commons (accessed 2026-06-07), backers named, round/amount undisclosed"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [
          {
            "name": "Martian / ARES team (withmartian), partnership: jointly releasing ~1k JavaScript coding tasks in the Harbor format (Harbor = Terminal-Bench task format)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://withmartian.com/post/ares-open-source-infrastructure-for-online-rl-on-coding-agents (accessed 2026-06-07), described as a partnership/collaboration, not a confirmed paying customer. (Draft incorrectly attributed this to 'Harbor / Laude Institute'; the releasing partner is Martian/ARES.)"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No trust/security page found as of 2026-06-07"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "published_papers_or_benchmarks": {
        "value": [
          "unix-ctf: Procedural Environments for Unix-Competence Reinforcement Learning, arXiv:2605.29115 (https://arxiv.org/abs/2605.29115); procedural generator of CTF tasks for shell agents (656 portable variants); authors include Geoffrey Bradway, Roger Creus Castanyer, Lorenz Wolf",
          "Collaboration releasing ~1k JavaScript coding tasks in Harbor format (with Martian/ARES, compatible with the Terminal-Bench ecosystem)"
        ],
        "confidence": "confirmed",
        "source": "https://arxiv.org/abs/2605.29115 ; https://withmartian.com/post/ares-open-source-infrastructure-for-online-rl-on-coding-agents (accessed 2026-06-07)"
      },
      "focus_areas": [
        "coding environments",
        "long-horizon / general reasoning",
        "security"
      ],
      "positioning_summary": "Vmax is a San Francisco reinforcement-learning startup (founded 2025 by three RL/robotics PhDs from UCL and UPenn) that automates the conversion of proprietary data and evals into RL environments for LLM-based agents, targeting long-horizon and coding tasks. Its public research includes unix-ctf, a procedural generator of capture-the-flag tasks for Unix/shell competence.",
      "best_fit_use_case": "Teams needing custom, research-grade RL environments to train coding and long-horizon shell/terminal agents from proprietary data.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://vmax.ai/",
          "accessed_date": "2026-06-07",
          "note": "Official site, RL environments, open-ended learning, hiring researchers, unix-ctf research link, Greenhouse careers link"
        },
        {
          "url": "https://job-boards.greenhouse.io/vmax",
          "accessed_date": "2026-06-07",
          "note": "Careers, 8 open roles, all San Francisco"
        },
        {
          "url": "https://www.linkedin.com/company/vmax-ai",
          "accessed_date": "2026-06-07",
          "note": "Public LinkedIn snippet, ~11 employees, San Francisco (Brannan St 94107), 'Open ended task generation', named team members"
        },
        {
          "url": "https://www.linkedin.com/in/augustine-mavor-parker/",
          "accessed_date": "2026-06-07",
          "note": "Co-founder/CTO; RL PhD UCL; ex-Redwood Research, CSHL, Illumina"
        },
        {
          "url": "https://www.linkedin.com/in/matthewjsargent/",
          "accessed_date": "2026-06-07",
          "note": "Co-founder; RL PhD UCL"
        },
        {
          "url": "https://www.southparkcommons.com/companies/vmax/",
          "accessed_date": "2026-06-07",
          "note": "SPC portfolio, founders, founded 2025, SF, description; SPC as backer"
        },
        {
          "url": "https://arxiv.org/abs/2605.29115",
          "accessed_date": "2026-06-07",
          "note": "unix-ctf research paper, procedural Unix-competence CTF environments"
        },
        {
          "url": "https://withmartian.com/post/ares-open-source-infrastructure-for-online-rl-on-coding-agents",
          "accessed_date": "2026-06-07",
          "note": "Mentions Vmax partnership releasing ~1k JavaScript tasks in Harbor format"
        },
        {
          "url": "https://newsletter.semianalysis.com/p/rl-environments-and-rl-for-science",
          "accessed_date": "2026-06-07",
          "note": "SemiAnalysis, lists Vmax among RL-environment vendors"
        },
        {
          "url": "https://pitchbook.com/profiles/company/907262-38",
          "accessed_date": "2026-06-07",
          "note": "PitchBook profile (403 blocked); no disclosed funding amount per search snippets"
        },
        {
          "url": "https://www.crunchbase.com/organization/vmax",
          "accessed_date": "2026-06-07",
          "note": "Crunchbase listing referenced; funding unannounced"
        },
        {
          "url": "https://x.com/MavorParker/status/1868009967518880183",
          "accessed_date": "2026-06-07",
          "note": "Founder post describing agents leveraging large-company dataset structure for multistep RL on long-horizon tasks"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "researcher_backgrounds",
          "was": "Listed only 2 founders (Sargent, Mavor-Parker) and stated founders are 'RL PhDs from UCL'",
          "now": "Added third co-founder Heejin Jeong (UPenn ESE/robotics PhD, GRASP Lab); clarified founding team is 3 PhDs from UCL AND UPenn",
          "reason": "vmax.ai and South Park Commons state 3 RL PhDs from UCL and UPenn; UPenn GRASP page, dissertation, and search results confirm Heejin Jeong as a co-founder. Draft undercounted founders and misattributed all to UCL."
        },
        {
          "field": "researcher_count",
          "was": "'~3 core (2 founders + early MTS/fellows)'",
          "now": "Reframed: small research-led team (~11), 3 co-founders are RL/robotics PhDs, plus named paper authors",
          "reason": "Founder count corrected from 2 to 3; '2 founders' phrasing was factually wrong."
        },
        {
          "field": "notable_customers",
          "was": "'Harbor / Laude Institute (Terminal-Bench team), released ~1k JavaScript coding tasks in Harbor format'",
          "now": "'Martian / ARES team (withmartian), partnership releasing ~1k JavaScript tasks in Harbor format'",
          "reason": "The cited source (withmartian.com ARES post) attributes the partnership to Martian/ARES, not Laude Institute. Harbor is the Terminal-Bench task format; Laude Institute was an unsupported attribution. Kept self-claimed and frontier_lab_tie=false."
        },
        {
          "field": "focus_areas",
          "was": "['coding environments','long-horizon / general reasoning','execution infrastructure','security']",
          "now": "['coding environments','long-horizon / general reasoning','security']",
          "reason": "Removed 'execution infrastructure': Vmax sells/builds RL environments, not execution infrastructure as a product, and no source supports an execution-infra offering. Remaining tags are supported (coding via JS tasks, long-horizon per site, security via unix-ctf CTF work) and all from the controlled vocabulary."
        },
        {
          "field": "published_papers_or_benchmarks",
          "was": "unix-ctf entry without authors; generic Harbor entry",
          "now": "Added unix-ctf authors and result (656 variants) and clarified Harbor collaboration is with Martian/ARES",
          "reason": "Verification confirmed arXiv:2605.29115 authors (Bradway, Creus Castanyer, Wolf) and Vmax affiliation; corrected partner attribution for consistency."
        },
        {
          "field": "distributed_remote",
          "was": "value 'unknown' with empty source",
          "now": "value 'unknown' with note that all 8 roles are SF-listed",
          "reason": "Added supporting context from Greenhouse; value remains unknown (remote policy not stated)."
        },
        {
          "field": "founded_year (source)",
          "was": "Cited Tracxn/PitchBook as corroboration",
          "now": "Noted PitchBook corroboration and flagged a same-named unrelated 'VMAX' (Shenzhen EV-charging firm, founded 2005) on Tracxn to avoid confusion",
          "reason": "During verification, a Tracxn 'VMAX' profile matched a different company (electric-vehicle charging, China). Confirmed the correct entity is vmax.ai (SF RL company); flagged to prevent cross-contamination of data."
        }
      ],
      "verification_summary": "Re-verified the draft against vmax.ai, the company's Greenhouse, LinkedIn, South Park Commons, PitchBook, the unix-ctf arXiv paper, and the withmartian ARES post. Confirmed this is the CORRECT entity (SF RL-environments startup, vmax.ai) matching the 'research-grade RL environments' note, and ruled out two same-named decoys: the V-Max autonomous-driving framework (arXiv:2503.08388, valeoai) and a Shenzhen EV-charging 'VMAX' (Tracxn, founded 2005). Confirmed: what_they_sell=environments, 8 SF open roles, ~11 employees / band 1-10, unix-ctf paper (arXiv:2605.29115) with Vmax-affiliated authors, and investors Race Capital + South Park Commons (kept 'reported'). Corrected the founder set to THREE co-founders (added Heejin Jeong, UPenn); the draft listed only two and misattributed all to UCL. Corrected the customer attribution from 'Harbor / Laude Institute' to Martian/ARES (a collaboration, not a paying customer; remains self-claimed). Removed 'execution infrastructure' from focus_areas as unsupported. All funding amount/round/valuation correctly remain unknown; no SOC2/certifications/security page found. Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Correct company disambiguated: vmax.ai, RL-environment startup (NOT valeoai/V-Max autonomous-driving framework, NOT VMAX Telecom)",
          "Founders Matthew Sargent and Augustine Mavor-Parker (both RL PhDs, UCL); Mavor-Parker is CTO with ex-Redwood Research/CSHL/Illumina background",
          "Founded 2025, HQ San Francisco (Brannan St, 94107)",
          "Product: automates conversion of proprietary data + evals into RL environments for LLM agents; long-horizon and coding focus",
          "8 open roles (Greenhouse), all SF, RL/interpretability/open-endedness",
          "Backers: Race Capital and South Park Commons (Founder Fellowship)",
          "Research: unix-ctf paper (arXiv:2605.29115); Harbor-format JS coding tasks partnership",
          "Headcount ~11 (LinkedIn)"
        ],
        "missing": [
          "Funding amount, round stage/date, valuation (undisclosed)",
          "SOC 2 / ISO certifications, security/trust page",
          "Deployment model, product maturity stage, licensing",
          "Verified (third-party-confirmed) paying customers",
          "Detailed open-source product status / repos / stars"
        ],
        "conflicts": [
          "Founder count: most sources name 2 founders (Sargent, Mavor-Parker); some LinkedIn/aggregator snippets say '3 RL PhDs from UCL and UPenn', third founder not individually identified",
          "Headcount: LinkedIn shows ~11 discoverable but official size band 2-10; SPC/early snippets implied ~3 core",
          "Funding: one snippet says 'never raised', others name Race Capital + SPC as investors, amount unannounced either way"
        ],
        "stale": [],
        "open_questions": [
          "Is there a third co-founder (UPenn RL PhD) and who?",
          "Has Vmax raised a priced round, and from whom beyond Race Capital / South Park Commons?",
          "Are any frontier labs direct customers (Harbor partnership is ecosystem-level, not a confirmed lab customer)?",
          "What is the commercial deployment/delivery model for environments?"
        ]
      }
    },
    {
      "rank": 22,
      "focus_areas_normalised": [
        "Long-Horizon"
      ],
      "slug": "andromede",
      "brand_name": "Andromede",
      "segment": "Commercial vendors",
      "website": "https://andromede.ai/",
      "focus_areas": [
        "long-horizon / general reasoning",
        "evaluation / benchmarks"
      ],
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://andromede.ai/ (accessed 2026-06-07)"
      },
      "positioning_summary": "Andromede is an early-stage RL data lab that programmatically generates RL environments, tasks, and verifiers from real-world data for post-training and evaluation of frontier agents, with an emphasis on long-horizon sequential reasoning tasks. As of mid-2026 it is in private beta, working with a small set of partners. It was co-founded by Guillaume Allegre (Founder & President) and Alexandre Sallinen (an EPFL-affiliated researcher who contributed to the Meditron medical-LLM project), and is backed by Unusual Ventures.",
      "best_fit_use_case": "Buyers needing custom RL environments and verifiers derived from real-world data for post-training/evaluating long-horizon agentic models.",
      "maturity": {
        "value": "private beta",
        "confidence": "confirmed",
        "source": "https://andromede.ai/ (accessed 2026-06-07)"
      },
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://andromede.ai/ (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2025,
        "confidence": "reported",
        "source": "https://pitchbook.com/profiles/company/1158131-35 (search snippet; PitchBook page returns HTTP 403, accessed 2026-06-07); corroborated by https://www.unusual.vc/portfolio/ (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "Lausanne, Switzerland",
        "confidence": "reported",
        "source": "https://pitchbook.com/profiles/company/1158131-35 (search snippet; page returns HTTP 403, accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [
          "New York, USA (founder/president Guillaume Allegre is LinkedIn-listed as New York-based; no confirmed office)"
        ],
        "confidence": "estimated",
        "source": "https://www.linkedin.com/in/guillaume-allegre/ (search snippet, accessed 2026-06-07)"
      },
      "current_headcount": {
        "value": "2-10 employees (LinkedIn size band); PitchBook snippet states ~2 total employees, as of 2026-06-07",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/andromedeai ; https://pitchbook.com/profiles/company/1158131-35 (search snippet, accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "1-10",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/andromedeai (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://andromede.ai/careers returns HTTP 404; no LinkedIn jobs visible (accessed 2026-06-07)"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "Co-founder Alexandre Sallinen is an EPFL-affiliated researcher (contributor to the Meditron medical-LLM project; Google Scholar profile); GitHub org self-describes as 'Research Data Lab focused on RL environment generation'. Sources: https://scholar.google.com/citations?user=-ajWSEcAAAAJ ; https://github.com/Andromede-AI (accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "~1 (co-founder Alexandre Sallinen, an EPFL/Meditron researcher); no public team page to confirm others",
        "confidence": "estimated",
        "source": "https://www.linkedin.com/in/alexandre-sallinen-033359294/ ; https://scholar.google.com/citations?user=-ajWSEcAAAAJ (accessed 2026-06-07)"
      },
      "researcher_backgrounds": {
        "value": [
          "Alexandre Sallinen (co-founder) - EPFL; contributor to the Meditron open-source medical LLM project; RL/LLM research background",
          "Guillaume Allegre (co-founder & president) - ex-BCG X; MIT (Machine Learning & Operations Research), engineering/applied mathematics"
        ],
        "confidence": "reported",
        "source": "https://www.linkedin.com/in/alexandre-sallinen-033359294/ ; https://www.linkedin.com/in/guillaume-allegre/ ; https://scholar.google.com/citations?user=-ajWSEcAAAAJ (search snippets, accessed 2026-06-07)"
      },
      "notable_investors": {
        "value": [
          "Unusual Ventures"
        ],
        "confidence": "reported",
        "source": "https://www.unusual.vc/portfolio/ (Andromede listed in Unusual Ventures' own portfolio page, accessed 2026-06-07)"
      },
      "last_round": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No round stage/amount/date disclosed; PitchBook funding data paywalled (HTTP 403), accessed 2026-06-07"
      },
      "total_raised": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No disclosed funding figure found; PitchBook funding data paywalled (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [],
        "confidence": "unknown",
        "source": "Site states it works with 'a small set of partners' but names none (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_source": {
        "value": "no",
        "confidence": "reported",
        "source": "https://github.com/Andromede-AI states 'This organization has no public repositories' (accessed 2026-06-07)"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Site lists a security@andromede.ai contact but no SOC 2 / trust page found (accessed 2026-06-07)"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No /security or /trust page found; only security@andromede.ai contact listed (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [],
        "confidence": "unknown",
        "source": "No Andromede-branded papers or benchmarks found; co-founder Sallinen's research (e.g. Meditron) predates and is separate from the company (accessed 2026-06-07)"
      },
      "distributed_remote": {
        "value": "yes",
        "confidence": "estimated",
        "source": "Co-founders are split across Lausanne (Sallinen/EPFL) and New York (Allegre per LinkedIn), suggesting a distributed team; not explicitly stated (accessed 2026-06-07)"
      },
      "overall_confidence": "low",
      "sources": [
        {
          "url": "https://andromede.ai/",
          "accessed_date": "2026-06-07",
          "note": "Official site. 'RL data lab for frontier agents'; 'Programmatic generation of RL environments for post-training and evaluation'; generates environments, tasks, and verifiers from real-world data; sits between training and deployment for long-horizon tasks; private beta; works with a small set of partners; contacts hello@ and security@andromede.ai; '© 2026 Andromede AI, Inc.'"
        },
        {
          "url": "https://www.linkedin.com/company/andromedeai",
          "accessed_date": "2026-06-07",
          "note": "Public LinkedIn snippet: size band 2-10 employees; industry 'Technology, Information and Internet'; tagline 'RL data lab for frontier AI research'; specialties 'Programmatic generation of RL environments for post-training and evaluation. Real data in => reliable environments out.'; 'Currently in private beta.'"
        },
        {
          "url": "https://pitchbook.com/profiles/company/1158131-35",
          "accessed_date": "2026-06-07",
          "note": "Search snippet only (page returns HTTP 403): founded 2025; HQ Lausanne, Switzerland; ~2 total employees. Funding/investors paywalled and not disclosed."
        },
        {
          "url": "https://github.com/Andromede-AI",
          "accessed_date": "2026-06-07",
          "note": "GitHub org 'Research Data Lab focused on RL environment generation'; no public repositories; no public members; links to andromede.ai and hello@andromede.ai."
        },
        {
          "url": "https://alignlist.com/guides/top-40-rl-environments-startups-and-companies",
          "accessed_date": "2026-06-07",
          "note": "Directory listing: domain 'Long Horizon'; 'Develops long-horizon RL environment approaches for complex sequential reasoning tasks.' No funding/founder/customer detail."
        },
        {
          "url": "https://aimultiple.com/rl-environments",
          "accessed_date": "2026-06-07",
          "note": "Lists Andromede among managed-environment RL vendors (with AfterQuery, Collinear, Deeptune, Halluminate, Refresh, etc.)."
        },
        {
          "url": "https://andromede.ai/careers",
          "accessed_date": "2026-06-07",
          "note": "HTTP 404, no public careers page / open roles."
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "notable_investors",
          "was": "[] (unknown)",
          "now": "[\"Unusual Ventures\"] (reported)",
          "reason": "Andromede is listed on Unusual Ventures' own portfolio page (unusual.vc/portfolio) with an RL-environments description matching the company. This is a credible first-party investor source, so the field should not be empty/unknown. Round amount/valuation still not disclosed, so confidence kept at reported."
        },
        {
          "field": "researcher_count",
          "was": "unknown / unknown",
          "now": "~1 (co-founder Alexandre Sallinen) / estimated",
          "reason": "Public LinkedIn and Google Scholar identify co-founder Alexandre Sallinen as an EPFL-affiliated researcher (Meditron contributor). At least one researcher is identifiable; no team page to count more, so estimated."
        },
        {
          "field": "researcher_backgrounds",
          "was": "[] / unknown",
          "now": "Sallinen (EPFL, Meditron LLM) and Allegre (ex-BCG X, MIT) / reported",
          "reason": "Two co-founders surfaced via LinkedIn/Scholar search snippets with notable prior affiliations; the draft left this empty despite the info being discoverable."
        },
        {
          "field": "has_researchers",
          "was": "yes / reported (sourced only to GitHub/LinkedIn taglines)",
          "now": "yes / reported (sourced to a named researcher co-founder)",
          "reason": "Same value but strengthened with a concrete named researcher (Sallinen) and Google Scholar profile rather than only self-description taglines."
        },
        {
          "field": "other_locations",
          "was": "[] / unknown",
          "now": "[\"New York, USA (founder/president Guillaume Allegre is LinkedIn-listed as New York-based; no confirmed office)\"] / estimated",
          "reason": "Co-founder/president Guillaume Allegre is listed as New York-based on LinkedIn, indicating possible US presence beyond the Lausanne HQ. Flagged as estimated since no office is confirmed."
        },
        {
          "field": "distributed_remote",
          "was": "unknown / unknown",
          "now": "yes / estimated",
          "reason": "Co-founders appear split between Lausanne (Sallinen/EPFL) and New York (Allegre), implying a distributed team. Marked estimated as it is inferred, not stated."
        },
        {
          "field": "founded_year (source)",
          "was": "PitchBook search snippet only",
          "now": "PitchBook snippet (page 403) corroborated by Unusual Ventures portfolio",
          "reason": "Added corroborating source and noted PitchBook page is paywalled/403; value 2025 and 'reported' confidence retained."
        },
        {
          "field": "last_round / total_raised (source notes)",
          "was": "empty/blank sources",
          "now": "explicit note that funding is undisclosed and PitchBook is paywalled (403)",
          "reason": "Clarified why these remain unknown; values and 'unknown' confidence preserved (no funding amount found in any credible source)."
        },
        {
          "field": "published_papers_or_benchmarks (source)",
          "was": "blank",
          "now": "note added",
          "reason": "Clarified that co-founder Sallinen's prior research (Meditron) is separate from and predates Andromede, so it is not attributed to the company. Value remains empty."
        },
        {
          "field": "positioning_summary",
          "was": "No founder/investor detail",
          "now": "Added co-founders (Allegre, Sallinen) and investor (Unusual Ventures)",
          "reason": "Incorporated newly verified, decision-relevant facts about founders and backing."
        }
      ],
      "verification_summary": "Confirmed this is the correct entity: Andromede / andromede.ai, an early-stage RL data lab (Lausanne, founded 2025) that programmatically generates RL environments, tasks, and verifiers for long-horizon agentic tasks, matching the directory note on 'long-horizon sequential reasoning.' Not confused with the unrelated 'Andromeda AI' (GPU/compute startup) or 'AndromedAI' on PitchBook. Official site and LinkedIn snippet corroborate private-beta status, ~2-10 headcount, and 'small set of partners' (no named customers; notable_customers correctly left empty). PitchBook returns HTTP 403, so founded-year and HQ rest on a search snippet plus the Unusual Ventures portfolio corroboration (kept 'reported'). KEY CORRECTION: the draft missed that Andromede is in Unusual Ventures' published portfolio, a credible first-party investor source, so notable_investors now lists Unusual Ventures (reported); however no round amount or valuation is disclosed anywhere, so total_raised/last_round/valuation remain unknown. Also surfaced two co-founders via LinkedIn/Scholar: Alexandre Sallinen (EPFL, Meditron medical-LLM contributor, a genuine researcher) and Guillaume Allegre (ex-BCG X, MIT, NY-based, Founder & President), enabling researcher_backgrounds, researcher_count (~1 researcher), and a likely Lausanne/NY distributed setup. No SOC 2, trust page, certifications, papers, or named customers were found; those stay unknown/empty. Overall confidence remains low given paywalled financials and absence of a public team page.",
      "research_notes": {
        "found": [
          "Official site confirms what they sell: programmatic generation of RL environments, tasks, and verifiers from real-world data for post-training and evaluation of frontier agents; focus on long-horizon tasks (matches directory note).",
          "Maturity: private beta (official site + LinkedIn).",
          "LinkedIn size band 2-10; PitchBook snippet: founded 2025, HQ Lausanne Switzerland, ~2 employees.",
          "GitHub org exists but has no public repos/members (not open source).",
          "Self-describes as a research data lab (has researchers).",
          "Identified as a managed RL-environment vendor in third-party directories (AIMultiple, AlignList)."
        ],
        "missing": [
          "Founders / named team members for the correct RL data lab entity.",
          "Funding, investors, valuation, total raised.",
          "Named customers/partners (site only says 'a small set of partners').",
          "Deployment model, SOC 2 / ISO certifications, trust page.",
          "Open roles count, headcount growth, revenue signals.",
          "Published papers or benchmarks."
        ],
        "conflicts": [
          "PitchBook snippet (~2 employees) vs LinkedIn band (2-10), consistent at the low end.",
          "Severe NAME COLLISIONS: (1) 'Andromeda AI' is an on-demand GPU compute startup (Wil Moushey, Paradigm/NFDG, $1.5B valuation, SF), UNRELATED. (2) A separate 'Andromede AI' run by Guillaume Allègre builds painting-quote software, UNRELATED. (3) Andromeda Robotics, Andromeda Surgical, AndromedAI, all UNRELATED. The correct target is the RL data lab at andromede.ai / linkedin.com/company/andromedeai / github.com/Andromede-AI."
        ],
        "stale": [],
        "open_questions": [
          "Who founded Andromede (the RL data lab) and what are their backgrounds? Possible EPFL/Lausanne ties given HQ, but unverified.",
          "Has Andromede raised any funding, and from whom?",
          "Which frontier labs or partners are the 'small set of partners' it works with?",
          "Deployment model (managed-hosted vs API) and any security/compliance posture."
        ]
      }
    },
    {
      "rank": 23,
      "focus_areas_normalised": [
        "Computer Use",
        "Enterprise Workflows"
      ],
      "slug": "plato",
      "brand_name": "Plato",
      "segment": "Commercial vendors",
      "website": "https://plato.so",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://plato.so 2026-06-07; NYT/Cade Metz coverage Dec 2025 via https://theoutpost.ai/news-story/silicon-valley-startups-clone-amazon-and-gmail-to-train-ai-agents-on-complex-tasks-22091/ 2026-06-07"
      },
      "focus_areas": [
        "browser environments",
        "computer use environments",
        "enterprise workflows",
        "evaluation / benchmarks",
        "execution infrastructure"
      ],
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://docs.plato.so 2026-06-07; Crunchbase 'Evals & datasets for web agents' snippet 2026-06-07"
      },
      "positioning_summary": "Plato (plato.so, Plato Technologies, Inc.) builds simulated worlds for training and evaluating browser and computer-use agents, recreating real websites/software (e.g. Amazon/Airbnb/Gmail-style replicas) as reinforcement-learning environments with structured APIs for interaction, state tracking and scoring. It also offers a 'Computer Use' capability driving a full Linux desktop, positioning at the intersection of browser interaction and enterprise workflow simulation.",
      "best_fit_use_case": "AI labs/teams needing high-fidelity replica web/enterprise environments to train and evaluate browser and computer-use agents via RL.",
      "maturity": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "deployment_model": {
        "value": "managed-hosted (SaaS at plato.so with dedicated tenant nodes at {tenant}.plato.so) plus API/Python SDK access",
        "confidence": "confirmed",
        "source": "https://docs.plato.so 2026-06-07"
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": "https://plato.so 2026-06-07; https://docs.plato.so 2026-06-07 (hosted platform with proprietary Python SDK, plato-sdk-v2; no public OSS repo identified)"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "founded_year": {
        "value": 2025,
        "confidence": "reported",
        "source": "PitchBook 'Plato (United States)' and Crunchbase (plato-379d / plato-d5c7) snippets, accessed 2026-06-07"
      },
      "hq_location": {
        "value": "San Francisco, CA, USA",
        "confidence": "reported",
        "source": "PitchBook/Crunchbase snippets accessed 2026-06-07; founders' LinkedIn list San Francisco"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "~10 employees",
        "confidence": "estimated",
        "source": "PitchBook/Wellfound search snippets (US Plato) accessed 2026-06-07; not directly confirmed via LinkedIn public snippet"
      },
      "headcount_band": {
        "value": "1-10",
        "confidence": "estimated",
        "source": "PitchBook snippets cite ~10 total employees; consistent with seed/pre-seed-stage startup, accessed 2026-06-07"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "distributed_remote": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "estimated",
        "source": "Co-founder/CTO Pranav Putta has AI research background (MultiOn, Georgia Tech); LinkedIn snippets accessed 2026-06-07"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Pranav Putta (Co-founder/CTO), prior MultiOn, Georgia Institute of Technology, Tonic.ai",
          "Robert Farlow (Co-founder/CEO)"
        ],
        "confidence": "reported",
        "source": "LinkedIn public snippets for Pranav Putta (in/pranav-putta-3512b47a) and Robert Farlow (in/robfarlow), accessed 2026-06-07"
      },
      "total_raised": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No plato.so-specific funding amount disclosed. Widely circulated $21M/$14.5M/$6.5M figures belong to an unrelated Berlin wholesale-AI Plato; £260k belongs to an unrelated UK edtech Plato. Crunchbase lists US browser-agents Plato as Pre-Seed only with no disclosed amount, accessed 2026-06-07"
      },
      "last_round": {
        "value": "Pre-Seed (amount and date not disclosed)",
        "confidence": "reported",
        "source": "Crunchbase (plato-379d / plato-d5c7) lists the US browser-agents Plato at Pre-Seed stage; no amount/date found, accessed 2026-06-07"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [],
        "confidence": "unknown",
        "source": "No verified investors for plato.so. Do not confuse with Berlin Plato (Atomico/Cherry Ventures) or UK edtech Plato (SFC Capital), accessed 2026-06-07"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [],
        "confidence": "unknown",
        "source": "NYT (Cade Metz, Dec 2025) names OpenAI/Google/Amazon/Anthropic as users of the replica-website RL technique generally, but does not name them as Plato's confirmed customers; no named, verified Plato customer found, accessed 2026-06-07"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "published_papers_or_benchmarks": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "overall_confidence": "low",
      "sources": [
        {
          "url": "https://plato.so/",
          "accessed_date": "2026-06-07",
          "note": "Official homepage, 'Simulated Worlds for Agents' (JS-rendered; limited extractable text)"
        },
        {
          "url": "https://docs.plato.so",
          "accessed_date": "2026-06-07",
          "note": "Official docs, product concepts (sessions, environments, sims, scoring), Computer Use (full Linux desktop), Python SDK, multi-tenant/dedicated tenant deployment"
        },
        {
          "url": "https://www.linkedin.com/in/robfarlow/",
          "accessed_date": "2026-06-07",
          "note": "Robert Farlow, Co-founder & CEO, Plato (public snippet)"
        },
        {
          "url": "https://www.linkedin.com/in/pranav-putta-3512b47a",
          "accessed_date": "2026-06-07",
          "note": "Pranav Putta, Co-founder & CTO; prior MultiOn, Georgia Tech, Tonic.ai (public snippet)"
        },
        {
          "url": "https://www.crunchbase.com/organization/plato-379d",
          "accessed_date": "2026-06-07",
          "note": "Crunchbase profile (browser agents Plato); 403 on fetch, used search snippet"
        },
        {
          "url": "https://pitchbook.com/profiles/company/752634-64",
          "accessed_date": "2026-06-07",
          "note": "PitchBook 'Plato (United States)', founded 2025, SF, ~10 employees (snippet; 403 on fetch)"
        },
        {
          "url": "https://tracxn.com/d/companies/plato/__f-KUHjIqU9_tRivtPsI5QjHBWL3AFdLMphYurLed-uw/funding-and-investors",
          "accessed_date": "2026-06-07",
          "note": "Tracxn funding page, NOTE: data conflates with Berlin wholesale Plato"
        },
        {
          "url": "https://wellfound.com/company/platoteam/people",
          "accessed_date": "2026-06-07",
          "note": "Wellfound team page (snippet), ~10 employees; /people and main page 403 on fetch"
        },
        {
          "url": "https://theoutpost.ai/news-story/silicon-valley-startups-clone-amazon-and-gmail-to-train-ai-agents-on-complex-tasks-22091/",
          "accessed_date": "2026-06-07",
          "note": "Syndicated NYT (Cade Metz) coverage, Plato/Farlow build replica-site training environments; quote captured; no Plato funding figure given"
        },
        {
          "url": "https://www.techmeme.com/251203/p10",
          "accessed_date": "2026-06-07",
          "note": "Techmeme summary of NYT piece naming AGI and Plato as replica-website training-environment startups"
        },
        {
          "url": "https://alignlist.com/guides/top-40-rl-environments-startups-and-companies",
          "accessed_date": "2026-06-07",
          "note": "Directory listing, Plato blends browser interaction environments with enterprise workflow use cases"
        },
        {
          "url": "https://podcasts.apple.com/us/podcast/robert-farlow-from-plato/id1634787423?i=1000740840151",
          "accessed_date": "2026-06-07",
          "note": "Podcast interview with Robert Farlow of Plato (existence confirms founder/role)"
        },
        {
          "url": "https://www.eu-startups.com/2026/02/from-germany-for-the-world-plato-secures-e12-2-million-to-automate-sales-and-erp-workflows-in-distribution/",
          "accessed_date": "2026-06-07",
          "note": "UNRELATED Berlin Plato (wholesale AI, $14.5M/€12.2M seed), included to document the name collision; data NOT attributed to plato.so"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "last_round",
          "was": "unknown / confidence unknown",
          "now": "Pre-Seed (amount and date not disclosed) / confidence reported",
          "reason": "Crunchbase consistently lists the correct US browser-agents entity (Plato Technologies, plato-379d/plato-d5c7) at Pre-Seed stage. The stage itself is reportable even though no amount/date is disclosed; upgraded value to name the stage while keeping amount unknown."
        },
        {
          "field": "current_headcount",
          "was": "~10 employees / confidence reported",
          "now": "~10 employees / confidence estimated",
          "reason": "The ~10 figure comes only from PitchBook/Wellfound aggregator snippets, not a direct LinkedIn public headcount snippet. Per fact-check rules this is an estimate, not a reported figure; downgraded confidence."
        },
        {
          "field": "status.source",
          "was": "https://plato.so 2026-06-07",
          "now": "https://plato.so + NYT/Cade Metz Dec 2025 coverage",
          "reason": "Added independent third-party confirmation (NYT replica-website coverage naming Plato/Farlow) that the company is operating, strengthening the active status."
        },
        {
          "field": "what_they_sell.source",
          "was": "https://plato.so; https://docs.plato.so",
          "now": "https://docs.plato.so; Crunchbase 'Evals & datasets for web agents' snippet",
          "reason": "plato.so homepage is JS-rendered and returns no extractable product text on fetch; replaced with the docs page (which describes environments/sims/scoring/Computer Use) and the Crunchbase descriptor, which actually substantiate 'environments'."
        }
      ],
      "verification_summary": "Re-verified the highest-risk claims independently. CONFIRMED the draft's central disambiguation: plato.so = Plato Technologies, Inc. (San Francisco, founded 2025), building simulated/replica-website RL environments for browser and computer-use agents (Omnizon/Amazon, Staynb/Airbnb, Go Mail/Gmail replicas), matching the directory note 'browser meets enterprise workflow.' The $21M/$14.5M/$6.5M funding figures belong to an unrelated Berlin wholesale-AI Plato (Atomico/Cherry Ventures), and £260k belongs to an unrelated UK edtech Plato (SFC Capital), correctly excluded; total_raised, valuation, and notable_investors remain unknown. Crunchbase lists the correct entity as Pre-Seed only with no disclosed amount, so last_round value was made specific (Pre-Seed) at 'reported' confidence. Customers: NYT names frontier labs as users of the technique generally, not as named Plato customers, so notable_customers stays empty (no verified frontier-lab tie). Founders confirmed: Robert Farlow (CEO), Pranav Putta (CTO, ex-MultiOn/Georgia Tech/Tonic.ai). Headcount (~10) is aggregator-sourced only, so it was downgraded to estimated. SOC2/certifications/security page unconfirmed (unknown). focus_areas all fall within the controlled vocabulary. Overall confidence remains low given thin primary funding/customer data. Relevant sources: https://plato.so, https://docs.plato.so, https://www.crunchbase.com/organization/plato-379d, https://www.crunchbase.com/organization/plato-d5c7, https://pitchbook.com/profiles/company/752634-64, https://theoutpost.ai/news-story/silicon-valley-startups-clone-amazon-and-gmail-to-train-ai-agents-on-complex-tasks-22091/",
      "research_notes": {
        "found": [
          "Correct company identified: plato.so (Plato Technologies, Inc.), 'Simulated Worlds for Agents', builds replica websites/software as RL training & evaluation environments for browser agents (matches directory note 'browser meets enterprise workflow').",
          "Founders: Robert Farlow (Co-founder/CEO) and Pranav Putta (Co-founder/CTO); Putta has prior AI background at MultiOn, Georgia Tech, Tonic.ai.",
          "Product/tech (from docs.plato.so): Sessions, environments, sims, mutations, scoring; 'Computer Use' drives a full Linux desktop (mouse/keyboard/shell/files); Python SDK (plato-sdk-v2, Python 3.10+); multi-tenant hosting at plato.so with dedicated tenant subdomains.",
          "Founded 2025, HQ San Francisco, ~10 employees (Tracxn/PitchBook/Wellfound snippets).",
          "Third-party press (NYT/Cade Metz, Dec 2025, syndicated) names Plato/Farlow among startups cloning sites (Amazon, Gmail, flight booking) to train AI agents; Farlow quote: 'We want to build training environments that capture entire jobs that people do.'"
        ],
        "missing": [
          "plato.so-specific funding (amount, round, date, investors), not cleanly disclosed publicly.",
          "Named/verified customers; SOC 2 or other certifications; security/trust page.",
          "Open roles count, remote policy, exact headcount growth, researcher count.",
          "Maturity stage (GA/beta), valuation, revenue."
        ],
        "conflicts": [
          "MAJOR NAME COLLISION: Search engines repeatedly conflate plato.so with an unrelated Berlin-based 'Plato' (AI OS for wholesale/distribution, founders Benedikt Nolte/Matthias Heinrich Morales/Oliver Birch) that raised $14.5M seed (Atomico, Cherry Ventures, Feb 2026) and ~$21M total. Those funding figures, investors, founders and HQ DO NOT belong to plato.so and were excluded.",
          "Additional unrelated 'Plato' entities: YC mentorship platform for engineering leaders; Plato social-gaming platform (San Jose, founded 2014, platoapp.com); Plato Systems. Care taken to attribute only plato.so-specific facts.",
          "Tracxn/PitchBook profiles partially blend the two Platos; treated their funding data as belonging to the Berlin company."
        ],
        "stale": [],
        "open_questions": [
          "Has plato.so raised an institutional round, and from whom? (Some sources label it pre-seed.)",
          "Are any frontier labs (OpenAI/Anthropic/Google DeepMind) confirmed Plato customers? Press implies labs buy from such startups but no Plato-specific confirmation.",
          "Does Plato hold any security certifications (SOC 2/ISO)?"
        ]
      }
    },
    {
      "rank": 24,
      "focus_areas_normalised": [
        "Enterprise Workflows",
        "Long-Horizon"
      ],
      "slug": "aichamp",
      "brand_name": "AIChamp",
      "segment": "Commercial vendors",
      "website": "https://aichamp.com/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://aichamp.com/ (accessed 2026-06-07); blog post dated 2026-02-04"
      },
      "what_they_sell": {
        "value": "mixed",
        "confidence": "confirmed",
        "source": "https://aichamp.com/ (accessed 2026-06-07), custom RL environments / simulations ('Virtual Gyms') plus expert-sourced human data (SFT/RLHF/process supervision); experts recruited via 'Sniper Sourcing'"
      },
      "focus_areas": [
        "long-horizon / general reasoning",
        "enterprise workflows",
        "execution infrastructure"
      ],
      "positioning_summary": "AIChamp builds custom reinforcement-learning environments and 'Virtual Gym' simulations for training and evaluating tool-using AI agents on long-horizon, multi-step enterprise tasks, pairing engineered environments (agents operating in software like Slack, Notion, Linear) with domain experts who design and grade tasks (SFT/RLHF/process supervision). The company emphasizes deep industry authority and expert-sourced data, having pivoted from a remote-talent/hiring marketplace background.",
      "best_fit_use_case": "Buyers needing expert-graded, long-horizon enterprise-workflow RL environments where agents operate inside real business tools (Slack, Notion, Linear).",
      "founded_year": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Conflicting: LinkedIn (https://www.linkedin.com/company/aichamp-finds-a-players) shows founded 2025; Tracxn (https://tracxn.com/d/companies/aichamp) lists 'Founded Year 2022', not reliably confirmed, accessed 2026-06-07"
      },
      "hq_location": {
        "value": "San Francisco, USA (CEO-based; reported, not officially confirmed; Tracxn alternatively lists Bali, Indonesia)",
        "confidence": "reported",
        "source": "Search snippets referencing CEO location (San Francisco) and Tracxn profile listing Bali, Indonesia, conflicting; not confirmed on official site, accessed 2026-06-07"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "distributed_remote": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://torre.ai/teams/AIChamp (accessed 2026-06-07) lists roles as Remote (anywhere); company sources global experts; CEO SF / Tracxn Bali suggests distributed footprint"
      },
      "current_headcount": {
        "value": "~2-10 (as of 2026-06-07)",
        "confidence": "estimated",
        "source": "https://www.linkedin.com/company/aichamp-finds-a-players public snippet shows '2-10 employees'; https://theorg.com/org/aichamp lists 5 named members in '1-10' band; https://torre.ai/teams/AIChamp shows 11 members, accessed 2026-06-07"
      },
      "headcount_band": {
        "value": "1-10",
        "confidence": "estimated",
        "source": "LinkedIn '2-10 employees' and TheOrg '1-10' band (5 named members), accessed 2026-06-07. Note: Torre shows 11 (likely incl. non-employee/expert contributors)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Torre shows 2 jobs posted, both closed (accessed 2026-06-07); app.aichamp.com lists recruiting expert-trainer roles but count not reliably enumerable"
      },
      "has_researchers": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Team described as 'alumni of OpenAI and xAI team' on own site (self-claimed); no named, verifiable research staff confirmed, accessed 2026-06-07"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Self-claimed 'alumni of OpenAI and xAI team' (vendor site, unverified)",
          "CEO/founder Vol Goloshuk previously founded BrightestMinds lead-generation / sales-development agency (reported)",
          "Mati Roy listed as CTO (per LinkedIn title / TheOrg roster)"
        ],
        "confidence": "reported",
        "source": "https://aichamp.com/ (self-claimed alumni); https://theorg.com/org/aichamp roster; LinkedIn profiles for Goloshuk and Roy, accessed 2026-06-07"
      },
      "maturity": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No GA/beta/preview status stated; appears early-stage and operational, accessed 2026-06-07"
      },
      "deployment_model": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Described as service-based engagements with custom environments; specific deployment model not officially stated, accessed 2026-06-07"
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": "No public repos or OSS offering found as of 2026-06-07"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No security/trust page found (aichamp.com/security returns 404), accessed 2026-06-07"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://aichamp.com/security/ returns HTTP 404, accessed 2026-06-07"
      },
      "total_raised": {
        "value": "$0 / no funding raised",
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/aichamp ('aiChamp has not raised any funding rounds yet') and search snippet describing it as 'an unfunded company', accessed 2026-06-07"
      },
      "last_round": {
        "value": "none",
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/aichamp, no funding rounds, accessed 2026-06-07"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [],
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/aichamp, no investors listed (unfunded), accessed 2026-06-07"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [],
        "confidence": "unknown",
        "source": "No customer logos or names disclosed on official site; no third-party customer confirmation found, accessed 2026-06-07"
      },
      "published_papers_or_benchmarks": {
        "value": [],
        "confidence": "unknown",
        "source": "No papers or benchmarks published by AIChamp found; only company blog posts, accessed 2026-06-07"
      },
      "overall_confidence": "low",
      "sources": [
        {
          "url": "https://aichamp.com/",
          "accessed_date": "2026-06-07",
          "note": "Official site, RL environments, Virtual Gyms, SFT/RLHF/process supervision, Sniper Sourcing, MCP-style environments, self-claimed OpenAI/xAI alumni"
        },
        {
          "url": "https://aichamp.com/blog/",
          "accessed_date": "2026-06-07",
          "note": "Blog post 'Automating Project Management with RL Environments' dated 2026-02-04, author Vol Goloshuk"
        },
        {
          "url": "https://aichamp.com/security/",
          "accessed_date": "2026-06-07",
          "note": "HTTP 404, no security/trust page"
        },
        {
          "url": "https://www.linkedin.com/company/aichamp-finds-a-players",
          "accessed_date": "2026-06-07",
          "note": "Public snippet: ~14 employees, founded 2025, IT Services; MCP-style environments for tool-using agents"
        },
        {
          "url": "https://www.linkedin.com/in/goloshuk/",
          "accessed_date": "2026-06-07",
          "note": "Vol Goloshuk, Founder/CEO; prior BrightestMinds lead-gen agency"
        },
        {
          "url": "https://www.linkedin.com/in/matiroy/",
          "accessed_date": "2026-06-07",
          "note": "Mati Roy, listed CTO at AIChamp (long-horizon agents); current role shows Netholabs"
        },
        {
          "url": "https://torre.ai/teams/AIChamp",
          "accessed_date": "2026-06-07",
          "note": "11 members, remote roles, company description"
        },
        {
          "url": "https://theorg.com/org/aichamp",
          "accessed_date": "2026-06-07",
          "note": "Team roster: Goloshuk (CEO), Mati Roy (CTO), Vera Banko, Alexandra Leu, Pavlo Kosenko; headcount listed 1-10"
        },
        {
          "url": "https://app.aichamp.com/apply/jobs",
          "accessed_date": "2026-06-07",
          "note": "Recruiting/expert-trainer application portal"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "current_headcount",
          "was": "~11-14 (as of 2026-06-07)",
          "now": "~2-10 (as of 2026-06-07)",
          "reason": "LinkedIn public snippet shows '2-10 employees', not ~14; TheOrg lists 5 named members in the 1-10 band. The Torre figure of 11 likely includes non-employee expert contributors. Draft overstated headcount."
        },
        {
          "field": "headcount_band",
          "was": "11-50",
          "now": "1-10",
          "reason": "Both LinkedIn ('2-10 employees') and TheOrg ('1-10') place the company in the 1-10 band. 11-50 was an overreach inconsistent with the primary directory snippets."
        },
        {
          "field": "total_raised",
          "was": "unknown",
          "now": "$0 / no funding raised (reported)",
          "reason": "Tracxn explicitly states 'aiChamp has not raised any funding rounds yet' and a search snippet describes it as 'an unfunded company'. Upgraded from unknown to reported 'no funding' based on two consistent secondary sources."
        },
        {
          "field": "last_round",
          "was": "unknown",
          "now": "none (reported)",
          "reason": "Consistent with unfunded status per Tracxn; no rounds recorded."
        },
        {
          "field": "notable_investors",
          "was": "[] (unknown)",
          "now": "[] (reported)",
          "reason": "Tracxn shows no investors and unfunded status, so the empty list is now affirmatively reported rather than merely unknown."
        },
        {
          "field": "founded_year source",
          "was": "LinkedIn 2025 vs secondary 2024",
          "now": "LinkedIn 2025 vs Tracxn 2022",
          "reason": "Re-verification found Tracxn lists founding year 2022 (not 2024). Value remains 'unknown' due to genuine conflict; source detail corrected."
        },
        {
          "field": "hq_location",
          "was": "San Francisco, USA (CEO-based; reported)",
          "now": "San Francisco, USA (CEO-based), conflicting with Tracxn 'Bali, Indonesia'",
          "reason": "Tracxn lists Bali, Indonesia as location, conflicting with CEO-based SF snippet. Flagged conflict; kept reported/low confidence rather than asserting SF."
        }
      ],
      "verification_summary": "Confirmed this is the CORRECT company matching the directory note 'expert-graded simulations at depth': aichamp.com sells custom RL 'Virtual Gym' environments plus expert-sourced grading/data (SFT/RLHF/process supervision), with experts recruited via 'Sniper Sourcing'. Flagged a same-name trap: aichamp.in (an unrelated entity) and the company's own prior hiring-marketplace identity, the relevant RL vendor is aichamp.com. Key downgrades/corrections: headcount cut from 11-50/~14 to 1-10/~2-10 (LinkedIn '2-10 employees', TheOrg '1-10'), startup is small, not mid-size. Funding clarified to reported 'no funding raised / unfunded' per Tracxn and a search snippet (two secondary sources). No customers are disclosed anywhere; OpenAI/xAI is a SELF-CLAIMED team-alumni statement, not a customer or partnership tie, not verified. No SOC2/trust page (security page 404). Founding year remains unknown due to conflict (LinkedIn 2025 vs Tracxn 2022); HQ remains low-confidence reported (SF CEO vs Tracxn Bali). Overall confidence: low.",
      "research_notes": {}
    },
    {
      "rank": 25,
      "focus_areas_normalised": [
        "Coding",
        "Computer Use",
        "Enterprise Workflows"
      ],
      "slug": "habitat-inc",
      "brand_name": "Habitat Inc",
      "segment": "Commercial vendors",
      "website": "https://www.habitat.inc/",
      "status": {
        "value": "active",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/habitat-ai-inc (accessed 2026-06-07); https://www.habitat.inc/ (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "hq_location": {
        "value": "New York, NY, USA",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/habitat-ai-inc (accessed 2026-06-07), public page lists address 116 E 30th St, New York, NY 10016"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": "SF was inferred from a single founder LinkedIn profile location (https://www.linkedin.com/in/maximenis/, accessed 2026-06-07); insufficient to assert a company office, downgraded to unknown"
      },
      "headcount_band": {
        "value": "1-10",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/habitat-ai-inc (accessed 2026-06-07), '2-10 employees'"
      },
      "current_headcount": {
        "value": "2-10 (approx 5 associated on LinkedIn)",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/habitat-ai-inc (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "what_they_sell": {
        "value": "environments",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/habitat-ai-inc (accessed 2026-06-07), 'RL environments for white-collar work'; https://alignlist.com/guides/top-40-rl-environments-startups-and-companies (accessed 2026-06-07); https://www.chemistry.vc/post/rl-reigns-supreme (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "maturity": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": "No public GitHub org or OSS repos found for habitat.inc as of 2026-06-07"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "distributed_remote": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "has_researchers": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Maxim Enis (co-founder), Williams College '24; prior Ramp association per LinkedIn; co-author (with Mark Hopkins, Williams) of arXiv:2404.13813 'From LLM to NMT: Advancing Low-Resource Machine Translation with Claude' (2024, academic, predates company)",
          "Andrew Megalaa (co-founder), Williams College '24"
        ],
        "confidence": "reported",
        "source": "https://www.linkedin.com/in/maximenis/ (accessed 2026-06-07); https://arxiv.org/abs/2404.13813 (accessed 2026-06-07); https://www.instagram.com/p/C6ra5D1PrGo/ (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [],
        "confidence": "unknown",
        "source": "No company-published papers or benchmarks found. Co-founder M. Enis authored arXiv:2404.13813 academically prior to Habitat; not attributed to the company"
      },
      "total_raised": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No Crunchbase/press/announcement found for habitat.inc as of 2026-06-07; Crunchbase 'Habitat' hits are unrelated entities"
      },
      "last_round": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "focus_areas": [
        "enterprise workflows",
        "computer use environments",
        "coding environments"
      ],
      "positioning_summary": "Habitat Inc is an early-stage commercial vendor (2-10 employees, New York HQ) building reinforcement-learning environments for white-collar / work automation, with stated focus on code and desktop-style (computer use) interaction tasks for training agentic AI models. It appears in third-party listings of RL-environment suppliers serving AI labs. No funding, customer, or certification information is publicly available.",
      "best_fit_use_case": "Buyers needing RL environments that simulate enterprise/desktop and coding workflows to post-train computer-use and coding agents.",
      "overall_confidence": "low",
      "sources": [
        {
          "url": "https://www.habitat.inc/",
          "accessed_date": "2026-06-07",
          "note": "Official site; JS-rendered, no extractable marketing copy via fetch"
        },
        {
          "url": "https://www.linkedin.com/company/habitat-ai-inc",
          "accessed_date": "2026-06-07",
          "note": "Public company page: '2-10 employees', tagline 'RL environments for white-collar work', NY address, website habitat.inc"
        },
        {
          "url": "https://www.linkedin.com/in/maximenis/",
          "accessed_date": "2026-06-07",
          "note": "Maxim Enis, at Habitat Inc, SF, Williams College 2020-2024, prior Ramp connection; ML/translation research"
        },
        {
          "url": "https://www.linkedin.com/in/andrewmegalaa/",
          "accessed_date": "2026-06-07",
          "note": "Andrew Megalaa, at Habitat Inc (profile fetch blocked HTTP 999; confirmed via search snippet)"
        },
        {
          "url": "https://www.instagram.com/p/C6ra5D1PrGo/",
          "accessed_date": "2026-06-07",
          "note": "References Maxim Enis '24 and Andrew Megalaa as winning Williams College students"
        },
        {
          "url": "https://alignlist.com/guides/top-40-rl-environments-startups-and-companies",
          "accessed_date": "2026-06-07",
          "note": "Lists Habitat (habitat.inc), domain 'Code, Computer Use', 'targets code and desktop-style interaction environments for practical agent workflows'"
        },
        {
          "url": "https://newsletter.semianalysis.com/p/rl-environments-and-rl-for-science",
          "accessed_date": "2026-06-07",
          "note": "Names Habitat among RL-environment companies (with DeepTune, Fleet, Vmax, Turing, Mechanize, Preference Model, Bespoke Labs, Veris.ai)"
        },
        {
          "url": "https://www.chemistry.vc/post/rl-reigns-supreme",
          "accessed_date": "2026-06-07",
          "note": "Describes Habitat as building 'hundreds of diverse, programmatically verifiable problems just out of reach of current models'"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "other_locations",
          "was": "[\"San Francisco, CA (founders/employees listed in SF on LinkedIn)\"] (estimated)",
          "now": "[] (unknown)",
          "reason": "SF was inferred solely from one founder's LinkedIn profile location. A single profile location is not adequate evidence of a company office; HQ is New York. Downgraded to unknown per do-not-overreach rule."
        },
        {
          "field": "researcher_backgrounds",
          "was": "Listed paper without clarifying it is a personal/academic publication; co-author unstated",
          "now": "Clarified paper is academic (co-authored with Mark Hopkins, Williams), predates the company, and labeled founders explicitly",
          "reason": "Accuracy/attribution: the arXiv paper co-author is Mark Hopkins (advisor), not Andrew Megalaa; the paper is academic and pre-company, so it should not imply company research output."
        },
        {
          "field": "published_papers_or_benchmarks",
          "was": "[] with empty source",
          "now": "[] with source note explaining the founder's academic paper is not attributed to the company",
          "reason": "Added provenance note so the empty value is defensible; the founder's arXiv paper is academic and not a Habitat company publication/benchmark."
        },
        {
          "field": "total_raised",
          "was": "unknown with empty source",
          "now": "unknown with source note documenting the negative search result",
          "reason": "Documented that an active search of Crunchbase/press returned no funding for habitat.inc (only unrelated 'Habitat' entities), strengthening the unknown determination."
        },
        {
          "field": "what_they_sell",
          "was": "source cited LinkedIn + alignlist",
          "now": "added chemistry.vc corroboration",
          "reason": "Strengthened sourcing with an additional independent third-party reference describing the RL-environments product; value/confidence unchanged."
        },
        {
          "field": "positioning_summary",
          "was": "Original summary",
          "now": "Added headcount/HQ specifics and explicit note that funding/customer/certification data is unavailable",
          "reason": "Improve neutrality and signal data gaps to procurement readers; no claims added beyond verified facts."
        }
      ],
      "verification_summary": "Independently re-verified. Company identity CONFIRMED as the correct entity matching the directory note 'RL for the automation of work': LinkedIn tagline 'RL environments for white-collar work', plus alignlist, SemiAnalysis, and chemistry.vc all describe habitat.inc as an RL-environments vendor for code/computer-use/work-automation tasks. Headcount (2-10 / band 1-10) and NY HQ confirmed via LinkedIn public page. ALL funding fields (total_raised, last_round, valuation, investors) remain unknown, no Crunchbase, press, or announcement exists for this Habitat; same-named Crunchbase results are unrelated companies. Customers, SOC2, and certifications all unknown/empty, no trust page or third-party confirmation; no frontier-lab customer tie found. Corrected an over-asserted SF office (single-profile inference → unknown) and tightened the founder research-paper attribution (academic, co-authored with Mark Hopkins, predates company; not a company benchmark). Founded year remains unknown. Overall confidence: low.",
      "research_notes": {
        "found": [
          "Confirmed correct company: habitat.inc, LinkedIn /company/habitat-ai-inc, RL environments for white-collar/work automation (matches directory note + Enterprise/Computer Use tags)",
          "LinkedIn tagline 'RL environments for white-collar work'; AlignList domain 'Code, Computer Use'",
          "Headcount band 2-10 (LinkedIn)",
          "HQ listed as New York, NY on LinkedIn",
          "Founders/team: Maxim Enis and Andrew Megalaa, both Williams College class of 2024; Enis SF-based with prior Ramp tie",
          "Named in multiple RL-environment market maps (SemiAnalysis, Chemistry VC, AlignList)"
        ],
        "missing": [
          "Founding year",
          "Funding / investors / round / valuation (no credible source found)",
          "Customers (none disclosed publicly)",
          "Security/SOC2/ISO posture and trust page",
          "Deployment model, maturity stage, open roles count",
          "Researcher count and whether dedicated research function exists"
        ],
        "conflicts": [
          "HQ location: LinkedIn company page shows New York, NY, but founder/employee profiles are SF-based, actual operating HQ ambiguous",
          "Multiple unrelated 'Habitat AI' entities exist (e.g., a $16M renewable-powered data-center JV by Larmag/Solgen; Meta's open-source 'AI Habitat' embodied-AI simulator; Habitat-lab; Habitat Logistics), all excluded as name collisions"
        ],
        "stale": [],
        "open_questions": [
          "Is the company venture-backed / YC-affiliated? No public funding record located.",
          "Exact founding year and founder titles (CEO/CTO) unconfirmed.",
          "Does Habitat sell environments directly to frontier labs, and which (if any)?"
        ]
      }
    },
    {
      "rank": null,
      "focus_areas_normalised": [
        "Coding",
        "Computer Use",
        "Enterprise Workflows",
        "Long-Horizon"
      ],
      "slug": "scale-ai",
      "brand_name": "Scale AI",
      "segment": "Incumbents also building RL environments",
      "website": "https://scale.com",
      "status": {
        "value": "active (independent; Meta holds ~49% non-voting minority stake as of June 2025)",
        "confidence": "confirmed",
        "source": "https://techcrunch.com/2025/06/13/scale-ai-confirms-significant-investment-from-meta-says-ceo-alexandr-wang-is-leaving/ (accessed 2026-06-07); https://www.cnbc.com/2025/11/04/scale-ais-life-after-meta-has-been-rocky-cfo-insists-not-a-zombie.html"
      },
      "founded_year": {
        "value": 2016,
        "confidence": "confirmed",
        "source": "https://en.wikipedia.org/wiki/Scale_AI (accessed 2026-06-07); founded 2016 by Alexandr Wang and Lucy Guo via Y Combinator"
      },
      "hq_location": {
        "value": "San Francisco, California, USA",
        "confidence": "confirmed",
        "source": "https://scale.com/about (accessed 2026-06-07); https://en.wikipedia.org/wiki/Scale_AI"
      },
      "other_locations": {
        "value": [
          "St. Louis, Missouri, USA (opened 2022)"
        ],
        "confidence": "reported",
        "source": "https://en.wikipedia.org/wiki/Scale_AI (accessed 2026-06-07)"
      },
      "current_headcount": {
        "value": "~1,000-1,200 (late 2025); reduced after July 2025 layoff of ~200 (14%) from a ~1,400 global FTE workforce, plus ~500 contractors cut",
        "confidence": "reported",
        "source": "https://techcrunch.com/2025/07/16/scale-ai-lays-off-14-of-staff-largely-in-data-labeling-business/ (accessed 2026-06-07); https://www.cnbc.com/2025/11/04/scale-ais-life-after-meta-has-been-rocky-cfo-insists-not-a-zombie.html (CFO references '1,000-plus person company')"
      },
      "headcount_band": {
        "value": "200+",
        "confidence": "confirmed",
        "source": "https://techcrunch.com/2025/07/16/scale-ai-lays-off-14-of-staff-largely-in-data-labeling-business/ (accessed 2026-06-07); multiple sources cite ~1,000-1,400 FTE"
      },
      "headcount_growth": {
        "value": "~14% FTE reduction in July 2025 (~200 cut from ~1,400) plus ~500 contractors; interim CEO signaled intent to staff up enterprise and government sales units in H2 2025",
        "confidence": "reported",
        "source": "https://techcrunch.com/2025/07/16/scale-ai-lays-off-14-of-staff-largely-in-data-labeling-business/ (accessed 2026-06-07)"
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://en.wikipedia.org/wiki/Scale_AI (accessed 2026-06-07); Scale operates the SEAL research lab publishing model evaluations/leaderboards (not stated on scale.com/rlenvironments)"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "what_they_sell": {
        "value": "mixed (human data / data labeling, RL environments, model evaluations, AI infrastructure / applications)",
        "confidence": "confirmed",
        "source": "https://scale.com/about (accessed 2026-06-07); https://scale.com/rlenvironments"
      },
      "deployment_model": {
        "value": "managed-hosted (vendor-operated environments/infrastructure to run agent training and evaluation; simulated APIs, MCP servers, GUIs)",
        "confidence": "reported",
        "source": "https://scale.com/rlenvironments (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "GA (RL Environments product publicly offered)",
        "confidence": "reported",
        "source": "https://scale.com/rlenvironments (accessed 2026-06-07); https://techcrunch.com/2025/09/21/silicon-valley-bets-big-on-environments-to-train-ai-agents/"
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": "https://scale.com/rlenvironments (accessed 2026-06-07); offered as a commercial managed product, no OSS license indicated"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "total_raised": {
        "value": "~$1.6B in disclosed equity rounds prior to Meta; Meta's June 2025 ~$14.3B was a strategic stake purchase (largely secondary/cash to existing holders), not a conventional primary funding round, so combining them into a single ~$15.9B 'raised' figure overstates capital raised",
        "confidence": "reported",
        "source": "https://techcrunch.com/2025/06/13/scale-ai-confirms-significant-investment-from-meta-says-ceo-alexandr-wang-is-leaving/ (accessed 2026-06-07); https://techcrunch.com/2024/05/21/data-labeling-startup-scale-ai-raises-1b-as-valuation-doubles-to-13-8b/"
      },
      "last_round": {
        "value": "Meta strategic investment, ~$14.3B for ~49% non-voting stake, June 2025 (valuing Scale at ~$29B); prior Series F ~$1B in May 2024 at ~$13.8B valuation",
        "confidence": "reported",
        "source": "https://techcrunch.com/2025/06/13/scale-ai-confirms-significant-investment-from-meta-says-ceo-alexandr-wang-is-leaving/ (accessed 2026-06-07); https://techcrunch.com/2024/05/21/data-labeling-startup-scale-ai-raises-1b-as-valuation-doubles-to-13-8b/"
      },
      "valuation": {
        "value": "~$29B (post Meta June 2025 investment)",
        "confidence": "reported",
        "source": "https://techcrunch.com/2025/06/13/scale-ai-confirms-significant-investment-from-meta-says-ceo-alexandr-wang-is-leaving/ (accessed 2026-06-07); corroborated by Bloomberg and multiple outlets"
      },
      "notable_investors": {
        "value": [
          "Meta Platforms",
          "Accel",
          "Amazon",
          "Nvidia",
          "Founders Fund",
          "Index Ventures",
          "Tiger Global Management",
          "Dragoneer Investment Group",
          "Greenoaks",
          "Y Combinator"
        ],
        "confidence": "reported",
        "source": "https://en.wikipedia.org/wiki/Scale_AI (accessed 2026-06-07); https://news.crunchbase.com/ai/scale-holistic-raise-big-accel-nvda-amzn/"
      },
      "revenue_signals": {
        "value": "~$870M revenue in 2024 (reported); company guided to ~$2B revenue for 2025 (Bloomberg, Apr 2025); this is the company's own forward guidance, not an audited figure",
        "confidence": "reported",
        "source": "https://www.bloomberg.com/news/articles/2025-04-02/scale-ai-expects-to-more-than-double-sales-to-2-billion-in-2025 (accessed 2026-06-07); https://en.wikipedia.org/wiki/Scale_AI"
      },
      "notable_customers": {
        "value": [
          {
            "name": "OpenAI (former / winding down work after Meta deal)",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "Google / Alphabet (former / cutting ties after Meta deal; reportedly Scale's largest customer)",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "xAI (reportedly paused work after Meta deal; press-reported, not firmly confirmed)",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          },
          {
            "name": "Microsoft (press-reported customer; relationship reportedly affected post-deal, not firmly confirmed)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "U.S. Department of Defense",
            "verification": "verified",
            "frontier_lab_tie": false
          },
          {
            "name": "General Motors",
            "verification": "verified",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://fortune.com/2025/06/19/openai-is-phasing-out-scale-ai-work-following-startups-meta-deal/ (accessed 2026-06-07); https://www.cnbc.com/2025/11/04/scale-ais-life-after-meta-has-been-rocky-cfo-insists-not-a-zombie.html ; NOTE: OpenAI, Google, and xAI are reported to be winding down or pausing work after the Meta deal; Meta was removed from the customer list because it is now a ~49% investor and competitor rather than a current customer"
      },
      "soc2": {
        "value": "Type II",
        "confidence": "confirmed",
        "source": "https://scale.com/security (accessed 2026-06-07); page states 'SOC 2 Type II'; reports available via trust.scale.com"
      },
      "other_certifications": {
        "value": [
          "ISO/IEC 27001:2022",
          "FedRAMP High Authorized",
          "DoD IL4 Provisional Authorization (DISA)"
        ],
        "confidence": "confirmed",
        "source": "https://scale.com/security (accessed 2026-06-07); HIPAA removed because the current security page does not list it; the HIPAA claim appears only in a separate, older blog post"
      },
      "security_page": {
        "value": "https://scale.com/security (Trust Center: https://trust.scale.com/)",
        "confidence": "confirmed",
        "source": "https://scale.com/security (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "Scale operates the SEAL (Safety, Evaluations and Alignment Lab) research group publishing model evaluations and public leaderboards (e.g., Humanity's Last Exam, SEAL leaderboards)"
        ],
        "confidence": "reported",
        "source": "https://en.wikipedia.org/wiki/Scale_AI (accessed 2026-06-07); https://techcrunch.com/2025/09/21/silicon-valley-bets-big-on-environments-to-train-ai-agents/"
      },
      "focus_areas": [
        "coding environments",
        "computer use environments",
        "browser environments",
        "enterprise workflows",
        "evaluation / benchmarks",
        "execution infrastructure",
        "long-horizon / general reasoning"
      ],
      "positioning_summary": "Scale AI is the data-labeling and AI-data incumbent that has extended into RL environments, offering simulated web apps, macOS/Windows-like desktop VMs, and MCP-tool environments (Slack, HubSpot, Linear) with expert-designed objectives, rubrics, and automated verifiers to train and evaluate agents on long-horizon professional workflows. Following Meta's ~$14.3B June 2025 investment (~49% non-voting stake) and founder Alexandr Wang's departure to Meta, several frontier-lab customers (OpenAI, Google, xAI) reportedly scaled back or paused engagement over conflict-of-interest concerns.",
      "best_fit_use_case": "Enterprises and government buyers (and labs without a Meta conflict concern) wanting an established, security-certified vendor to supply expert-built, verifiable RL environments and human data for agent training across coding, computer-use, browser, and enterprise-tool workflows.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://scale.com/rlenvironments",
          "accessed_date": "2026-06-07",
          "note": "RL Environments product page: web apps, desktop VMs, MCP servers (Slack/HubSpot/Linear), verifiable rewards, managed infra"
        },
        {
          "url": "https://scale.com/blog/rl-environments",
          "accessed_date": "2026-06-07",
          "note": "Blog: The Next Frontier of Data Training: RL Environments"
        },
        {
          "url": "https://scale.com/about",
          "accessed_date": "2026-06-07",
          "note": "Company about page; HQ San Francisco"
        },
        {
          "url": "https://scale.com/security",
          "accessed_date": "2026-06-07",
          "note": "SOC 2 Type II, ISO 27001:2022, FedRAMP High, DoD IL4; Trust Center trust.scale.com"
        },
        {
          "url": "https://scale.com/blog/soc2-hipaa",
          "accessed_date": "2026-06-07",
          "note": "SOC 2 Type II and HIPAA compliance announcement"
        },
        {
          "url": "https://scale.com/blog/scale-ai-announces-next-phase-of-company-evolution",
          "accessed_date": "2026-06-07",
          "note": "Meta investment, ~$29B valuation, Wang to Meta, Jason Droege interim CEO"
        },
        {
          "url": "https://techcrunch.com/2025/06/13/scale-ai-confirms-significant-investment-from-meta-says-ceo-alexandr-wang-is-leaving/",
          "accessed_date": "2026-06-07",
          "note": "Meta ~$14.3B, ~49% stake, ~$29B valuation, Wang departure"
        },
        {
          "url": "https://techcrunch.com/2025/09/21/silicon-valley-bets-big-on-environments-to-train-ai-agents/",
          "accessed_date": "2026-06-07",
          "note": "Industry context on RL environments; Scale among vendors"
        },
        {
          "url": "https://www.cnbc.com/2025/07/16/scale-ai-cuts-14percent-of-workforce-after-meta-investment-hiring-of-wang.html",
          "accessed_date": "2026-06-07",
          "note": "14% layoff (~200 FTE) from ~1,400 workforce; OpenAI/Google/xAI scaling back"
        },
        {
          "url": "https://techcrunch.com/2024/05/21/data-labeling-startup-scale-ai-raises-1b-as-valuation-doubles-to-13-8b/",
          "accessed_date": "2026-06-07",
          "note": "Series F $1B May 2024 at $13.8B valuation"
        },
        {
          "url": "https://news.crunchbase.com/ai/scale-holistic-raise-big-accel-nvda-amzn/",
          "accessed_date": "2026-06-07",
          "note": "Series F investors: Accel, Amazon, Nvidia, Meta etc."
        },
        {
          "url": "https://en.wikipedia.org/wiki/Scale_AI",
          "accessed_date": "2026-06-07",
          "note": "Founded 2016 (Wang, Guo), HQ SF, ~$870M 2024 revenue, customers, investors, 2023 layoff"
        },
        {
          "url": "https://www.linkedin.com/company/scaleai",
          "accessed_date": "2026-06-07",
          "note": "Public LinkedIn company page (headcount band varies by source)"
        },
        {
          "url": "https://fortune.com/2025/06/14/self-made-billionaire-college-dropout-alexandr-wang-signs-14-3-billion-deal-to-bolster-metas-ai-efforts-theres-a-huge-premium-to-naivete/",
          "accessed_date": "2026-06-07",
          "note": "Meta $14.3B deal context"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "overall_confidence",
          "was": "high",
          "now": "medium",
          "reason": "Several decision-relevant fields rest on single-source/reported figures (headcount estimate, $2B revenue guidance, customer status in flux). 'High' overstated certainty."
        },
        {
          "field": "notable_customers",
          "was": "Meta listed as verified customer; OpenAI/Google verified with no caveat; xAI self-claimed; Microsoft verified",
          "now": "Meta removed (now a ~49% investor/competitor, not a current customer); OpenAI and Google relabeled as former/winding-down (third-party verified); xAI and Microsoft set to self-claimed/press-reported with caveats",
          "reason": "Listing Meta as a customer is misleading given it is a major investor and rival. OpenAI/Google are credibly third-party reported to be exiting, so presenting them as steady customers misrepresents current state. xAI/Microsoft relationships are press-reported, not firmly verified, so they cannot be 'verified.'"
        },
        {
          "field": "other_certifications",
          "was": "includes 'HIPAA compliance (claimed)' at confidence confirmed",
          "now": "HIPAA removed; list is ISO/IEC 27001:2022, FedRAMP High, DoD IL4",
          "reason": "The current scale.com/security page does not list HIPAA (verified by fetch). HIPAA appears only on an older blog claim, so it cannot be 'confirmed.'"
        },
        {
          "field": "total_raised",
          "was": "~$15.9B total disclosed (incl. ~$14.3B from Meta), confidence reported",
          "now": "~$1.6B equity raised pre-Meta; Meta's ~$14.3B characterized as a strategic stake purchase (largely secondary), not primary capital raised, so combining overstates 'raised'",
          "reason": "Meta's $14.3B was largely a purchase of equity/secondary, not a primary fundraising round; conflating the two inflates the total-raised figure."
        },
        {
          "field": "revenue_signals",
          "was": "~$870M 2024; ~$2B ARR 2025 cited by third-party trackers (unverified)",
          "now": "~$870M 2024 (reported); ~$2B revenue is the COMPANY's own 2025 guidance per Bloomberg (forward guidance, not audited)",
          "reason": "The $2B figure is company guidance reported by Bloomberg, not an independent third-party tracker estimate; sourcing corrected for accuracy."
        },
        {
          "field": "status",
          "was": "active (company independent after June 2025 Meta minority investment)",
          "now": "active (independent; Meta holds ~49% non-voting minority stake)",
          "reason": "Clarified the stake size and added Nov 2025 corroboration; substance unchanged but made the minority-stake structure explicit."
        },
        {
          "field": "headcount_band confidence",
          "was": "reported",
          "now": "confirmed",
          "reason": "Multiple credible sources independently put FTE at ~1,000-1,400, well above 200, so the band is firmly supported."
        },
        {
          "field": "has_researchers source",
          "was": "cited scale.com/rlenvironments as evidence of SEAL/research",
          "now": "source corrected; rlenvironments page does NOT mention SEAL or researchers (verified by fetch); SEAL evidenced via Wikipedia/press",
          "reason": "The cited product page contains no researcher/SEAL claim; source replaced with accurate references."
        },
        {
          "field": "published_papers_or_benchmarks source",
          "was": "cited rlenvironments page",
          "now": "sourced to Wikipedia/press; added examples (Humanity's Last Exam, SEAL leaderboards)",
          "reason": "rlenvironments page does not reference SEAL or publications; corrected sourcing."
        }
      ],
      "verification_summary": "Confirmed this is the correct entity (the labeling incumbent Scale AI / scale.com, founded 2016 by Alexandr Wang and Lucy Guo) matching the note 'the labeling incumbent adapting.' Funding: Meta's ~$14.3B for ~49% non-voting stake at ~$29B valuation (June 2025) is multi-source confirmed; kept as 'reported' and flagged that combining it with prior ~$1.6B equity into a ~$15.9B 'raised' figure overstates capital raised (the Meta deal was largely a stake purchase). Layoffs (~14%/~200 FTE + ~500 contractors from ~1,400) confirmed via TechCrunch/CNBC/Bloomberg; headcount band 200+ upgraded to confirmed. Biggest corrections are on customers: Meta removed (now an investor/competitor, not a current customer); OpenAI and Google are credibly third-party reported to be winding down/cutting ties and relabeled accordingly; xAI and Microsoft set to self-claimed/press-reported. Certifications: SOC 2 Type II, ISO 27001:2022, FedRAMP High, and DoD IL4 verified on scale.com/security, but HIPAA is NOT on the current security page and was removed from the 'confirmed' list. Revenue: ~$870M 2024 reported; ~$2B for 2025 corrected to company forward guidance (Bloomberg), not audited. Overall confidence lowered from high to medium given customer churn and reliance on reported/guidance figures.",
      "research_notes": {
        "found": [
          "RL Environments product live on scale.com (web apps, macOS/Windows desktop VMs, MCP servers: Slack/HubSpot/Linear; verifiable process+outcome rewards)",
          "Reported that nearly half of Scale's new data training projects involve RL environments",
          "Meta June 2025 ~$14.3B investment for ~49% non-voting stake, ~$29B valuation; Alexandr Wang departed to Meta; Jason Droege interim CEO",
          "July 2025 ~14% layoff (~200 FTE) from ~1,400 workforce",
          "Security: SOC 2 Type II, ISO 27001:2022, FedRAMP High, DoD IL4, claimed HIPAA",
          "Founded 2016 (Wang, Guo), HQ San Francisco, ~$870M 2024 revenue"
        ],
        "missing": [
          "Exact current headcount (sources range ~1,000-1,400; LinkedIn band inconsistent)",
          "Open roles count",
          "Researcher count and specific researcher backgrounds for the RL/SEAL teams",
          "Whether RL Environments is offered self-hosted vs managed-only (deployment inferred as managed-hosted)",
          "RL Environments specific launch date / maturity label (beta vs GA)"
        ],
        "conflicts": [
          "Headcount: Wikipedia ~1,200 (2025) vs CNBC ~1,400 pre-layoff; LinkedIn band cited as 501-1,000 by one aggregator and ~4,000 by another (likely includes contractor/Remotasks counts)",
          "Total raised: aggregators cite ~$15.9B incl. Meta; pre-Meta equity ~$1.6B",
          "Customer status: OpenAI/Google/Meta/Microsoft were historically listed as verified customers, but OpenAI, Google, and xAI are reported to be winding down or cutting ties after the Meta deal; current customer status is uncertain"
        ],
        "stale": [
          "2024 revenue figure (~$870M) is >12 months old",
          "Series F valuation ($13.8B, May 2024) superseded by June 2025 Meta deal"
        ],
        "open_questions": [
          "How much of Scale's frontier-lab customer base remains active for RL environments after the Meta investment conflict-of-interest concerns?",
          "Is RL Environments a fully GA commercial product with self-serve, or engagement-based managed delivery?",
          "Does Meta's ~49% stake affect Scale's positioning as a neutral RL-environment vendor for competing labs?"
        ]
      }
    },
    {
      "rank": null,
      "focus_areas_normalised": [
        "Coding"
      ],
      "slug": "modal",
      "brand_name": "Modal",
      "segment": "Adjacent: execution infrastructure",
      "website": "https://modal.com",
      "focus_areas": [
        "execution infrastructure",
        "coding environments"
      ],
      "what_they_sell": {
        "value": "infra",
        "confidence": "confirmed",
        "source": "https://modal.com/ (accessed 2026-06-07)"
      },
      "positioning_summary": "Modal (Modal Labs) is a New York-based, Python-native serverless cloud purpose-built for AI/ML workloads, providing on-demand GPU/CPU compute, fast-booting sandboxed containers, inference, fine-tuning, and code execution. It is execution infrastructure rather than an RL-environment vendor, but is used to run reinforcement-learning training and large fleets of parallel sandboxed environments for AI labs.",
      "best_fit_use_case": "Buyers needing serverless, autoscaling compute to run untrusted code, RL training loops, and thousands of parallel sandboxed agent/coding environments without managing infrastructure.",
      "maturity": {
        "value": "GA",
        "confidence": "confirmed",
        "source": "https://modal.com/ (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "managed-hosted (serverless cloud); Python/JS/TS/Go SDKs and API",
        "confidence": "confirmed",
        "source": "https://modal.com/ ; https://github.com/modal-labs (accessed 2026-06-07)"
      },
      "open_source": {
        "value": "no",
        "confidence": "confirmed",
        "source": "https://modal.com/ (accessed 2026-06-07); the core serverless platform is proprietary/managed, and only the client SDKs are open source"
      },
      "license": {
        "value": "Client SDKs open source (Python client Apache-2.0, JS/TS/Go libmodal MIT, examples MIT); platform itself proprietary",
        "confidence": "confirmed",
        "source": "https://github.com/modal-labs/modal-client ; https://github.com/modal-labs/libmodal (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2021,
        "confidence": "confirmed",
        "source": "https://techcrunch.com/2026/02/11/ai-inference-startup-modal-labs-in-talks-to-raise-at-2-5b-valuation-sources-say/ ; founded Jan 2021 corroborated by startupintros.com / Sacra (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "New York, NY, USA",
        "confidence": "confirmed",
        "source": "https://modal.com/company (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [
          "San Francisco, CA, USA",
          "Stockholm, Sweden"
        ],
        "confidence": "confirmed",
        "source": "https://modal.com/company ; https://modal.com/blog/modal-series-c (accessed 2026-06-07)"
      },
      "current_headcount": {
        "value": "120+ team members (official Series C blog, May 2026); ~153 employees per LinkedIn/Tracxn snippet (as of April 2026)",
        "confidence": "reported",
        "source": "https://modal.com/blog/modal-series-c (120+); LinkedIn https://www.linkedin.com/company/modal-labs and Tracxn ~153 (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "51-200",
        "confidence": "confirmed",
        "source": "LinkedIn/Tracxn ~153 employees ; https://modal.com/blog/modal-series-c (120+) (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "30+ open roles across NYC, SF, Stockholm",
        "confidence": "reported",
        "source": "https://modal.com/company (accessed 2026-06-07)"
      },
      "has_researchers": {
        "value": "no",
        "confidence": "estimated",
        "source": "https://modal.com/company (accessed 2026-06-07); infrastructure/engineering company with no research division advertised, though founders/engineers have strong ML backgrounds"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "CEO Erik Bernhardsson: ex-Spotify (music recommendations), ex-CTO Better.com, creator of open-source Annoy and Luigi",
          "CTO Akshat Bubna: ex-Scale AI engineer, MIT"
        ],
        "confidence": "reported",
        "source": "https://erikbern.com/about.html ; https://techcrunch.com/2026/02/11/ai-inference-startup-modal-labs-in-talks-to-raise-at-2-5b-valuation-sources-say/ ; startupintros.com founder profiles (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "total_raised": {
        "value": "~$466M total disclosed",
        "confidence": "reported",
        "source": "https://modal.com/company ; corroborated by Sacra and startupintros.com (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "Series C, $355M, closed ~2026-05-21",
        "confidence": "confirmed",
        "source": "https://modal.com/blog/modal-series-c ; https://siliconangle.com/2026/05/21/serverless-ai-infrastructure-startup-modal-labs-seals-355m-funding-round/ ; https://www.datacenterdynamics.com/en/news/modal-labs-secures-funding/ ; https://www.finsmes.com/2026/05/modal-raises-355m-in-series-c-funding-at-post-money-valuation-of-4-65-billion.html (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "$4.65B post-money (Series C, May 2026); first tranche priced at $2.5B, second at $4.65B; up from $1.1B (Series B)",
        "confidence": "confirmed",
        "source": "https://modal.com/blog/modal-series-c ; https://www.datacenterdynamics.com/en/news/modal-labs-secures-funding/ ; https://www.finsmes.com/2026/05/modal-raises-355m-in-series-c-funding-at-post-money-valuation-of-4-65-billion.html (accessed 2026-06-07)"
      },
      "notable_investors": {
        "value": [
          "General Catalyst (Series C co-lead)",
          "Redpoint Ventures (Series C co-lead; earlier Series A lead)",
          "Lux Capital (earlier round lead)",
          "Amplify Partners",
          "Menlo Ventures",
          "Bain Capital Ventures",
          "Accel"
        ],
        "confidence": "confirmed",
        "source": "https://modal.com/blog/modal-series-c ; corroborated by DCD, SiliconANGLE, Tech Startups (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "~$300M annualized revenue run rate (May 2026), grown ~5x since September; ~$50M ARR reported Feb 2026",
        "confidence": "reported",
        "source": "https://modal.com/blog/modal-series-c ; https://www.datacenterdynamics.com/en/news/modal-labs-secures-funding/ ; https://techcrunch.com/2026/02/11/ai-inference-startup-modal-labs-in-talks-to-raise-at-2-5b-valuation-sources-say/ (accessed 2026-06-07)"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Cognition (Devin)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Applied Compute",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Physical Intelligence",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Suno",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Ramp",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "DoorDash",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Decagon",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Chai Discovery",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Reducto",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Meta (Code World Models RL sandboxes)",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          }
        ],
        "confidence": "reported",
        "source": "https://modal.com/blog/modal-series-c (DoorDash, Reducto, Cognition, Decagon, Ramp, Applied Compute, Physical Intelligence, Chai Discovery, Suno listed on vendor page, self-claimed); Meta/Code World Models usage also described in third-party CWM coverage (machine-learning-made-simple.medium.com, blog.promptlayer.com) (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "Type II",
        "confidence": "confirmed",
        "source": "https://modal.com/docs/guide/security (SOC 2 Type II audit completed; report via trust.modal.com) (accessed 2026-06-07)"
      },
      "other_certifications": {
        "value": [
          "HIPAA: supports HIPAA-compliant workloads via BAA on Enterprise plan (no formal HIPAA certification exists)",
          "PCI handled via Stripe (Level 1)"
        ],
        "confidence": "confirmed",
        "source": "https://modal.com/docs/guide/security (accessed 2026-06-07)"
      },
      "security_page": {
        "value": "https://trust.modal.com (security portal); https://modal.com/docs/guide/security",
        "confidence": "confirmed",
        "source": "https://modal.com/docs/guide/security (accessed 2026-06-07)"
      },
      "distributed_remote": {
        "value": "yes",
        "confidence": "estimated",
        "source": "https://modal.com/company (accessed 2026-06-07); three offices (NYC, SF, Stockholm) with roles spanning all locations"
      },
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://modal.com/blog/modal-series-c (accessed 2026-06-07)"
      },
      "overall_confidence": "high",
      "sources": [
        {
          "url": "https://modal.com/",
          "accessed_date": "2026-06-07",
          "note": "Official homepage, serverless AI infrastructure positioning, GPU autoscaling, inference, fine-tuning"
        },
        {
          "url": "https://modal.com/company",
          "accessed_date": "2026-06-07",
          "note": "Official company page, offices (NYC HQ, SF, Stockholm), founders, 30+ open roles, $466M+ raised"
        },
        {
          "url": "https://modal.com/blog/modal-series-c",
          "accessed_date": "2026-06-07",
          "note": "Official Series C announcement, $355M at $4.65B, ~$300M ARR, 120+ team, customers, RL infra, 1B sandboxes"
        },
        {
          "url": "https://modal.com/blog/announcing-our-series-b",
          "accessed_date": "2026-06-07",
          "note": "Official Series B, $87M, $1.1B valuation, Lux Capital lead, $111M total, founders, customers"
        },
        {
          "url": "https://modal.com/blog/soc2type2",
          "accessed_date": "2026-06-07",
          "note": "Official, SOC 2 Type II compliance"
        },
        {
          "url": "https://modal.com/blog/hipaa",
          "accessed_date": "2026-06-07",
          "note": "Official, HIPAA-compliant workloads via BAA on Enterprise plan"
        },
        {
          "url": "https://modal.com/docs/guide/security",
          "accessed_date": "2026-06-07",
          "note": "Official docs, SOC 2 Type 2, HIPAA, trust.modal.com portal, security@modal.com"
        },
        {
          "url": "https://techcrunch.com/2026/02/11/ai-inference-startup-modal-labs-in-talks-to-raise-at-2-5b-valuation-sources-say/",
          "accessed_date": "2026-06-07",
          "note": "TechCrunch, founded 2021, ~$50M ARR (Feb 2026), $2.5B talks, CEO background"
        },
        {
          "url": "https://www.datacenterdynamics.com/en/news/modal-labs-secures-funding/",
          "accessed_date": "2026-06-07",
          "note": "DCD, $355M Series C, $4.65B, two tranches ($2.5B/$4.65B), ~$300M ARR up from ~$60M"
        },
        {
          "url": "https://siliconangle.com/2026/05/21/serverless-ai-infrastructure-startup-modal-labs-seals-355m-funding-round/",
          "accessed_date": "2026-06-07",
          "note": "SiliconANGLE, Series C close date ~2026-05-21"
        },
        {
          "url": "https://techstartups.com/2026/05/21/modal-labs-raises-355m-quadrupling-valuation-to-4-65b-as-ai-infrastructure-demand-surges/",
          "accessed_date": "2026-06-07",
          "note": "Tech Startups, Series C details, investors, customers"
        },
        {
          "url": "https://github.com/modal-labs",
          "accessed_date": "2026-06-07",
          "note": "GitHub org, client SDKs only (Python Apache-2.0, JS/TS/Go libmodal MIT, examples MIT)"
        },
        {
          "url": "https://github.com/modal-labs/modal-client",
          "accessed_date": "2026-06-07",
          "note": "Modal client SDK repo"
        },
        {
          "url": "https://erikbern.com/about.html",
          "accessed_date": "2026-06-07",
          "note": "CEO Erik Bernhardsson bio, Spotify, Better.com, Annoy, Luigi"
        },
        {
          "url": "https://www.linkedin.com/company/modal-labs",
          "accessed_date": "2026-06-07",
          "note": "LinkedIn public snippet, ~153 employees (April 2026), NYC, founded ~2021"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "focus_areas",
          "was": "[execution infrastructure, coding environments, computer use environments]",
          "now": "[execution infrastructure, coding environments]",
          "reason": "Dropped 'computer use environments', Modal is serverless compute infra; it provides sandboxes that customers use to run code/RL environments, but it does not itself sell a computer-use product. 'execution infrastructure' is the primary fit per the directory note; 'coding environments' retained because code-execution sandboxes are an explicit, heavily marketed use."
        },
        {
          "field": "founded_year.source",
          "was": "https://modal.com/blog/modal-series-c ; techcrunch...",
          "now": "techcrunch... ; corroborated by startupintros.com / Sacra",
          "reason": "Series C blog does not state founding year; corrected source to third-party reporting that confirms Jan 2021. Value unchanged (2021), confidence stays confirmed (multiple sources)."
        },
        {
          "field": "hq_location.value",
          "was": "New York, NY, USA (233 Spring St)",
          "now": "New York, NY, USA",
          "reason": "Removed the unverifiable street-address detail; city/country confirmed, exact suite not re-verified."
        },
        {
          "field": "other_locations.value",
          "was": "includes street addresses for SF and Stockholm",
          "now": "San Francisco, CA, USA; Stockholm, Sweden",
          "reason": "Removed unverifiable street addresses; cities confirmed by company page and Series C blog."
        },
        {
          "field": "headcount_band.source",
          "was": "https://modal.com/blog/modal-series-c",
          "now": "LinkedIn/Tracxn ~153 ; Series C blog (120+)",
          "reason": "Per instructions, headcount_band should be sanity-checked against LinkedIn; ~153 (LinkedIn/Tracxn) and 120+ (official) both sit firmly in 51-200. Value unchanged."
        },
        {
          "field": "total_raised.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "The $466M total comes primarily from the vendor's own company page; while corroborated by aggregators (Sacra, startupintros), it is a sum across rounds not independently audited line-by-line. Downgraded to reported per adversarial standard."
        },
        {
          "field": "total_raised.value",
          "was": "~$466M (per official company page); ~$111M pre-Series C + $355M Series C",
          "now": "~$466M total disclosed",
          "reason": "Simplified to the corroborated figure; the precise pre-Series-C breakdown ($111M) was not re-verified against primary sources."
        },
        {
          "field": "open_roles_count.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "Role counts are a vendor careers-page snapshot that changes frequently and was not independently re-verified; the per-team breakdown (Eng 17/GTM 8/G&A 5) could not be confirmed, so removed the breakdown and downgraded confidence."
        },
        {
          "field": "open_roles_count.value",
          "was": "30+ open roles (Engineering 17, GTM 8, G&A 5) across NYC, SF, Stockholm",
          "now": "30+ open roles across NYC, SF, Stockholm",
          "reason": "Removed unverified per-team breakdown."
        },
        {
          "field": "researcher_backgrounds.value",
          "was": "includes 'CTO Akshat Bubna: ex-Staff Engineer Scale AI, MIT, IOI medalist' and 'Discover Weekly' specifics",
          "now": "trimmed to corroborated claims (ex-Spotify recommendations, Better.com, Annoy/Luigi; CTO ex-Scale AI, MIT)",
          "reason": "Removed specific unverified embellishments (exact 'Staff Engineer' title, 'IOI medalist', 'Discover Weekly' attribution) sourced from getprog.ai/theorg.com aggregators; kept the corroborated core."
        },
        {
          "field": "notable_customers.value",
          "was": "included Scale AI, Substack, Lovable (and 12 total)",
          "now": "removed Scale AI, Substack, Lovable; added Reducto; 10 total",
          "reason": "The Series C page I re-fetched lists DoorDash, Reducto, Cognition, Decagon, Ramp, Applied Compute, Physical Intelligence, Chai Discovery, Suno. Scale AI/Substack/Lovable could not be re-confirmed on current vendor materials and were dropped to avoid overreach; Reducto added as it appears on the official page. All remain self-claimed."
        },
        {
          "field": "notable_customers.source",
          "was": "series-c ; series-b blogs",
          "now": "series-c blog (logos self-claimed); Meta/CWM also in third-party CWM coverage",
          "reason": "Clarified sourcing; the Meta/Code World Models RL-sandbox usage is additionally described in independent third-party coverage, supporting the frontier_lab_tie flag, though the relationship is still treated as self-claimed."
        }
      ],
      "verification_summary": "Re-verified the highest-risk claims against multiple independent third-party sources. Confirmed this is the correct company: Modal / Modal Labs, the NY-based Python-native serverless cloud for AI workloads (matches directory note). Funding is strongly corroborated: Series C $355M at $4.65B post-money closed ~2026-05-21 (DCD, SiliconANGLE, FinSMEs, Tech Startups, TechCrunch), two tranches $2.5B then $4.65B, up from $1.1B Series B; leads General Catalyst and Redpoint with Menlo/Bain/Accel new, all kept confirmed. Total raised (~$466M) downgraded to 'reported' as it rests mainly on the vendor page plus aggregators. Founded 2021 confirmed (Jan 2021). Headcount: official 120+, LinkedIn/Tracxn ~153 as of April 2026, comfortably in 51-200 band; current_headcount kept 'reported', band source corrected to LinkedIn. SOC 2 Type II confirmed via modal.com/docs/guide/security and trust.modal.com portal; HIPAA correctly characterized as BAA-supported, no formal cert. Customers: all treated as self-claimed (vendor logos); trimmed three names not re-confirmable, added Reducto from the official page. Meta/Code World Models RL-sandbox usage is additionally referenced in third-party CWM coverage, justifying the frontier_lab_tie flag, but the relationship remains self-claimed. Dropped 'computer use environments' from focus_areas as unsupported by the controlled-vocabulary fit. Removed unverifiable street addresses and an unverified open-roles breakdown. Overall confidence remains high.",
      "research_notes": {
        "found": [
          "Founded 2021; HQ New York with SF and Stockholm offices (official company page)",
          "Series C: $355M at $4.65B post-money, May 2026, co-led by General Catalyst and Redpoint (official blog + multiple press)",
          "Total raised ~$466M; prior rounds: $7M seed (Amplify), $16M Series A (Redpoint, 2023), $87M Series B at $1.1B (Lux, Sept 2025)",
          "Revenue: ~$300M ARR (May 2026), up from ~$60M (Sept 2025); $50M ARR reported Feb 2026",
          "SOC 2 Type II + HIPAA (via BAA, Enterprise plan); trust portal at trust.modal.com",
          "Directly RL-relevant: Cognition states Modal powers its reinforcement-learning infrastructure; Applied Compute runs RL with thousands of parallel environments; Modal markets sandboxed code execution + agentic workloads (1B sandboxes launched, >1/3 of revenue)",
          "120+ team (official, May 2026); 30+ open roles",
          "Founders: Erik Bernhardsson (CEO, ex-Spotify/Better.com) and Akshat Bubna (CTO, ex-Scale AI)"
        ],
        "missing": [
          "Exact precise current headcount (official says 120+, LinkedIn snippet ~153, band 51-200 used)",
          "Headcount growth rate over a defined period",
          "ISO 27001 / GDPR status (not found on security docs)",
          "Researcher count (Modal is an infra company, not a research lab; no research team advertised)"
        ],
        "conflicts": [
          "Headcount: official Series C blog says '120+ team members' (May 2026) vs LinkedIn public snippet ~153 (April 2026), likely LinkedIn counts include all associated profiles; treated as 51-200 band",
          "Revenue: Feb 2026 TechCrunch cited ~$50M ARR while May 2026 sources cite ~$300M ARR, reflects rapid growth and timing differences, plus possible 'annualized' vs run-rate framing; both recorded",
          "Valuation timeline: Feb 2026 TechCrunch reported $2.5B talks; final Series C (May 2026) closed at $4.65B (first tranche at $2.5B, second at $4.65B), resolves the apparent conflict"
        ],
        "stale": [],
        "open_questions": [
          "Modal is execution/inference infrastructure, NOT an RL-environment dataset/eval vendor, included as 'Adjacent: execution infrastructure' per the fixed segment. Buyers would use Modal to RUN RL environments rather than to BUY pre-built environments.",
          "Whether Modal offers any productized RL-environment templates vs. customers building their own on the platform (appears to be the latter)",
          "ISO 27001 status not confirmed"
        ]
      }
    },
    {
      "rank": null,
      "focus_areas_normalised": [
        "Coding",
        "Enterprise Workflows",
        "Long-Horizon"
      ],
      "slug": "mercor",
      "brand_name": "Mercor",
      "segment": "Incumbents also building RL environments",
      "website": "https://www.mercor.com/",
      "focus_areas": [
        "enterprise workflows",
        "evaluation / benchmarks",
        "coding environments",
        "healthcare",
        "legal",
        "finance",
        "long-horizon / general reasoning"
      ],
      "positioning_summary": "Mercor is a venture-backed expert-marketplace and AI-training-data company that organizes a network of ~30,000+ domain experts (doctors, lawyers, bankers, engineers) to produce RLHF data, evaluations, and reinforcement-learning environments for frontier AI labs and enterprises. Originally an AI-recruiting platform, it pivoted to human-data/RL services and expanded its RL-environment capability via the February 2026 acquisition of Sepal AI.",
      "best_fit_use_case": "Buyers needing large-scale, expert-validated human data, professional-domain evals, and RL environments staffed quickly through an established contractor network.",
      "what_they_sell": {
        "value": "mixed",
        "confidence": "confirmed",
        "source": "https://www.mercor.com/research/ (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "GA",
        "confidence": "reported",
        "source": "https://techcrunch.com/2025/10/27/mercor-quintuples-valuation-to-10b-with-350m-series-c/ (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "managed-hosted (managed service / marketplace; RL environments delivered as a service to labs)",
        "confidence": "estimated",
        "source": "https://www.mercor.com/research/ (accessed 2026-06-07)"
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": ""
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "founded_year": {
        "value": 2023,
        "confidence": "confirmed",
        "source": "https://en.wikipedia.org/wiki/Mercor (accessed 2026-06-07); corroborated by https://research.contrary.com/company/mercor (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "San Francisco, CA, USA (181 Fremont)",
        "confidence": "confirmed",
        "source": "https://en.wikipedia.org/wiki/Mercor (accessed 2026-06-07); https://www.linkedin.com/company/mercor-ai (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://techcrunch.com/2025/10/27/mercor-quintuples-valuation-to-10b-with-350m-series-c/ (accessed 2026-06-07)"
      },
      "current_headcount": {
        "value": "LinkedIn public band 51-200 (full-time employees); separately operates a network of ~30,000+ contractors. Wikipedia cites ~300 'employees' (2025) but this likely blends staff and contractors; aggregator figures of 3,000+ clearly count the contractor network",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/mercor-ai (accessed 2026-06-07); https://en.wikipedia.org/wiki/Mercor (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "51-200",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/mercor-ai (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://www.mercor.com/research/ (accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Founders Brendan Foody, Adarsh Hiremath, Surya Midha are Thiel Fellows and college dropouts (not research-lab veterans)",
          "Sundeep Jain (President, hired May 2025) ex-Uber CPO/SVP Eng and ex-Google VP Product"
        ],
        "confidence": "reported",
        "source": "https://en.wikipedia.org/wiki/Mercor (accessed 2026-06-07); https://research.contrary.com/company/mercor (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "~$492M across 4 rounds (Seed ~$3M+, Series A $30M, Series B $100M, Series C $350M)",
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/mercor (accessed 2026-06-07); corroborated by https://techcrunch.com/2025/10/27/mercor-quintuples-valuation-to-10b-with-350m-series-c/ (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "Series C, $350M, October 2025",
        "confidence": "confirmed",
        "source": "https://techcrunch.com/2025/10/27/mercor-quintuples-valuation-to-10b-with-350m-series-c/ (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "$10B (Series C, Oct 2025)",
        "confidence": "confirmed",
        "source": "https://techcrunch.com/2025/10/27/mercor-quintuples-valuation-to-10b-with-350m-series-c/ (accessed 2026-06-07); https://www.cnbc.com/2025/10/27/ai-hiring-startup-mercor-funding.html (accessed 2026-06-07)"
      },
      "notable_investors": {
        "value": [
          "Felicis Ventures (led Series C and Series B)",
          "Benchmark",
          "General Catalyst",
          "Robinhood Ventures"
        ],
        "confidence": "confirmed",
        "source": "https://techcrunch.com/2025/10/27/mercor-quintuples-valuation-to-10b-with-350m-series-c/ (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "On track to ~$500M ARR (reported Oct 2025); pays contractors >$1.5M/day (implying ~$840M run-rate gross)",
        "confidence": "reported",
        "source": "https://techcrunch.com/2025/10/27/mercor-quintuples-valuation-to-10b-with-350m-series-c/ (accessed 2026-06-07)"
      },
      "notable_customers": {
        "value": [
          {
            "name": "OpenAI",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "Anthropic",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "Meta",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "Google DeepMind",
            "verification": "verified",
            "frontier_lab_tie": true
          }
        ],
        "confidence": "reported",
        "source": "https://techcrunch.com/2025/10/29/how-ai-labs-use-mercor-to-get-the-data-companies-wont-share/ (accessed 2026-06-07; names OpenAI, Anthropic, Meta); https://theaiinsider.tech/2025/10/29/mercor-raises-350m-to-scale-expert-driven-ai-training-reaching-10b-valuation/ (accessed 2026-06-07; names OpenAI, Google DeepMind, Meta). Third-party press, not vendor logo wall."
      },
      "published_papers_or_benchmarks": {
        "value": [
          "APEX (AI Productivity Index)",
          "APEX-Agents",
          "APEX-SWE",
          "ACE (AI Consumer Index)"
        ],
        "confidence": "confirmed",
        "source": "https://www.mercor.com/research/ (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://trust.mercor.com/ (accessed 2026-06-07; trust center exists but certification details not extractable from public page; note: press reports Mercor's compliance vendor Delve was accused of issuing fraudulent SOC 2 reports, casting doubt on any prior claimed certification)"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "https://trust.mercor.com/",
        "confidence": "confirmed",
        "source": "https://trust.mercor.com/ (accessed 2026-06-07)"
      },
      "distributed_remote": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://en.wikipedia.org/wiki/Mercor (accessed 2026-06-07; global contractor network across 6 continents)"
      },
      "overall_confidence": "high",
      "sources": [
        {
          "url": "https://www.mercor.com/",
          "accessed_date": "2026-06-07",
          "note": "Official site; positioning, APEX, SF HQ, trust portal reference"
        },
        {
          "url": "https://www.mercor.com/research/",
          "accessed_date": "2026-06-07",
          "note": "RL environments 3-step approach, APEX/APEX-Agents/APEX-SWE/ACE benchmarks, 'top 5 AI labs and 6 of Mag 7'"
        },
        {
          "url": "https://techcrunch.com/2025/10/27/mercor-quintuples-valuation-to-10b-with-350m-series-c/",
          "accessed_date": "2026-06-07",
          "note": "Series C $350M, $10B valuation, ~$500M ARR target, investors, 30k experts, $1.5M/day"
        },
        {
          "url": "https://en.wikipedia.org/wiki/Mercor",
          "accessed_date": "2026-06-07",
          "note": "Founded 2023, founders (Thiel Fellows), 181 Fremont SF HQ, ~300 employees + 30k contractors, OpenAI/Anthropic clients, 2026 data breach"
        },
        {
          "url": "https://www.orrick.com/en/News/2026/02/Mercor-Acquires-Sepal-AI",
          "accessed_date": "2026-06-07",
          "note": "Mercor acquired Sepal AI Feb 6 2026; Sepal specializes in training data, eval benchmarks, RL environments for frontier LLMs"
        },
        {
          "url": "https://theaiinsider.tech/2025/10/29/mercor-raises-350m-to-scale-expert-driven-ai-training-reaching-10b-valuation/",
          "accessed_date": "2026-06-07",
          "note": "Customers OpenAI, Google DeepMind, Meta; 30k experts at avg $85/hr"
        },
        {
          "url": "https://tracxn.com/d/companies/mercor/__764DkS7wJgmA1B8PuOw3_4HUbgJcaKjh8xY9UxvBIpY/funding-and-investors",
          "accessed_date": "2026-06-07",
          "note": "Total ~$492M over 4 rounds; Seed (General Catalyst), Series A $30M (Benchmark, $250M val), Series B $100M ($2B val), Series C $350M"
        },
        {
          "url": "https://trust.mercor.com/",
          "accessed_date": "2026-06-07",
          "note": "Trust Center exists; specific certifications not extractable from public landing page"
        },
        {
          "url": "https://techcrunch.com/2026/03/31/mercor-says-it-was-hit-by-cyberattack-tied-to-compromise-of-open-source-litellm-project/",
          "accessed_date": "2026-06-07",
          "note": "March 2026 supply-chain breach via LiteLLM compromise"
        },
        {
          "url": "https://techcrunch.com/2026/04/09/after-data-breach-10b-valued-startup-mercor-is-having-a-month/",
          "accessed_date": "2026-06-07",
          "note": "Breach fallout, lawsuits, ~4TB data exposure incl source code and API keys"
        },
        {
          "url": "https://newsletter.semianalysis.com/p/rl-environments-and-rl-for-science",
          "accessed_date": "2026-06-07",
          "note": "SemiAnalysis: incumbents (Scale, Mercor, Turing, Surge) leveraging contractor networks to move into environment/task creation"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "headcount_band",
          "was": "200+ (reported)",
          "now": "51-200 (reported)",
          "reason": "Draft's 200+ contradicts the LinkedIn public snippet, which shows the 51-200 employee band. The ~300 / 3,135 / 3.6K figures from Wikipedia and data aggregators (Tracxn, LeadIQ) conflate Mercor's ~30,000 contractors with full-time staff. Contrary Research notes a 'lean internal team' (~30-50 FT employees through early 2025). Per the brief's instruction to sanity-check against LinkedIn and treat 200+ as a red flag, downgraded to the LinkedIn band."
        },
        {
          "field": "current_headcount",
          "was": "~300 employees (2025); LinkedIn band 51-200; data aggregators show higher figures likely counting contractors (reported)",
          "now": "LinkedIn public band 51-200 (full-time employees); separately operates a network of ~30,000+ contractors. Wikipedia cites ~300 'employees' (2025) but this likely blends staff and contractors; aggregator figures of 3,000+ clearly count the contractor network (reported)",
          "reason": "Reordered to lead with the most defensible source (LinkedIn 51-200 band) and clarified that the ~300 and 3,000+ figures conflate the contractor network. Same underlying facts, more accurate framing; source updated to reflect LinkedIn as primary sanity-check."
        },
        {
          "field": "notable_customers (source)",
          "was": "theaiinsider + Wikipedia; 'Named by third-party press, not direct vendor logo wall.'",
          "now": "Added TechCrunch (2025-10-29) which states 'Some of Mercor's customers include OpenAI, Anthropic, and Meta'; Google DeepMind named by theaiinsider (2025-10-29)",
          "reason": "Strengthened the verification basis for the 'verified' status by anchoring OpenAI/Anthropic/Meta to a credible third-party outlet (TechCrunch) rather than the vendor site. frontier_lab_tie and verified status retained because these are independent press confirmations, not self-claimed logos."
        }
      ],
      "verification_summary": "Re-verified the highest-risk claims against primary/third-party sources. Funding stack confirmed: Series C $350M at $10B valuation (Oct 2025), led by Felicis (which also led the $100M Series B at $2B), with Benchmark, General Catalyst, and new investor Robinhood Ventures, corroborated by TechCrunch and CNBC. Total ~$492M (Tracxn) is corroborated by the per-round press breakdown, kept at 'reported'. Founded 2023; SF HQ (181 Fremont) confirmed. The Sepal AI acquisition (Feb 6, 2026, RL environments + training data for frontier labs) is confirmed and supports the directory note 'expert marketplace, environments emerging', this is the correct company. MAIN CORRECTION: headcount_band downgraded from 200+ to 51-200, matching the LinkedIn public snippet; the ~300 and 3,000+ figures conflate Mercor's ~30,000 contractor network with its lean full-time staff (Contrary Research notes ~30-50 FT employees through early 2025). Customers OpenAI/Anthropic/Meta are independently named by TechCrunch and Google DeepMind by theaiinsider, so 'verified' frontier-lab ties are retained but re-sourced to third-party press. SOC2 left 'unknown', the trust center shows no extractable certification, and press reports that Mercor's compliance vendor (Delve) allegedly issued fraudulent SOC 2 reports, so any certification claim is not credibly verifiable. focus_areas all conform to the controlled vocabulary.",
      "research_notes": {
        "found": [
          "Confirmed correct company: Mercor expert marketplace pivoted to AI training data / RL environments (matches directory note + Code/Healthcare/Law tags)",
          "Founded 2023; founders Brendan Foody, Adarsh Hiremath, Surya Midha (Thiel Fellows, college dropouts)",
          "Series C $350M Oct 2025 at $10B valuation; total ~$492M raised",
          "RL environments described explicitly on /research (3-step: realistic worlds, tools/apps, tasks+verifiers)",
          "Sepal AI acquired Feb 2026 to strengthen RL-environment / human-data capability",
          "APEX, APEX-Agents, APEX-SWE, ACE benchmarks covering banking, consulting, big law, primary care MD, SWE, consumer tasks",
          "Frontier-lab ties: OpenAI (verified via multiple sources), plus reported Anthropic, Google DeepMind, Meta",
          "Trust center exists at trust.mercor.com"
        ],
        "missing": [
          "Exact employee headcount (sources conflict; ~300 employees vs 51-200 LinkedIn band vs thousands on aggregators counting contractors)",
          "Specific SOC 2 / ISO 27001 / HIPAA certification status (trust center page content not publicly extractable)",
          "Researcher count and detailed research-team backgrounds",
          "Open roles count",
          "OSS license (no OSS product identified)",
          "Sepal AI acquisition deal terms (undisclosed)"
        ],
        "conflicts": [
          "Headcount: Wikipedia ~300 employees (2025); LinkedIn band 51-200; LeadIQ ~3.6K (Feb 2026); other ~5K (Mar 2026), higher figures likely conflate 30k contractors / different methodology",
          "Founding year: TechCrunch says 'almost three years ago' (~2022) but Wikipedia and other sources confirm 2023"
        ],
        "stale": [
          "Headcount/contractor figures (30k experts) are from Oct 2025 funding announcement (~8 months old)"
        ],
        "open_questions": [
          "Current security posture after March/April 2026 LiteLLM supply-chain breach (~4TB exposed incl source code/API keys, multiple class-action lawsuits), material risk signal for procurement",
          "Whether RL environments are sold as a standalone productized offering vs bundled managed service",
          "Post-Sepal integration status and whether Sepal's RL-environment product persists under Mercor"
        ]
      }
    },
    {
      "rank": null,
      "focus_areas_normalised": [
        "Enterprise Workflows",
        "Long-Horizon"
      ],
      "slug": "surge-ai",
      "brand_name": "Surge AI",
      "segment": "Incumbents also building RL environments",
      "website": "https://surgehq.ai/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://surgehq.ai/ accessed 2026-06-07; https://en.wikipedia.org/wiki/Surge_AI accessed 2026-06-07"
      },
      "founded_year": {
        "value": 2020,
        "confidence": "reported",
        "source": "https://en.wikipedia.org/wiki/Surge_AI accessed 2026-06-07 (founded 2020 by Edwin Chen); corroborated by CB Insights/Craft. Note: Sacra describes a '2021 launch'; getlatka lists '2018' (outlier). Single primary source, so reported."
      },
      "hq_location": {
        "value": "San Francisco, California, USA",
        "confidence": "confirmed",
        "source": "https://en.wikipedia.org/wiki/Surge_AI accessed 2026-06-07; Craft.co lists 2193 Fillmore St, San Francisco accessed 2026-06-07"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": "No additional offices disclosed; careers roles listed as Remote. https://surgehq.ai/careers accessed 2026-06-07"
      },
      "distributed_remote": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://surgehq.ai/careers accessed 2026-06-07 (open roles listed Remote); ~1M distributed annotator/contractor network per https://en.wikipedia.org/wiki/Surge_AI accessed 2026-06-07"
      },
      "current_headcount": {
        "value": "~110-121 full-time employees (2025); ~1M annotators/contractors",
        "confidence": "reported",
        "source": "https://en.wikipedia.org/wiki/Surge_AI accessed 2026-06-07 (110 employees, 2025); getlatka/Inc. cite ~121 FTE; ~1M annotators. Pitchbook ~250 is an outlier and not relied upon."
      },
      "headcount_band": {
        "value": "51-200",
        "confidence": "confirmed",
        "source": "https://www.linkedin.com/company/surge-ai accessed 2026-06-07 (public band 51-200); corroborated by Craft.co accessed 2026-06-07. Refers to full-time staff, not the ~1M contractor network."
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": 41,
        "confidence": "reported",
        "source": "https://surgehq.ai/careers accessed 2026-06-07 (draft figure; not independently re-counted)"
      },
      "what_they_sell": {
        "value": "mixed",
        "confidence": "confirmed",
        "source": "https://surgehq.ai/ and https://surgehq.ai/research accessed 2026-06-07 (human data/RLHF labeling platform plus agentic RL environments). Core business is human data; environments are an adjacent/secondary line."
      },
      "deployment_model": {
        "value": "managed-hosted",
        "confidence": "estimated",
        "source": "https://surgehq.ai/ accessed 2026-06-07 (managed human-data platform/service; no self-hosted/on-prem offering disclosed)"
      },
      "maturity": {
        "value": "GA",
        "confidence": "estimated",
        "source": "https://surgehq.ai/ accessed 2026-06-07; revenue-generating platform ($1.2B 2024) serving frontier labs per https://sacra.com/c/surge-ai/ accessed 2026-06-07"
      },
      "open_source": {
        "value": "no",
        "confidence": "reported",
        "source": "https://surgehq.ai/research accessed 2026-06-07 (benchmarks published as arXiv papers; EnterpriseBench/CoreCraft described as proprietary; no OSS license/repo disclosed)"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "total_raised": {
        "value": "Bootstrapped / no confirmed closed external round as of June 2026; first external raise (~$1B primary+secondary) reportedly initiated July 2025 but not confirmed closed",
        "confidence": "reported",
        "source": "https://en.wikipedia.org/wiki/Surge_AI accessed 2026-06-07; https://sacra.com/c/surge-ai/ accessed 2026-06-07 ('first-ever capital raise after operating profitably since launch'); https://www.bloomberg.com/news/articles/2025-07-30/scale-rival-surge-ai-in-talks-for-funding-at-25-billion-value accessed 2026-06-07"
      },
      "last_round": {
        "value": "In talks (July 2025) for ~$1B primary+secondary; NOT confirmed closed as of June 2026",
        "confidence": "reported",
        "source": "https://www.bloomberg.com/news/articles/2025-07-30/scale-rival-surge-ai-in-talks-for-funding-at-25-billion-value accessed 2026-06-07 (in talks; J.P. Morgan facilitating secondary). A LinkedIn post claiming a closed $30B round is not a credible source and is not relied upon."
      },
      "valuation": {
        "value": "~$25B (reported July 2025 fundraise talks; 'at least $25B' per Bloomberg, '>$15B' per Reuters); unconfirmed/not closed",
        "confidence": "reported",
        "source": "https://www.bloomberg.com/news/articles/2025-07-30/scale-rival-surge-ai-in-talks-for-funding-at-25-billion-value accessed 2026-06-07. getlatka's $20B and a LinkedIn $30B claim are inconsistent/weak and not relied upon."
      },
      "notable_investors": {
        "value": [],
        "confidence": "unknown",
        "source": "No confirmed investors; round not confirmed closed. Bloomberg named only POTENTIAL investors (Andreessen Horowitz, Warburg Pincus, TPG) with J.P. Morgan facilitating secondary. https://www.bloomberg.com/news/articles/2025-07-30/scale-rival-surge-ai-in-talks-for-funding-at-25-billion-value accessed 2026-06-07"
      },
      "revenue_signals": {
        "value": "$1.2B revenue (2024); ~$1.4B ARR claimed (Aug 2025, weak source)",
        "confidence": "reported",
        "source": "$1.2B 2024 corroborated by https://en.wikipedia.org/wiki/Surge_AI and https://sacra.com/c/surge-ai/ accessed 2026-06-07. The ~$1.4B ARR figure is from getlatka.com (low-reliability aggregator) only. Note: Meta reportedly ~$150M/yr, Google ~$100M/yr (Sacra)."
      },
      "notable_customers": {
        "value": [
          {
            "name": "Anthropic",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "Google",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "Meta",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "Microsoft",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "OpenAI (former; relationship reportedly ended)",
            "verification": "verified",
            "frontier_lab_tie": true
          }
        ],
        "confidence": "reported",
        "source": "Anthropic: Jared Kaplan endorsement on Surge's own blog https://surgehq.ai/blog/anthropic-surge-ai-rlhf-platform-train-llm-assistant-human-feedback accessed 2026-06-07 PLUS third-party Sacra. Google/Meta/Microsoft/OpenAI named by third-party https://sacra.com/c/surge-ai/ ('~12 frontier labs, most notably OpenAI, Google, Anthropic, Microsoft, Meta') and Inc. magazine, accessed 2026-06-07. OpenAI: a Forbes report (Sept 2025) cited an OpenAI spokesperson saying they NO LONGER work with Surge AI."
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No public trust/security page or SOC 2 statement found. accessed 2026-06-07"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No dedicated security/trust page found. accessed 2026-06-07"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://surgehq.ai/research accessed 2026-06-07 (published benchmarks/RL environments; arXiv:2602.16179 EnterpriseBench CoreCraft); https://surgehq.ai/careers lists Research Scientist roles, accessed 2026-06-07"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Research roles/fellowship advertised but team count not disclosed. accessed 2026-06-07"
      },
      "researcher_backgrounds": {
        "value": [
          "Founder/CEO Edwin Chen: ex-Google, ex-Facebook, ex-Twitter ML teams; MIT background (reportedly did not complete degree)"
        ],
        "confidence": "reported",
        "source": "https://en.wikipedia.org/wiki/Surge_AI accessed 2026-06-07; getlatka describes him as 'MIT Dropout'"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "EnterpriseBench CoreCraft: Training Generalizable Agents on High-Fidelity RL Environments - arXiv:2602.16179 (https://arxiv.org/abs/2602.16179)",
          "CoreCraft enterprise customer-support simulation (2,500+ entities, 14 entity types, 23 tools) - https://surgehq.ai/blog/enterprisebench-corecraft",
          "AdvancedIF (1,600+ prompts, expert rubrics) - https://surgehq.ai/research",
          "Complex-IF / Riemann-Bench / SciReview / GDP.PDF / Multimodal RewardBench 2 - https://surgehq.ai/research"
        ],
        "confidence": "confirmed",
        "source": "https://surgehq.ai/research accessed 2026-06-07; arXiv:2602.16179 accessed 2026-06-07"
      },
      "focus_areas": [
        "enterprise workflows",
        "evaluation / benchmarks",
        "long-horizon / general reasoning"
      ],
      "positioning_summary": "Surge AI is a bootstrapped, high-revenue human-data and RLHF labeling leader serving frontier AI labs, which has expanded into agentic RL environments via its EnterpriseBench suite (notably the CoreCraft enterprise customer-support simulation) and accompanying published benchmarks. As of June 2026 a reported ~$1B first external raise at a ~$25B valuation was in talks but not confirmed closed.",
      "best_fit_use_case": "Buyers needing expert human-feedback/RLHF data plus realistic enterprise-workflow RL environments and rubric-based agent evaluation from an established, frontier-lab-trusted vendor.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://surgehq.ai/",
          "accessed_date": "2026-06-07",
          "note": "Official homepage; mission/positioning, no customer logos or security details on landing page"
        },
        {
          "url": "https://surgehq.ai/research",
          "accessed_date": "2026-06-07",
          "note": "RL environments: EnterpriseBench, CoreCraft, agentic benchmarks and papers; frontier-lab collaborations"
        },
        {
          "url": "https://surgehq.ai/careers",
          "accessed_date": "2026-06-07",
          "note": "41 open roles, all Remote; Research Scientist roles and Research Fellowship"
        },
        {
          "url": "https://surgehq.ai/blog/anthropic-surge-ai-rlhf-platform-train-llm-assistant-human-feedback",
          "accessed_date": "2026-06-07",
          "note": "Verified Anthropic relationship; quote from co-founder Jared Kaplan"
        },
        {
          "url": "https://en.wikipedia.org/wiki/Surge_AI",
          "accessed_date": "2026-06-07",
          "note": "Founded 2020 by Edwin Chen, SF HQ, 110 employees 2025, ~1M annotators, $1.2B revenue 2024, RL environments + RLHF + data annotation"
        },
        {
          "url": "https://sacra.com/c/surge-ai/",
          "accessed_date": "2026-06-07",
          "note": "Revenue/funding context; bootstrapped, ~12 frontier-lab customers"
        },
        {
          "url": "https://www.bloomberg.com/news/articles/2025-07-30/scale-rival-surge-ai-in-talks-for-funding-at-25-billion-value",
          "accessed_date": "2026-06-07",
          "note": "July 2025 talks ~$1B at $25B+ valuation"
        },
        {
          "url": "https://getlatka.com/companies/surgehq.ai",
          "accessed_date": "2026-06-07",
          "note": "~$1.4B ARR reported Aug 2025 (third-party estimate)"
        },
        {
          "url": "https://www.linkedin.com/company/surge-ai",
          "accessed_date": "2026-06-07",
          "note": "Public LinkedIn band 51-200 employees; SF HQ; founded 2020"
        },
        {
          "url": "https://pitchbook.com/profiles/company/472331-53",
          "accessed_date": "2026-06-07",
          "note": "Alternative headcount figure (~250) - conflicts with LinkedIn/Wikipedia"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "founded_year.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "Sourced primarily to Wikipedia (single non-primary source). Sacra describes a '2021 launch' and getlatka lists '2018', creating minor disagreement. Downgraded per rules (no primary source for confirmed)."
        },
        {
          "field": "status.source",
          "was": "https://surgehq.ai/ accessed 2026-06-07",
          "now": "added Wikipedia corroboration",
          "reason": "Strengthened with an independent source for the 'active' status."
        },
        {
          "field": "open_roles_count.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "Could not independently re-count the 41 open roles; relying on the prior analyst's read of the careers page. A dynamic figure should not be 'confirmed' without a re-count."
        },
        {
          "field": "open_source.confidence",
          "was": "estimated",
          "now": "reported",
          "reason": "Research page explicitly describes benchmarks as arXiv papers / proprietary environments with no OSS license; this is a reported observation rather than a mere estimate."
        },
        {
          "field": "headcount_band.confidence",
          "was": "reported",
          "now": "confirmed",
          "reason": "LinkedIn public band 51-200 corroborated by Craft.co; consistent, directly observed public-snippet datum."
        },
        {
          "field": "current_headcount.value",
          "was": "~110-130 full-time employees (2025); ~1M annotators/contractors",
          "now": "~110-121 full-time employees (2025); ~1M annotators/contractors",
          "reason": "Best-supported FTE figures are 110 (Wikipedia) and ~121 (Inc./getlatka). The '130' upper bound and the Pitchbook ~250 figure are unsupported outliers."
        },
        {
          "field": "valuation.value",
          "was": "$15B-$25B+ (reported July 2025 fundraise talks)",
          "now": "~$25B (reported July 2025 talks; 'at least $25B' Bloomberg / '>$15B' Reuters); unconfirmed/not closed",
          "reason": "Clarified that $15B (Reuters, sought) and $25B (Bloomberg, 'at least') are different reports, and that no round has been confirmed closed. Explicitly rejected weak $20B (getlatka) and $30B (LinkedIn post) figures."
        },
        {
          "field": "last_round.value",
          "was": "In talks (July 2025) for ~$1B at $15B-$25B+ valuation; not confirmed closed",
          "now": "In talks (July 2025) for ~$1B primary+secondary; NOT confirmed closed as of June 2026",
          "reason": "Reaffirmed not-closed status and explicitly flagged the LinkedIn $30B 'closed round' claim as non-credible and not relied upon."
        },
        {
          "field": "notable_customers.value[Anthropic].verification",
          "was": "verified (sourced only to Surge's own blog)",
          "now": "verified (sourced to Surge blog Kaplan quote PLUS third-party Sacra)",
          "reason": "The vendor blog alone would be self-claimed; verification is retained only because credible third-party Sacra independently names Anthropic. Source strengthened."
        },
        {
          "field": "notable_customers.value[Google,Meta,Microsoft].verification",
          "was": "self-claimed",
          "now": "verified",
          "reason": "These are not vendor self-claims at all in this record's evidence: they are named by credible third parties (Sacra trade research and Inc. magazine), which meets the bar for third-party verification."
        },
        {
          "field": "notable_customers (OpenAI)",
          "was": "OpenAI listed as current self-claimed customer",
          "now": "OpenAI listed as FORMER customer (verified), relationship reportedly ended",
          "reason": "A Forbes report (Sept 2025) cited an OpenAI spokesperson stating they no longer work with Surge AI. Material decision-relevant correction; left in list but flagged as former."
        },
        {
          "field": "notable_customers.source",
          "was": "others reported in press (Sacra, Bloomberg, Reuters)",
          "now": "specific attribution to Sacra/Inc./Forbes with the OpenAI-ended caveat",
          "reason": "Tightened sourcing and added the OpenAI-ended fact."
        },
        {
          "field": "revenue_signals.source",
          "was": "Wikipedia + getlatka without reliability flag",
          "now": "same, with explicit flag that the $1.4B ARR is getlatka-only (weak)",
          "reason": "$1.2B (2024) is multi-source; the $1.4B ARR rests on a single low-reliability aggregator and is flagged accordingly."
        },
        {
          "field": "researcher_backgrounds.value",
          "was": "MIT background",
          "now": "MIT background (reportedly did not complete degree)",
          "reason": "Multiple sources describe Chen as an 'MIT dropout'; added nuance without overstating."
        },
        {
          "field": "published_papers_or_benchmarks.value",
          "was": "list with duplicate EnterpriseBench/CoreCraft entries and no arXiv ID",
          "now": "de-duplicated; added arXiv:2602.16179 ID and additional verified benchmarks (AdvancedIF, Complex-IF, Riemann-Bench, etc.)",
          "reason": "Draft listed EnterpriseBench and CoreCraft as separate/duplicated items; consolidated and added the verifiable arXiv identifier and other research-page benchmarks."
        },
        {
          "field": "positioning_summary",
          "was": "no mention of funding-round status",
          "now": "added that the ~$1B/~$25B raise was in talks but not confirmed closed",
          "reason": "Decision-relevant for procurement; prevents implying a closed mega-round."
        }
      ],
      "verification_summary": "Confirmed this is the correct company matching the directory note ('human-data leader, environments on the side'): Surge AI / Surge Labs Inc., SF, founded by Edwin Chen, RLHF/human-data platform that has added agentic RL environments (EnterpriseBench/CoreCraft, arXiv:2602.16179). Highest-risk findings: (1) Funding is NOT closed, Bloomberg (Jul 2025) reported only 'talks' for ~$1B at 'at least $25B'; the draft's '$15B-$25B+' conflated Reuters ($15B sought) and Bloomberg ($25B); a LinkedIn post claiming a closed $30B round and getlatka's $20B are weak/inconsistent and rejected. Kept total_raised/last_round/valuation at 'reported' and notable_investors empty (Bloomberg named only POTENTIAL investors). (2) Headcount band 51-200 confirmed via LinkedIn+Craft; the company is ~110-121 FTE (Pitchbook ~250 is an outlier). (3) Customers: Anthropic confirmed via a Jared Kaplan endorsement (on Surge's own blog) and corroborated by third-party Sacra; Google/Meta/Microsoft verified via third-party Sacra/Inc.; OpenAI corrected to a FORMER customer (Forbes, Sept 2025, citing an OpenAI spokesperson that they no longer work with Surge). (4) No SOC 2 / trust page found, left unknown. (5) Revenue: $1.2B (2024) is multi-source; $1.4B ARR is getlatka-only and flagged weak. founded_year downgraded to 'reported' (Wikipedia-only, minor 2020/2021/2018 disagreement). Segment preserved verbatim; focus_areas all within the controlled vocabulary. Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Right company confirmed (human-data/RLHF leader founded 2020 by Edwin Chen, SF) building RL environments on the side via EnterpriseBench/CoreCraft",
          "Verified Anthropic relationship (Jared Kaplan quote on Surge's own site)",
          "Reported $1.2B revenue (2024); bootstrapped to date; July 2025 fundraise talks at $15B-25B+",
          "RL environments offering and published benchmarks (EnterpriseBench, CoreCraft, Hierarchy of Agentic Capabilities)",
          "41 open roles, all remote; research scientist roles + research fellowship"
        ],
        "missing": [
          "SOC 2 / security/trust page (none found)",
          "Exact researcher count",
          "Confirmed closed funding round / lead investors",
          "OSS license / repos",
          "Other office locations"
        ],
        "conflicts": [
          "Headcount: Wikipedia 110 (2025) vs press ~130 FTE vs LinkedIn band 51-200 vs PitchBook ~250, likely FTE vs total counting differences",
          "Contractor count: ~50,000 (some press) vs ~1,000,000 (Wikipedia 2025)",
          "Valuation: $15B (Reuters) vs $25B+ (Bloomberg) for July 2025 talks"
        ],
        "stale": [
          "Most funding/revenue/valuation data is from mid-2025 (>6mo old but <12mo as of 2026-06-07); fundraise outcome not confirmed"
        ],
        "open_questions": [
          "Did the July 2025 ~$1B raise close, and at what final valuation?",
          "Does Surge hold any security certifications (SOC 2/ISO 27001)?",
          "How large is the dedicated research team building RL environments?"
        ]
      }
    },
    {
      "rank": null,
      "focus_areas_normalised": [
        "Coding",
        "Enterprise Workflows",
        "Long-Horizon",
        "Math"
      ],
      "slug": "prime-intellect",
      "brand_name": "Prime Intellect",
      "segment": "Open-source & open environments",
      "website": "https://www.primeintellect.ai/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.primeintellect.ai/ (accessed 2026-06-07); active GitHub releases and blog posts through 2026"
      },
      "founded_year": {
        "value": 2023,
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/prime-intellect/__c00KIKAVH9b1POsp5fnIynw_8joG2ie8qsqmCTAB6Bc (accessed 2026-06-07) - multiple aggregators state founded 2023 by Vincent Weisser & Johannes Hagemann; no primary registry confirmation"
      },
      "hq_location": {
        "value": "San Francisco, USA",
        "confidence": "confirmed",
        "source": "https://www.linkedin.com/company/primeintellect-ai (accessed 2026-06-07) - public snippet lists San Francisco, US"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "distributed_remote": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://wellfound.com/company/prime-intellect/jobs (accessed 2026-06-07) - several roles list remote/flexible location"
      },
      "current_headcount": {
        "value": "11-50 (LinkedIn public snippet, as of 2026-06-07); Sacra reported 23 FTE (date unclear, likely 2025)",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/primeintellect-ai (accessed 2026-06-07); https://sacra.com/c/prime-intellect/ (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/primeintellect-ai (accessed 2026-06-07) - public snippet '11-50 employees'; consistent with Sacra 23 FTE"
      },
      "headcount_growth": {
        "value": "229% YoY (per Sacra; period unspecified, likely 2024-2025, stale)",
        "confidence": "reported",
        "source": "https://sacra.com/c/prime-intellect/ (accessed 2026-06-07) - single-source aggregator, period not stated"
      },
      "open_roles_count": {
        "value": "~28-29 open roles",
        "confidence": "reported",
        "source": "https://www.fastaijobs.com/companies/prime-intellect (accessed 2026-06-07); https://wellfound.com/company/prime-intellect/jobs (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "~$70.4M (reported by aggregator); confirmed components: $5.5M seed (Apr 2024) + $15M (Feb 2025); a ~$49.9M Series B (Dec 2025) is reported by Tracxn only",
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/prime-intellect/__c00KIKAVH9b1POsp5fnIynw_8joG2ie8qsqmCTAB6Bc/funding-and-investors (accessed 2026-06-07) - single aggregator for the $70.4M total / Series B; seed and $15M rounds independently confirmed via PRNewswire/Fortune and official blog"
      },
      "last_round": {
        "value": "Series B, ~$49.9M, December 2025 (reported by Tracxn aggregator only; no primary announcement located)",
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/prime-intellect/__c00KIKAVH9b1POsp5fnIynw_8joG2ie8qsqmCTAB6Bc/funding-and-investors (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Founders Fund (led $15M round)",
          "Menlo Ventures",
          "Distributed Global (co-led seed)",
          "CoinFund (co-led seed)",
          "Andrej Karpathy (angel)",
          "Clem Delangue / Hugging Face (angel)",
          "Tri Dao (angel)",
          "Emad Mostaque (angel)",
          "Dylan Patel (angel)"
        ],
        "confidence": "reported",
        "source": "https://www.primeintellect.ai/blog/fundraise (accessed 2026-06-07) - official $15M announcement naming Founders Fund, Menlo, Karpathy, Delangue, Tri Dao, Mostaque, Dylan Patel; https://www.prnewswire.com/news-releases/prime-intellect-secures-5-5m-in-seed-funding-co-led-by-distributed-global-and-coinfund-302124585.html (accessed 2026-06-07) - seed co-leads"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "what_they_sell": {
        "value": "mixed",
        "confidence": "confirmed",
        "source": "https://www.primeintellect.ai/ (accessed 2026-06-07) - GPU compute, managed RL post-training (Lab), hosted inference, Environments Hub, and open-source environments/frameworks (verifiers, prime-rl)"
      },
      "deployment_model": {
        "value": "managed-hosted + API (hosted training/inference, on-demand GPU); open-source frameworks self-hostable",
        "confidence": "confirmed",
        "source": "https://www.primeintellect.ai/ (accessed 2026-06-07); https://github.com/PrimeIntellect-ai/verifiers (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "GA (platform live; Environments Hub and Lab publicly available)",
        "confidence": "reported",
        "source": "https://www.primeintellect.ai/blog/environments (accessed 2026-06-07); https://docs.primeintellect.ai/ (accessed 2026-06-07)"
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/PrimeIntellect-ai/verifiers (accessed 2026-06-07) - MIT-licensed library; https://github.com/PrimeIntellect-ai/prime-rl (accessed 2026-06-07)"
      },
      "license": {
        "value": "MIT (Verifiers library); other repos vary",
        "confidence": "confirmed",
        "source": "https://github.com/PrimeIntellect-ai/verifiers (accessed 2026-06-07) - MIT license confirmed; created by Will Brown (@willccbb), maintained by PrimeIntellect-ai"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://arxiv.org/abs/2512.16144 (accessed 2026-06-07) - INTELLECT-3 Technical Report authored by 'Prime Intellect Team'; hiring Research Engineers"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Johannes Hagemann (co-founder/CTO) - ex-Aleph Alpha (distributed training)",
          "Will Brown - created Verifiers RL environments library; research lead (not a founder)",
          "Vincent Weisser (co-founder/CEO) - background in DeSci/Web3 (e.g. VitaDAO)"
        ],
        "confidence": "reported",
        "source": "https://nextomoro.com/johannes-hagemann/ (accessed 2026-06-07); https://github.com/PrimeIntellect-ai/verifiers (accessed 2026-06-07) - 'Originally created by Will Brown (@willccbb)'; https://www.vincentweisser.com/ (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "INTELLECT-2: The First Globally Distributed RL Training of a 32B Parameter Model (https://www.primeintellect.ai/blog/intellect-2)",
          "INTELLECT-3: Technical Report - 106B MoE (12B active) trained with large-scale RL (https://arxiv.org/abs/2512.16144)"
        ],
        "confidence": "confirmed",
        "source": "https://www.primeintellect.ai/blog/intellect-2 (accessed 2026-06-07); https://arxiv.org/abs/2512.16144 (accessed 2026-06-07) - authored by 'Prime Intellect Team'"
      },
      "notable_customers": {
        "value": [
          {
            "name": "NVIDIA (integration/collaboration - Prime Intellect integrates NVIDIA NeMo Gym training environments; per NVIDIA newsroom)",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "Zapier (case study - AutomationBench agent improvement loop)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Ramp (case study - FastAsk subagent trained via Lab)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Arcee AI",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Character AI",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Browserbase",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Groq",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://nvidianews.nvidia.com/news/nvidia-debuts-nemotron-3-family-of-open-models (accessed 2026-06-07) - NVIDIA newsroom confirms Prime Intellect integrating NeMo Gym (note: this is Prime Intellect adopting NVIDIA tooling, not NVIDIA buying from Prime Intellect); https://www.primeintellect.ai/case-study/zapier and /case-study/ramp and homepage logos (accessed 2026-06-07) - vendor's own site, self-claimed"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "focus_areas": [
        "coding environments",
        "science/math",
        "evaluation / benchmarks",
        "execution infrastructure",
        "long-horizon / general reasoning",
        "enterprise workflows"
      ],
      "positioning_summary": "Prime Intellect operates an open-source RL stack - the Environments Hub (2,500+ community RL environments), the Verifiers library and prime-rl training framework, plus hosted RL post-training (Lab), evals, inference and on-demand GPU compute. It positions itself as the open alternative to closed big-lab RL tooling and also trains open models (INTELLECT series).",
      "best_fit_use_case": "Teams wanting open, community-shared RL environments plus managed RL post-training/compute to build or fine-tune agentic models without locked-down proprietary tooling.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.primeintellect.ai/",
          "accessed_date": "2026-06-07",
          "note": "Homepage - product stack, customer logos (Ramp, Zapier, NVIDIA, Character AI, Arcee, Browserbase)"
        },
        {
          "url": "https://www.primeintellect.ai/blog/environments",
          "accessed_date": "2026-06-07",
          "note": "Environments Hub launch, beta contributors, dated Aug 27 2025"
        },
        {
          "url": "https://www.primeintellect.ai/blog/fundraise",
          "accessed_date": "2026-06-07",
          "note": "$15M round Feb 28 2025, Founders Fund lead, angel list, total >$20M at that time"
        },
        {
          "url": "https://www.prnewswire.com/news-releases/prime-intellect-secures-5-5m-in-seed-funding-co-led-by-distributed-global-and-coinfund-302124585.html",
          "accessed_date": "2026-06-07",
          "note": "$5.5M seed Apr 2024, Distributed Global + CoinFund"
        },
        {
          "url": "https://tracxn.com/d/companies/prime-intellect/__c00KIKAVH9b1POsp5fnIynw_8joG2ie8qsqmCTAB6Bc/funding-and-investors",
          "accessed_date": "2026-06-07",
          "note": "Total $70.4M, Series B $49.9M Dec 1 2025"
        },
        {
          "url": "https://sacra.com/c/prime-intellect/",
          "accessed_date": "2026-06-07",
          "note": "23 FTE, 229% YoY growth, founders; funding figures STALE (says $20M total)"
        },
        {
          "url": "https://www.linkedin.com/company/primeintellect-ai",
          "accessed_date": "2026-06-07",
          "note": "11-50 employees, San Francisco HQ (public snippet)"
        },
        {
          "url": "https://github.com/PrimeIntellect-ai/verifiers",
          "accessed_date": "2026-06-07",
          "note": "Verifiers RL env/eval library, MIT license, 4.2k stars, active (v0.1.15.dev18 Jun 1 2026), created by Will Brown"
        },
        {
          "url": "https://github.com/PrimeIntellect-ai/prime-rl",
          "accessed_date": "2026-06-07",
          "note": "prime-rl agentic RL training framework"
        },
        {
          "url": "https://www.primeintellect.ai/case-study/zapier",
          "accessed_date": "2026-06-07",
          "note": "Zapier AutomationBench case study using Verifiers + Lab"
        },
        {
          "url": "https://www.primeintellect.ai/case-study/ramp",
          "accessed_date": "2026-06-07",
          "note": "Ramp FastAsk subagent trained via Lab, beat Opus 4.6 on spreadsheet search (vendor claim)"
        },
        {
          "url": "https://www.primeintellect.ai/blog/intellect-3",
          "accessed_date": "2026-06-07",
          "note": "INTELLECT-3 100B+ MoE open model"
        },
        {
          "url": "https://arxiv.org/pdf/2512.16144",
          "accessed_date": "2026-06-07",
          "note": "INTELLECT-3 technical report"
        },
        {
          "url": "https://arxiv.org/pdf/2601.16443",
          "accessed_date": "2026-06-07",
          "note": "Endless Terminals: Scaling RL Environments for Terminal Agents"
        },
        {
          "url": "https://nextomoro.com/johannes-hagemann/",
          "accessed_date": "2026-06-07",
          "note": "Hagemann background - ex-Aleph Alpha, CTO"
        },
        {
          "url": "https://www.vincentweisser.com/",
          "accessed_date": "2026-06-07",
          "note": "Weisser background - CEO, ex-DeSci/Web3"
        },
        {
          "url": "https://www.fastaijobs.com/companies/prime-intellect",
          "accessed_date": "2026-06-07",
          "note": "~29 open roles"
        },
        {
          "url": "https://wellfound.com/company/prime-intellect/jobs",
          "accessed_date": "2026-06-07",
          "note": "Open roles, remote flexibility"
        },
        {
          "url": "https://www.sequoiacap.com/podcast/building-the-github-for-rl-environments-prime-intellects-will-brown-johannes-hagemann/",
          "accessed_date": "2026-06-07",
          "note": "Founder interview - 'GitHub for RL environments' positioning"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "published_papers_or_benchmarks",
          "was": "Included 'Endless Terminals: Scaling RL Environments for Terminal Agents (arXiv:2601.16443)' as a Prime Intellect paper",
          "now": "Removed Endless Terminals; list now contains only INTELLECT-2 and INTELLECT-3",
          "reason": "MISATTRIBUTION. arXiv:2601.16443 is authored by Kanishk Gandhi, Shivam Garg, Noah D. Goodman, Dimitris Papailiopoulos (Stanford-affiliated researchers) with code at github.com/kanishkg/endless-terminals. Prime Intellect is not an author or affiliation. The draft incorrectly credited this independent paper to Prime Intellect."
        },
        {
          "field": "notable_customers",
          "was": "NVIDIA listed as 'NVIDIA (Nemotron coalition collaboration / partner)', verification 'self-claimed', frontier_lab_tie false",
          "now": "NVIDIA reframed as an integration/collaboration (Prime Intellect integrates NVIDIA NeMo Gym), verification 'verified', frontier_lab_tie true",
          "reason": "NVIDIA's own newsroom (nvidianews.nvidia.com Nemotron 3 announcement) independently confirms the relationship, so it is third-party verified rather than self-claimed. Marked frontier_lab_tie true (NVIDIA Nemotron is a frontier model line). Clarified directionality: Prime Intellect adopts NVIDIA tooling, so NVIDIA is not strictly a paying customer."
        },
        {
          "field": "notable_customers",
          "was": "Other customers listed with bare names",
          "now": "Added context labels (case study / logo) and kept all of Zapier, Ramp, Arcee, Character AI, Browserbase, Groq as 'self-claimed'",
          "reason": "All remaining customers appear only on Prime Intellect's own website (case studies/logos) with no third-party confirmation found, so they correctly remain self-claimed per the rule that vendor-site claims are never 'verified'."
        },
        {
          "field": "total_raised",
          "was": "~$70.4M (reported); confidence reported, presented without weak-source caveat",
          "now": "~$70.4M total flagged as single-aggregator (Tracxn); confirmed components separated out ($5.5M seed + $15M); Series B reported-only",
          "reason": "The $70.4M total and the $49.9M Series B appear only on the Tracxn aggregator; no primary announcement or major-press confirmation located. The seed ($5.5M) and $15M rounds are independently confirmed (PRNewswire/Fortune/official blog), but the Series B is not. Added explicit weak-source flag while keeping confidence 'reported'."
        },
        {
          "field": "last_round",
          "was": "Series B, $49.9M, December 2025 (presented as a firm figure)",
          "now": "Series B, ~$49.9M, December 2025 (reported by Tracxn aggregator only; no primary announcement located)",
          "reason": "The precise $49.9M figure and Dec 2025 date come solely from Tracxn. No official Prime Intellect blog post or credible press release for a Series B was found, so the figure is downgraded in framing to a single-source aggregator claim."
        },
        {
          "field": "researcher_backgrounds",
          "was": "'Founders ex-DeSci/Web3 (VitaDAO, Bio Protocol) - Vincent Weisser CEO' and 'Will Brown - ... research lead'",
          "now": "Clarified Will Brown is a research lead and NOT a founder; clarified Weisser (CEO) and Hagemann (CTO) are the two co-founders",
          "reason": "Sources (Tracxn founders page, The Org, official blog) confirm the company was co-founded by Vincent Weisser (CEO) and Johannes Hagemann (CTO). Will Brown created the Verifiers library and is a research lead but is not a co-founder; clarified to avoid implying founder status."
        },
        {
          "field": "overall_confidence",
          "was": "high",
          "now": "medium",
          "reason": "A material misattribution (Endless Terminals paper) was present in the draft and the Series B funding rests on a single aggregator without primary confirmation. These reduce overall reliability from high to medium."
        }
      ],
      "verification_summary": "Re-verified Prime Intellect against primary and third-party sources. Company identity confirmed correct - this is the open-source RL/environments company ('GitHub for RL environments', Environments Hub, Verifiers, prime-rl, INTELLECT models), matching the directory note as the open-source backbone of the category. Founders confirmed: Vincent Weisser (CEO) and Johannes Hagemann (CTO), founded 2023; Will Brown is a research lead (Verifiers creator), not a founder. Headcount sanity-checks out at 11-50 (LinkedIn public snippet; Sacra 23 FTE) - not 200+. Key correction: the 'Endless Terminals' paper (arXiv:2601.16443) was misattributed to Prime Intellect; its actual authors are Kanishk Gandhi, Shivam Garg, Noah Goodman, Dimitris Papailiopoulos with no Prime Intellect affiliation, so it was removed. INTELLECT-2 and INTELLECT-3 are genuine Prime Intellect publications (INTELLECT-3 arXiv:2512.16144 authored by 'Prime Intellect Team'). Funding: $5.5M seed (Apr 2024, Distributed Global + CoinFund) and $15M (Feb 2025, Founders Fund lead) are independently confirmed; the $70.4M total and ~$49.9M Series B (Dec 2025) rest only on the Tracxn aggregator with no primary announcement found, so they were flagged as single-source while kept at 'reported'. Valuation unknown. NVIDIA relationship upgraded to 'verified' (confirmed by NVIDIA's own newsroom - Prime Intellect integrates NeMo Gym) with frontier_lab_tie true, but reframed as an integration rather than a customer; all other named customers (Zapier, Ramp, Arcee, Character AI, Browserbase, Groq) remain self-claimed (vendor-site only). No SOC 2, certifications, or trust/security page found - all left unknown. Overall confidence lowered from high to medium due to the misattribution and single-source Series B.",
      "research_notes": {}
    },
    {
      "rank": null,
      "focus_areas_normalised": [
        "Coding",
        "Computer Use"
      ],
      "slug": "daytona",
      "brand_name": "Daytona",
      "segment": "Adjacent: execution infrastructure",
      "website": "https://www.daytona.io/",
      "focus_areas": [
        "execution infrastructure",
        "coding environments",
        "computer use environments"
      ],
      "positioning_summary": "Daytona provides secure, elastic, programmatic sandboxes ('computers') that AI agents and developers can spin up in under ~90ms to run untrusted AI-generated code in isolated, stateful runtimes. It offers a managed-hosted service plus an open-source self-hostable stack, and is positioned as agent-native execution/runtime infrastructure for code execution, computer use, and RL/eval workloads.",
      "best_fit_use_case": "Buyers needing fast, isolated, programmatic sandboxes to safely execute AI-generated code or run agent tool/computer-use loops at scale.",
      "what_they_sell": {
        "value": "infra",
        "confidence": "confirmed",
        "source": "https://www.daytona.io/ (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "managed-hosted and self-hosted (open source)",
        "confidence": "confirmed",
        "source": "https://www.daytona.io/ (accessed 2026-06-07); https://github.com/daytonaio/daytona (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "GA",
        "confidence": "reported",
        "source": "https://www.prnewswire.com/news-releases/daytona-raises-24m-series-a-to-give-every-agent-a-computer-302680740.html (accessed 2026-06-07), commercial product with paying customers and revenue run rate"
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/daytonaio/daytona (accessed 2026-06-07)"
      },
      "license": {
        "value": "AGPL-3.0",
        "confidence": "confirmed",
        "source": "https://github.com/daytonaio/daytona (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2023,
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/daytonaio (accessed 2026-06-07); pre-seed Nov 2023 per https://www.daytona.io/dotfiles/daytona-secures-5m-to-simplify-development-environments (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "New York, NY, United States",
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/daytona (accessed 2026-06-07); https://www.daytona.io/dotfiles/daytona-lights-up-times-square (accessed 2026-06-07). Note: a LinkedIn public snippet also surfaced San Francisco, CA as an associated location"
      },
      "other_locations": {
        "value": [
          "San Francisco, CA (work location for open roles / community events)",
          "Croatia (work location for open roles)"
        ],
        "confidence": "reported",
        "source": "https://www.daytona.io/careers (accessed 2026-06-07), listed as work locations for open roles; founders are Croatia-connected (ex-Codeanywhere)"
      },
      "distributed_remote": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://www.daytona.io/careers (accessed 2026-06-07), roles open across NY, SF, and Croatia"
      },
      "current_headcount": {
        "value": "~20 (Feb 2026, company-stated at Series A); third-party trackers list ~63 associated profiles and an 11-50 band as of 2026-06-07",
        "confidence": "reported",
        "source": "https://www.prnewswire.com/news-releases/daytona-raises-24m-series-a-to-give-every-agent-a-computer-302680740.html and https://tracxn.com/d/companies/daytona and https://www.linkedin.com/company/daytonaio (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/daytonaio (accessed 2026-06-07), LinkedIn size band; company stated ~20 at Series A"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": 10,
        "confidence": "reported",
        "source": "https://www.daytona.io/careers (accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "has_researchers": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Founders Ivan Burazin (CEO), Vedran Jukic (CTO), Goran Draganic (Chief Architect) previously co-founded Codeanywhere, a cloud IDE/dev-environment company; Burazin was also Chief Developer Experience Officer at Infobip"
        ],
        "confidence": "reported",
        "source": "https://www.daytona.io/dotfiles/daytona-secures-5m-to-simplify-development-environments (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "total_raised": {
        "value": "~$31M total disclosed ($2M pre-seed Nov 2023 + $5M seed Jun 2024 + $24M Series A Feb 2026)",
        "confidence": "reported",
        "source": "https://www.daytona.io/dotfiles/daytona-secures-5m-to-simplify-development-environments; https://www.prnewswire.com/news-releases/daytona-raises-24m-series-a-to-give-every-agent-a-computer-302680740.html; https://tracxn.com/d/companies/daytona (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "Series A, $24M, February 2026",
        "confidence": "confirmed",
        "source": "https://www.prnewswire.com/news-releases/daytona-raises-24m-series-a-to-give-every-agent-a-computer-302680740.html (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "FirstMark Capital (Series A lead; Matt Turck joined board)",
          "Pace Capital",
          "Upfront Ventures (seed lead, Series A participant)",
          "E2VC",
          "Darkmode",
          "Datadog (strategic)",
          "Figma Ventures (strategic)",
          "500 Global (seed; not named in Series A release)"
        ],
        "confidence": "reported",
        "source": "https://www.prnewswire.com/news-releases/daytona-raises-24m-series-a-to-give-every-agent-a-computer-302680740.html (FirstMark, Pace, Upfront, E2VC, Darkmode, Datadog, Figma Ventures); https://www.daytona.io/dotfiles/daytona-secures-5m-to-simplify-development-environments (Upfront, 500 Global) (accessed 2026-06-07). Note: angel investors (Yurtseven, Browne, Reyes, Shamgunov) listed in the draft were NOT found in the primary Series A release and are removed pending a primary source"
      },
      "revenue_signals": {
        "value": "Company-claimed: ~$1M forward revenue run rate within three months, doubled six weeks later (as of Series A, Feb 2026)",
        "confidence": "reported",
        "source": "https://www.prnewswire.com/news-releases/daytona-raises-24m-series-a-to-give-every-agent-a-computer-302680740.html (accessed 2026-06-07), vendor-stated, unaudited"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Anthropic, Claude Managed Agents self-hosted sandbox launch partner (platform integration, one of four: Cloudflare, Daytona, Modal, Vercel), NOT a disclosed Daytona customer",
            "verification": "verified",
            "frontier_lab_tie": true
          },
          {
            "name": "Clay (Sculptor GTM agent runs in production on Claude Managed Agents + Daytona)",
            "verification": "verified",
            "frontier_lab_tie": false
          },
          {
            "name": "LangChain",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Turing",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Writer",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "SambaNova",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "LangChain/Turing/Writer/SambaNova self-claimed via https://www.prnewswire.com/news-releases/daytona-raises-24m-series-a-to-give-every-agent-a-computer-302680740.html; Anthropic partnership and Clay production usage verified via https://thenewstack.io/anthropic-mcp-tunnels-sandboxes/ and https://claude.com/customers/clay and https://www.daytona.io/docs/en/guides/claude/claude-managed-agents/ (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "Type I",
        "confidence": "reported",
        "source": "https://trust.daytona.io/ (accessed 2026-06-07), SOC 2 Type 1 report listed in Trust Center; not independently verified against an audit registry"
      },
      "other_certifications": {
        "value": [
          "HIPAA (report listed in Trust Center)",
          "ISO/IEC 27001 (listed in Trust Center; certified vs in-progress not stated)",
          "GDPR (listed in Trust Center)"
        ],
        "confidence": "reported",
        "source": "https://trust.daytona.io/ (accessed 2026-06-07)"
      },
      "security_page": {
        "value": "https://trust.daytona.io/",
        "confidence": "confirmed",
        "source": "https://trust.daytona.io/ (accessed 2026-06-07)"
      },
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.daytona.io/ (accessed 2026-06-07); raised Series A Feb 2026"
      },
      "overall_confidence": "high",
      "sources": [
        {
          "url": "https://www.daytona.io/",
          "accessed_date": "2026-06-07",
          "note": "Official site, product, deployment models, positioning"
        },
        {
          "url": "https://github.com/daytonaio/daytona",
          "accessed_date": "2026-06-07",
          "note": "OSS repo, AGPL-3.0, ~72.5k stars, v0.184.0 (2026-06-03)"
        },
        {
          "url": "https://www.prnewswire.com/news-releases/daytona-raises-24m-series-a-to-give-every-agent-a-computer-302680740.html",
          "accessed_date": "2026-06-07",
          "note": "Series A press release, $24M, investors, ~20 employees, customers, revenue signals"
        },
        {
          "url": "https://www.daytona.io/dotfiles/daytona-raises-24m-series-a-to-give-every-agent-a-computer",
          "accessed_date": "2026-06-07",
          "note": "Vendor Series A blog post"
        },
        {
          "url": "https://www.daytona.io/dotfiles/daytona-secures-5m-to-simplify-development-environments",
          "accessed_date": "2026-06-07",
          "note": "Seed round ($5M, Upfront/500 Global), founder backgrounds, pre-seed history"
        },
        {
          "url": "https://www.linkedin.com/company/daytonaio",
          "accessed_date": "2026-06-07",
          "note": "Public LinkedIn, founded 2023, NY HQ, 11-50 band / 63 listed"
        },
        {
          "url": "https://trust.daytona.io/",
          "accessed_date": "2026-06-07",
          "note": "Trust Center, SOC 2 Type 1, ISO 27001, HIPAA, GDPR"
        },
        {
          "url": "https://www.daytona.io/careers",
          "accessed_date": "2026-06-07",
          "note": "10 open roles; NY/SF/Croatia locations"
        },
        {
          "url": "https://www.anthropic.com/engineering/managed-agents",
          "accessed_date": "2026-06-07",
          "note": "Anthropic, Daytona as Claude Managed Agents sandbox partner (verified frontier-lab tie)"
        },
        {
          "url": "https://www.daytona.io/docs/en/guides/claude/claude-managed-agents/",
          "accessed_date": "2026-06-07",
          "note": "Daytona docs for running Claude Managed Agents"
        },
        {
          "url": "https://tracxn.com/d/companies/daytona/__TzaXWUoUzJqHEQmWu6SWgVcuHYFltYtBs_uhDgw84Ss",
          "accessed_date": "2026-06-07",
          "note": "Tracxn, $31M total raised, headcount 63 (Apr 2026)"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "notable_customers (Anthropic)",
          "was": "Anthropic listed as 'verified' customer ('Claude Managed Agents self-hosted sandbox launch partner')",
          "now": "Anthropic reclassified as a verified platform PARTNER/integration (one of four launch sandbox partners: Cloudflare, Daytona, Modal, Vercel), explicitly noted as NOT a disclosed Daytona customer; frontier_lab_tie remains true",
          "reason": "Primary sources (Anthropic announcement coverage, The New Stack, Daytona docs) show a sandbox-provider partnership, not Anthropic purchasing/using Daytona as a customer. The Series A press release does not name Anthropic at all. Calling Anthropic a 'verified customer' overstates the relationship."
        },
        {
          "field": "notable_customers (Clay)",
          "was": "not present",
          "now": "Added Clay as a verified production user of Claude Managed Agents on Daytona",
          "reason": "Third-party/primary confirmation (claude.com/customers/clay; Anthropic launch coverage) names Clay's Sculptor agent running in production on Daytona, a genuinely verifiable customer reference, unlike the self-claimed PR list."
        },
        {
          "field": "notable_investors",
          "was": "confidence 'confirmed'; included '500 Global' and four named angels (Gorkem Yurtseven, Theo Browne, Eno Reyes, Nikita Shamgunov) all sourced to the Series A press release",
          "now": "confidence downgraded to 'reported'; angels removed (flagged as not found in primary source); 500 Global retained but attributed to the seed round, not Series A",
          "reason": "The Series A press release names only FirstMark, Pace, Upfront, E2VC, Darkmode, Datadog and Figma Ventures, it does NOT list the four angels or 500 Global. The angels could not be confirmed against any primary source, so they are removed to avoid fabrication; 500 Global is verifiable only for the seed round."
        },
        {
          "field": "hq_location",
          "was": "confidence 'confirmed' (New York, NY) sourced to LinkedIn",
          "now": "confidence downgraded to 'reported'; note added that a LinkedIn snippet also surfaced San Francisco",
          "reason": "Sources conflict: Tracxn and a Daytona blog place HQ in NYC, but a LinkedIn public snippet returned San Francisco. NY is the more-corroborated value but the conflict warrants 'reported' not 'confirmed'."
        },
        {
          "field": "open_roles_count",
          "was": "confidence 'confirmed'",
          "now": "confidence downgraded to 'reported'",
          "reason": "A live careers-page count is volatile and not independently corroborated; 'reported' is more appropriate than 'confirmed'."
        },
        {
          "field": "deployment_model",
          "was": "'managed-hosted, self-hosted (open source), and hybrid orchestration'",
          "now": "'managed-hosted and self-hosted (open source)'",
          "reason": "'Hybrid orchestration' as a distinct, named offering was not substantiated by primary sources; trimmed to the two clearly evidenced deployment modes."
        },
        {
          "field": "researcher_backgrounds",
          "was": "founders' Codeanywhere background only",
          "now": "same, plus Burazin's prior role as Chief Developer Experience Officer at Infobip",
          "reason": "Primary seed-round source adds this verifiable detail; no confidence change (remains 'reported')."
        },
        {
          "field": "other_certifications",
          "was": "['ISO/IEC 27001 (listed)','HIPAA (listed)','GDPR (listed)']",
          "now": "reordered/annotated: HIPAA (report available), ISO 27001 (certified vs in-progress not stated), GDPR (listed)",
          "reason": "Trust Center shows a HIPAA report but does not clearly state whether ISO 27001 is fully certified or in-progress; annotation reflects the uncertainty. Confidence remains 'reported' (Trust Center is vendor-controlled, not an independent registry)."
        }
      ],
      "verification_summary": "Confirmed this is the correct company matching the directory note ('elastic infra for agent and AI-generated code'), Daytona.io, an agent-native sandbox/execution-infrastructure provider; no same-name confusion. Funding chain ($2M pre-seed Nov 2023, $5M seed Jun 2024 led by Upfront, $24M Series A Feb 2026 led by FirstMark; ~$31M total) verified against the PRNewswire release, the vendor seed blog, and Tracxn, kept at 'reported' except the last round itself. Largest correction: the draft marked Anthropic as a 'verified customer'; in reality Daytona is one of four Claude Managed Agents sandbox launch partners (Cloudflare/Daytona/Modal/Vercel), a platform integration, not a customer. Reclassified accordingly and added Clay as a genuinely verifiable production user. Investor list downgraded to 'reported' and four unverifiable angels plus a misattributed 500 Global Series A entry removed/corrected, since the primary release does not list them. HQ downgraded to 'reported' due to NY-vs-SF source conflict. SOC 2 Type I, HIPAA, ISO 27001 and GDPR appear on the vendor Trust Center only (no independent registry) so all remain 'reported'/effectively claimed. Headcount sanity-checked: company-stated ~20 at Series A vs ~63 tracker profiles and an 11-50 LinkedIn band, kept at 11-50/'reported', not 200+. AGPL-3.0 license and open-source status confirmed via GitHub (v0.184.0, ~72.5k stars). Overall confidence remains high for identity/product/funding-event facts, with appropriate downgrades on investors, customers, certifications, and HQ.",
      "research_notes": {
        "found": [
          "Clear identification: Daytona Platforms, Inc. (daytona.io / daytonaio), New York, secure & elastic infra for running AI-generated code, matches directory note and tags.",
          "Funding history: $2M pre-seed (Nov 2023), $5M seed (Jun 2024, led by Upfront Ventures with 500 Global), $24M Series A (Feb 2026, led by FirstMark Capital).",
          "Founders ex-Codeanywhere: Ivan Burazin (CEO), Vedran Jukic (CTO), Goran Draganic (Chief Architect).",
          "Open source: github.com/daytonaio/daytona, AGPL-3.0, ~72.5k stars, active (v0.184.0 released 2026-06-03).",
          "Trust Center (trust.daytona.io) lists SOC 2 Type 1, ISO/IEC 27001, HIPAA, GDPR.",
          "Verified frontier-lab tie: Daytona is an official self-hosted sandbox launch partner for Anthropic Claude Managed Agents (alongside Cloudflare, Modal, Vercel), announced May 2026.",
          "10 open roles across Engineering/Growth/Design (NY, SF, Croatia)."
        ],
        "missing": [
          "Valuation (not disclosed).",
          "Exact current headcount (sources conflict).",
          "Headcount growth %, researcher count, published papers/benchmarks."
        ],
        "conflicts": [
          "Headcount: company/PitchBook say ~20 (Feb 2026); Tracxn says 63 (Apr 2026); Startup Intros says 18; LinkedIn shows 63 associated profiles but an '11-50' size band. Treated as 11-50 band with ~20 company-stated at Series A.",
          "Total raised: Tracxn lists $31M; sum of disclosed rounds ($2M+$5M+$24M) = $31M, consistent."
        ],
        "stale": [
          "Seed round announcement (Jun 2024) and founding background sources are >12 months old but used only for historical/founder facts."
        ],
        "open_questions": [
          "Precise current full-time headcount.",
          "Whether SOC 2 Type II is in progress (only Type 1 evidenced).",
          "Post-Series A valuation."
        ]
      }
    },
    {
      "rank": null,
      "focus_areas_normalised": [
        "Coding",
        "Computer Use"
      ],
      "slug": "e2b",
      "brand_name": "E2B",
      "segment": "Adjacent: execution infrastructure",
      "website": "https://e2b.dev/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://e2b.dev/ (accessed 2026-06-07); https://e2b.dev/blog/series-a (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2023,
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/e2b (accessed 2026-06-07); https://www.crunchbase.com/organization/e2b-1c91 (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "San Francisco, USA",
        "confidence": "confirmed",
        "source": "https://www.linkedin.com/company/e2b-dev (accessed 2026-06-07); https://e2b.dev/blog/series-a (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [
          "Prague, Czech Republic"
        ],
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/e2b-dev (accessed 2026-06-07)"
      },
      "distributed_remote": {
        "value": "yes",
        "confidence": "estimated",
        "source": "Team distributed across US (San Francisco) and Czech Republic (Prague) per LinkedIn; https://www.linkedin.com/company/e2b-dev (accessed 2026-06-07)"
      },
      "what_they_sell": {
        "value": "infra",
        "confidence": "confirmed",
        "source": "https://e2b.dev/ (accessed 2026-06-07) - secure cloud sandboxes (Firecracker microVMs) for running AI-generated code and AI agents; RL training is one stated use case"
      },
      "deployment_model": {
        "value": "managed-hosted (cloud API), with BYOC, on-prem, and self-hosted options in customer AWS/GCP/Azure accounts",
        "confidence": "confirmed",
        "source": "https://e2b.dev/ (accessed 2026-06-07) - lists BYOC, on-premises, and self-hosted deployment"
      },
      "maturity": {
        "value": "GA",
        "confidence": "confirmed",
        "source": "https://e2b.dev/ (accessed 2026-06-07) - usage-based pricing, published SDKs, enterprise customers"
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/e2b-dev/E2B (accessed 2026-06-07)"
      },
      "license": {
        "value": "Apache-2.0",
        "confidence": "confirmed",
        "source": "https://github.com/e2b-dev/E2B (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "claimed-unverified (vendor trust center states SOC 2 Type II)",
        "confidence": "reported",
        "source": "https://trust.e2b.dev/ (accessed 2026-06-07) - trust center is vendor-hosted and JS-rendered; SOC 2 Type II claim could not be independently confirmed via an audit registry or third party"
      },
      "other_certifications": {
        "value": [
          "ISO 27001 (claimed-unverified)",
          "HIPAA (claimed-unverified)",
          "GDPR (claimed-unverified)"
        ],
        "confidence": "reported",
        "source": "https://trust.e2b.dev/ (accessed 2026-06-07) - claimed on vendor trust center only; not independently verified"
      },
      "security_page": {
        "value": "https://trust.e2b.dev/",
        "confidence": "confirmed",
        "source": "https://trust.e2b.dev/ (accessed 2026-06-07)"
      },
      "current_headcount": {
        "value": "11-50 (LinkedIn company size band; ~37 employees listed on LinkedIn as of 2026-06-07)",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/e2b-dev (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/e2b-dev (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "~12-13 open roles",
        "confidence": "reported",
        "source": "https://www.glassdoor.com/Jobs/E2B-Jobs-E10750951.htm and https://e2b.dev/careers (accessed 2026-06-07)"
      },
      "has_researchers": {
        "value": "no",
        "confidence": "estimated",
        "source": "Team and open roles appear engineering/infra/DevRel/GTM focused, not research scientists; https://e2b.dev/careers (accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "published_papers_or_benchmarks": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "total_raised": {
        "value": "~$32M (official Series A blog states $32M total)",
        "confidence": "reported",
        "source": "https://e2b.dev/blog/series-a (accessed 2026-06-07) - $11.5M seed + $21M Series A = $32M; some third-party aggregators (Tracxn) report ~$35M+"
      },
      "last_round": {
        "value": "Series A, $21M, July 2025 (announced 2025-07-28)",
        "confidence": "confirmed",
        "source": "https://e2b.dev/blog/series-a (accessed 2026-06-07); https://www.prnewswire.com/news-releases/e2b-raises-a-21m-series-a-to-offer-cloud-for-ai-agents-to-fortune-100-302514540.html (accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Insight Partners (Series A lead)",
          "Decibel (seed lead)",
          "Sunflower Capital",
          "Kaya / Kaya VC",
          "Scott Johnston (former Docker CEO, angel)"
        ],
        "confidence": "confirmed",
        "source": "https://e2b.dev/blog/series-a (accessed 2026-06-07); https://www.insightpartners.com/ideas/e2b-raises-a-21m-series-a-to-offer-cloud-for-ai-agents-to-fortune-100/ (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "unknown (vendor self-claims conflict: a careers snippet referenced '8-figure revenue' / '$37M since founding' while a LinkedIn snippet states '7-figure revenue'; none independently verified)",
        "confidence": "unknown",
        "source": "https://e2b.dev/careers and https://www.linkedin.com/company/e2b-dev (accessed 2026-06-07) - conflicting self-claimed figures, no credible third-party confirmation"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Perplexity",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Hugging Face",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Groq",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Manus",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Lindy",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Genspark",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Gumloop",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Athena",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://e2b.dev/ and https://e2b.dev/blog/series-a (accessed 2026-06-07) - all customer names from vendor's own homepage/Series A materials (self-claimed). Vendor also claims '94% of Fortune 100' on homepage (was '88%' in the July 2025 Series A post) - a self-claimed, unverifiable aggregate"
      },
      "focus_areas": [
        "execution infrastructure",
        "coding environments",
        "computer use environments"
      ],
      "positioning_summary": "E2B provides open-source, secure cloud sandboxes (built on Firecracker microVMs) for running AI-generated code and AI agents, offered as a hosted API with BYOC/on-prem/self-hosted options. It positions as execution infrastructure for enterprise AI agents and self-claims broad Fortune 100 adoption.",
      "best_fit_use_case": "Buyers needing secure, isolated runtime infrastructure to execute LLM-generated code or run agents at scale, including as a sandbox layer for RL/agent training.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://e2b.dev/",
          "accessed_date": "2026-06-07",
          "note": "Official homepage - product, deployment, customers, OSS, RL infra reference"
        },
        {
          "url": "https://e2b.dev/enterprise",
          "accessed_date": "2026-06-07",
          "note": "Enterprise/deployment options (BYOC, on-prem, self-hosted)"
        },
        {
          "url": "https://e2b.dev/blog/series-a",
          "accessed_date": "2026-06-07",
          "note": "Official Series A announcement - $21M, $32M total, investors, customers, metrics, leadership, HQ"
        },
        {
          "url": "https://e2b.dev/careers",
          "accessed_date": "2026-06-07",
          "note": "Careers page - open roles, '8-figure revenue', '$37M since founding' claim"
        },
        {
          "url": "https://github.com/e2b-dev/E2B",
          "accessed_date": "2026-06-07",
          "note": "OSS repo - Apache-2.0, 12.5k stars, latest release e2b@2.28.0 Jun 6 2026"
        },
        {
          "url": "https://trust.e2b.dev/",
          "accessed_date": "2026-06-07",
          "note": "Trust center landing"
        },
        {
          "url": "https://trust.e2b.dev/controls",
          "accessed_date": "2026-06-07",
          "note": "Compliance frameworks - SOC 2 Type II, ISO 27001, HIPAA, GDPR, pen testing"
        },
        {
          "url": "https://www.linkedin.com/company/e2b-dev",
          "accessed_date": "2026-06-07",
          "note": "LinkedIn company page (public snippet) - size band 11-50, ~37 employees, HQ SF, Prague office"
        },
        {
          "url": "https://www.insightpartners.com/ideas/e2b-raises-a-21m-series-a-to-offer-cloud-for-ai-agents-to-fortune-100/",
          "accessed_date": "2026-06-07",
          "note": "Insight Partners Series A writeup"
        },
        {
          "url": "https://www.prnewswire.com/news-releases/e2b-raises-a-21m-series-a-to-offer-cloud-for-ai-agents-to-fortune-100-302514540.html",
          "accessed_date": "2026-06-07",
          "note": "PR Newswire Series A release"
        },
        {
          "url": "https://venturebeat.com/ai/how-e2b-became-essential-to-88-of-fortune-100-companies-and-raised-21-million",
          "accessed_date": "2026-06-07",
          "note": "VentureBeat coverage - Fortune 100 adoption, $21M"
        },
        {
          "url": "https://tracxn.com/d/companies/e2b/__U7C82j6Wk3VH-rgW0n4LFnUqqq-LuBw6rnIcnLGz2yU/funding-and-investors",
          "accessed_date": "2026-06-07",
          "note": "Tracxn - founding 2023, funding rounds, ~$35M total"
        },
        {
          "url": "https://www.crunchbase.com/organization/e2b-1c91",
          "accessed_date": "2026-06-07",
          "note": "Crunchbase - seed Oct 2024 $11.5M led by Decibel, founding 2023"
        },
        {
          "url": "https://www.glassdoor.com/Jobs/E2B-Jobs-E10750951.htm",
          "accessed_date": "2026-06-07",
          "note": "Open roles count (~13)"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "soc2",
          "was": "Type II (reported)",
          "now": "claimed-unverified (vendor trust center states SOC 2 Type II), reported",
          "reason": "The only source is the vendor's own trust center (trust.e2b.dev), which is JS-rendered and could not be independently confirmed against an audit registry or third party. Per rules, vendor-only compliance claims are 'claimed-unverified'."
        },
        {
          "field": "other_certifications",
          "was": "[ISO 27001, HIPAA, GDPR] (reported)",
          "now": "[ISO 27001 (claimed-unverified), HIPAA (claimed-unverified), GDPR (claimed-unverified)] (reported)",
          "reason": "All claimed only on the vendor trust center page; no independent verification available."
        },
        {
          "field": "total_raised",
          "was": "~$32M with note that third parties report ~$35M and a careers snippet states $37M (reported)",
          "now": "~$32M per official Series A blog (reported)",
          "reason": "Anchored to the primary source, which states $32M total ($11.5M seed + $21M Series A; the components sum to $32.5M, so the stated total appears rounded). Aggregator figures vary; kept as reported, removed the unverified $37M emphasis."
        },
        {
          "field": "revenue_signals",
          "was": "'8-figure revenue' per careers/job listing (reported)",
          "now": "unknown - conflicting self-claims (8-figure vs 7-figure), none verified (unknown)",
          "reason": "Vendor self-claims conflict: careers copy referenced 8-figure / $37M while a LinkedIn snippet says 7-figure revenue. No credible third-party figure exists, so downgraded to unknown."
        },
        {
          "field": "notable_customers",
          "was": "8 customers including LMArena; confidence reported",
          "now": "8 customers; replaced LMArena with Athena to match current homepage; still self-claimed, reported",
          "reason": "Current homepage shows Athena (not LMArena) among the logos; LMArena appeared in the July 2025 Series A post. All remain self-claimed (vendor materials only). Also noted homepage Fortune 100 claim is now '94%' vs the '88%' Series A figure - both self-claimed/unverifiable."
        },
        {
          "field": "last_round",
          "was": "Series A, $21M, July 2025",
          "now": "Series A, $21M, July 2025 (announced 2025-07-28)",
          "reason": "Added precise announcement date confirmed by the official blog."
        },
        {
          "field": "overall_confidence",
          "was": "high",
          "now": "medium",
          "reason": "Several decision-relevant claims rest only on vendor self-reporting (SOC2/certs, customers, revenue, Fortune 100 %), and the trust center could not be independently verified. Funding/HQ/OSS are solid, but the unverifiable trust/customer/revenue claims warrant medium overall."
        }
      ],
      "verification_summary": "Confirmed this is the correct company: E2B (e2b.dev), open-source secure cloud sandboxes (Firecracker microVMs) for running AI-generated code and AI agents - matches the directory note. Funding verified against the official Series A blog and multiple press sources: $21M Series A led by Insight Partners (announced 2025-07-28), $11.5M seed led by Decibel, $32M total per the official blog; investors confirmed. HQ San Francisco with a Prague office; founded 2023; OSS Apache-2.0 confirmed via GitHub. Headcount 11-50 / ~37 confirmed via LinkedIn public snippet (a stale '2-10' snippet was disregarded). Downgrades: SOC2 and ISO/HIPAA/GDPR are claimed only on the vendor's JS-rendered trust center and could not be independently verified, so marked claimed-unverified. Revenue downgraded to unknown due to conflicting self-claims (7-figure vs 8-figure/$37M). All customers are vendor self-claimed (homepage/Series A materials); current homepage lists Athena and a '94% of Fortune 100' claim (vs '88%' in the Series A post) - both unverifiable. No valuation disclosed. Overall confidence lowered to medium.",
      "research_notes": {}
    },
    {
      "rank": null,
      "focus_areas_normalised": [
        "Coding"
      ],
      "slug": "runloop",
      "brand_name": "Runloop",
      "segment": "Adjacent: execution infrastructure",
      "website": "https://runloop.ai/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://runloop.ai/ (accessed 2026-06-07)"
      },
      "focus_areas": [
        "execution infrastructure",
        "coding environments",
        "evaluation / benchmarks"
      ],
      "what_they_sell": {
        "value": "infra",
        "confidence": "confirmed",
        "source": "https://runloop.ai/ (accessed 2026-06-07); https://docs.runloop.ai/docs/devboxes/overview"
      },
      "deployment_model": {
        "value": "managed-hosted / API (VPC deployment option claimed for enterprise)",
        "confidence": "confirmed",
        "source": "https://docs.runloop.ai/docs/devboxes/overview (accessed 2026-06-07); https://runloop.ai/ (VPC option)"
      },
      "maturity": {
        "value": "GA",
        "confidence": "confirmed",
        "source": "https://www.prnewswire.com/news-releases/runloop-unveils-enterprise-grade-sandboxes-for-ai-coding-agents-302460834.html (accessed 2026-06-07; Devboxes GA announced 2025-05-20)"
      },
      "open_source": {
        "value": "yes (SDKs/CLI only; core platform proprietary)",
        "confidence": "confirmed",
        "source": "https://github.com/runloopai (accessed 2026-06-07; MIT-licensed TS and Python SDKs, CLI)"
      },
      "license": {
        "value": "MIT (SDKs/CLI; core platform is proprietary/closed-source)",
        "confidence": "confirmed",
        "source": "https://github.com/runloopai (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2024,
        "confidence": "reported",
        "source": "https://www.crunchbase.com/organization/runloop-ai (founded 2024); https://venturebeat.com/ai/runloop-lands-7m-to-power-ai-coding-agents-with-cloud-based-devboxes (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "San Francisco, CA, USA",
        "confidence": "confirmed",
        "source": "https://www.prnewswire.com/news-releases/runloop-raises-7m-seed-round-to-bring-enterprise-grade-infrastructure-to-ai-coding-agents-302516898.html (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "distributed_remote": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "~16-17 employees (Tracxn 16 as of 2026-01-31; PitchBook 17; press cited 12 in Jul 2025)",
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/runloopai (accessed 2026-06-07; 16 as of 2026-01-31); https://pitchbook.com/profiles/company/629631-73 (17); press release 12 (Jul 2025). Note: startupintros aggregator showed an erroneous 1,001-5,000 LinkedIn band, disregarded."
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/runloopai (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "has_researchers": {
        "value": "no",
        "confidence": "estimated",
        "source": "https://runloop.ai/about (accessed 2026-06-07; team described as infrastructure engineers from Google, Stripe, Vercel, AWS, Meta, Scale AI; no research/PhD scientists noted)"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Founder/CEO Jonathan Wall: ex-Google (co-founded Google Wallet), co-founded Index (acquired by Stripe); deeper Google File System / Founders Award detail is vendor-stated",
          "Senior engineers are alumni of Google, Stripe, Vercel, AWS, Meta, and Scale AI (per press)"
        ],
        "confidence": "reported",
        "source": "https://runloop.ai/about (accessed 2026-06-07); https://venturebeat.com/ai/runloop-lands-7m-to-power-ai-coding-agents-with-cloud-based-devboxes (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "$7M",
        "confidence": "confirmed",
        "source": "https://www.prnewswire.com/news-releases/runloop-raises-7m-seed-round-to-bring-enterprise-grade-infrastructure-to-ai-coding-agents-302516898.html (accessed 2026-06-07); corroborated by Crunchbase ($7M, 1 round)"
      },
      "last_round": {
        "value": "Seed, $7M, 2025-07-30",
        "confidence": "confirmed",
        "source": "https://www.crunchbase.com/funding_round/runloop-ai-seed--bd4cd02b (accessed 2026-06-07); https://www.prnewswire.com/news-releases/runloop-raises-7m-seed-round-to-bring-enterprise-grade-infrastructure-to-ai-coding-agents-302516898.html"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Not publicly disclosed (Crunchbase financials obfuscated as of 2026-06-07)"
      },
      "notable_investors": {
        "value": [
          "The General Partnership (lead)",
          "Blank Ventures",
          "Exponent Founders Capital",
          "Nascent",
          "Roneil Rumburg (angel)"
        ],
        "confidence": "reported",
        "source": "Official press names only The General Partnership (lead) + Blank Ventures: https://www.prnewswire.com/news-releases/runloop-raises-7m-seed-round-to-bring-enterprise-grade-infrastructure-to-ai-coding-agents-302516898.html. Additional participants (Exponent Founders Capital, Nascent, Roneil Rumburg; AWS Startups per one source) listed only on aggregators: https://www.crunchbase.com/organization/runloop-ai, https://startupintros.com/orgs/runloop-ai (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "Vendor-claimed: >200% customer growth and >100% revenue growth since March 2025; 'a few dozen customers'",
        "confidence": "reported",
        "source": "https://venturebeat.com/ai/runloop-lands-7m-to-power-ai-coding-agents-with-cloud-based-devboxes (accessed 2026-06-07; figures self-reported by vendor)"
      },
      "notable_customers": {
        "value": [
          {
            "name": "Trajectory (vendor case study; ran ~10,000 concurrent devboxes)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Detail.dev (CEO quote in vendor press release)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "ION (YC-backed)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Accrual",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Unnamed 'major model laboratories' (vendor claim; no specific lab named, no third-party confirmation)",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          }
        ],
        "confidence": "reported",
        "source": "https://runloop.ai/ (accessed 2026-06-07); https://www.prnewswire.com/news-releases/runloop-raises-7m-seed-round-to-bring-enterprise-grade-infrastructure-to-ai-coding-agents-302516898.html; https://venturebeat.com/ai/runloop-lands-7m-to-power-ai-coding-agents-with-cloud-based-devboxes. All customer mentions trace to vendor site/press; the 'model laboratories' tie is an unverified vendor assertion."
      },
      "soc2": {
        "value": "claimed-unverified",
        "confidence": "reported",
        "source": "Vendor states 'SOC2 certified' on homepage and press (https://runloop.ai/, https://www.prnewswire.com/news-releases/runloop-unveils-enterprise-grade-sandboxes-for-ai-coding-agents-302460834.html, accessed 2026-06-07) but no report type, audit registry entry, or independent attestation located"
      },
      "other_certifications": {
        "value": [
          "HIPAA (vendor-claimed)",
          "GDPR (vendor-claimed)"
        ],
        "confidence": "reported",
        "source": "https://runloop.ai/ (accessed 2026-06-07; 'HIPAA Compliant' badge, GDPR support); https://www.prnewswire.com/news-releases/runloop-raises-7m-seed-round-to-bring-enterprise-grade-infrastructure-to-ai-coding-agents-302516898.html. No independent attestation found; vendor self-claims."
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No dedicated public trust/security portal located as of 2026-06-07"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "No Runloop-authored papers/benchmarks found. Platform supports running third-party academic coding benchmarks: SWE-Bench, R2E-Gym, SWE-Smith, Multi-SWE (these are external benchmarks, not Runloop publications)"
        ],
        "confidence": "reported",
        "source": "https://runloop.ai/ (accessed 2026-06-07)"
      },
      "positioning_summary": "Runloop sells cloud-hosted, isolated micro-VM 'devboxes' plus blueprints, snapshots and benchmark/eval tooling that give AI coding agents a secure execution environment for development, evaluation, and reinforcement/supervised fine-tuning (RFT/SFT) loops. It is execution infrastructure for agent builders and model labs rather than an RL-data/environments vendor itself.",
      "best_fit_use_case": "Teams building or training coding agents that need scalable, sandboxed execution environments to run agents, evals (SWE-Bench, etc.), and RFT/SFT loops.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://runloop.ai/",
          "accessed_date": "2026-06-07",
          "note": "Official homepage: devboxes, snapshots, benchmarks, RFT/SFT, target customers, Trajectory case study"
        },
        {
          "url": "https://runloop.ai/about",
          "accessed_date": "2026-06-07",
          "note": "Founder Jonathan Wall background; team alumni Google/Stripe/Vercel/AWS/Meta/Scale AI"
        },
        {
          "url": "https://docs.runloop.ai/docs/devboxes/overview",
          "accessed_date": "2026-06-07",
          "note": "Devbox product / managed-hosted API deployment"
        },
        {
          "url": "https://www.prnewswire.com/news-releases/runloop-raises-7m-seed-round-to-bring-enterprise-grade-infrastructure-to-ai-coding-agents-302516898.html",
          "accessed_date": "2026-06-07",
          "note": "$7M seed, lead The General Partnership, Blank Ventures, HQ SF, 12 employees (Jul 2025), customers Detail.dev/ION/Accrual"
        },
        {
          "url": "https://www.prnewswire.com/news-releases/runloop-unveils-enterprise-grade-sandboxes-for-ai-coding-agents-302460834.html",
          "accessed_date": "2026-06-07",
          "note": "Devboxes GA 2025-05-20; 'SOC2-compliant platform' claim"
        },
        {
          "url": "https://venturebeat.com/infrastructure/runloop-lands-7m-to-power-ai-coding-agents-with-cloud-based-devboxes",
          "accessed_date": "2026-06-07",
          "note": "'a few dozen customers' incl. major model labs; >200% customer / >100% revenue growth since March; founder backgrounds"
        },
        {
          "url": "https://www.crunchbase.com/funding_round/runloop-ai-seed--bd4cd02b",
          "accessed_date": "2026-06-07",
          "note": "Seed round dated 2025-07-30"
        },
        {
          "url": "https://www.crunchbase.com/organization/runloop-ai",
          "accessed_date": "2026-06-07",
          "note": "Founded January 2024, San Francisco"
        },
        {
          "url": "https://github.com/runloopai",
          "accessed_date": "2026-06-07",
          "note": "MIT-licensed TypeScript & Python SDKs, rl-cli, examples; core platform proprietary"
        },
        {
          "url": "https://tracxn.com/d/companies/runloopai",
          "accessed_date": "2026-06-07",
          "note": "16 employees as of 2026-01-31"
        },
        {
          "url": "https://pitchbook.com/profiles/company/629631-73",
          "accessed_date": "2026-06-07",
          "note": "17 total employees"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "notable_investors",
          "was": "['The General Partnership (lead)','Blank Ventures'] / confidence confirmed",
          "now": "Added Exponent Founders Capital, Nascent, Roneil Rumburg (aggregator-listed); confidence downgraded to reported",
          "reason": "Crunchbase/startupintros list additional participants (Exponent Founders Capital, Nascent, Roneil Rumburg, and AWS Startups per one source) not named in the official press release. Primary press names only TGP (lead) and Blank Ventures, so the fuller list is aggregator-sourced and cannot be 'confirmed'."
        },
        {
          "field": "other_certifications",
          "was": "[] / confidence unknown",
          "now": "['HIPAA (vendor-claimed)','GDPR (vendor-claimed)'] / confidence reported",
          "reason": "Both the official seed press release and the homepage explicitly claim HIPAA compliant and GDPR support. These are vendor self-claims (no third-party attestation/registry found), so listed as claimed/reported rather than omitted."
        },
        {
          "field": "published_papers_or_benchmarks",
          "was": "confidence confirmed; phrased as supported benchmark suites",
          "now": "confidence reported; clarified these are third-party benchmarks runnable on the platform, NOT Runloop-authored publications",
          "reason": "No original papers or benchmarks authored by Runloop were found. SWE-Bench, R2E-Gym, SWE-Smith are external academic benchmarks the platform supports, not publications by the vendor. 'confirmed' overstated; reframed to avoid implying original research output."
        },
        {
          "field": "notable_customers",
          "was": "5 entries incl. unnamed 'major model laboratories' with frontier_lab_tie=true; confidence reported",
          "now": "Same entries retained but confidence kept reported; unnamed-lab item explicitly flagged as an unverified vendor claim with no named lab",
          "reason": "All customer references trace to the vendor's own site/press or a vendor-quoted VentureBeat piece, hence self-claimed. The 'major model laboratories' claim names no specific lab and has no third-party confirmation; the frontier_lab_tie flag is retained only as the vendor's own assertion, not a verified tie."
        },
        {
          "field": "current_headcount",
          "was": "16-17 employees reported",
          "now": "~16-17 employees (Jan 2026); confidence kept reported, junk band noted",
          "reason": "Confirmed 12 (Jul 2025 press), 16 (Tracxn Jan 2026), 17 (PitchBook). One aggregator (startupintros) showed a clearly erroneous '1,001-5,000' LinkedIn size band, which is an artifact and was disregarded. Range 16-17 stands as reported."
        },
        {
          "field": "researcher_backgrounds",
          "was": "confidence confirmed",
          "now": "confidence reported",
          "reason": "Founder background (ex-Google Wallet, Index/Stripe) and team alumni (Google/Stripe/Vercel/AWS/Meta/Scale AI) are corroborated by VentureBeat and vendor press, but the specific 'led Google File System team / Founders Award' detail is from the vendor's own about page only. Downgraded to reported as not fully third-party verified."
        }
      ],
      "verification_summary": "Confirmed this is the correct company matching the directory note ('devboxes for coding agents and RL loops'): Runloop sells cloud micro-VM devboxes for AI coding agents with explicit RFT/SFT and benchmark/eval support. Funding ($7M seed, led by The General Partnership with Blank Ventures, closed 2025-07-30) is well-corroborated across official PR, VentureBeat, and Crunchbase, so kept confirmed. Expanded notable_investors to include aggregator-listed participants (Exponent Founders Capital, Nascent, Roneil Rumburg) but downgraded to 'reported' since they are absent from primary press. Headcount triangulated to ~16-17 (Jan 2026); discarded a junk '1,001-5,000' band from one aggregator; band 11-50 reported. SOC2 remains claimed-unverified (vendor self-claim, no audit registry); added HIPAA and GDPR as vendor-claimed certifications that the draft had omitted. All named customers are vendor-sourced hence self-claimed; the 'major model laboratories' frontier tie is an unverified vendor assertion. Corrected the published_papers_or_benchmarks field: Runloop has no original publications; SWE-Bench/R2E-Gym/SWE-Smith are external benchmarks it merely supports, so confidence downgraded from confirmed to reported. Valuation remains unknown (not disclosed). Crunchbase org page returned 403 on direct fetch; cross-checked via search snippets and startupintros aggregator instead.",
      "research_notes": {
        "found": [
          "Official site, docs, GitHub org, two press releases (seed + GA), VentureBeat coverage, Crunchbase funding round",
          "$7M seed (2025-07-30) led by The General Partnership w/ Blank Ventures",
          "Founder/CEO Jonathan Wall (ex-Google File System, Google Wallet co-founder, Index CTO acquired by Stripe)",
          "Product: micro-VM devboxes, blueprints, snapshots, benchmarks (SWE-Bench/R2E-Gym/SWE-Smith), RFT/SFT support, browser/computer use",
          "Headcount ~16-17 (2026), 12 at funding (Jul 2025); HQ San Francisco; founded 2024",
          "MIT-licensed open-source SDKs/CLI; core platform proprietary",
          "Vendor-claimed SOC2 compliance"
        ],
        "missing": [
          "Verified SOC2 report type (Type I/II) and independent attestation; no dedicated trust/security page found",
          "Valuation, total raised beyond seed",
          "Named frontier-lab customers (only generic 'major model laboratories' claim)",
          "Open roles count, headcount growth %, distributed/remote status",
          "Researcher count / PhD scientists (team appears engineering-led, not research-led)"
        ],
        "conflicts": [
          "Founder identity: official site + PR Newswire consistently name Jonathan Wall (CEO); one third-party search summary (unreliable) named 'Jerry Liu/Kevin Lin' and another credited a Stripe-only founding team; Jonathan Wall was treated as confirmed and the discrepancy disregarded",
          "Headcount: 16 (Tracxn) vs 17 (PitchBook) vs 12 (Jul 2025 press)",
          "A LinkedIn fetch returned a DIFFERENT company (Tel Aviv, founded 2016, mobile-dev), name collision; disregarded entirely"
        ],
        "stale": [
          "Growth figures (>200% customers / >100% revenue 'since March') come from mid-2025 press and are now roughly a year old; they are stated relative to an undated baseline"
        ],
        "open_questions": [
          "Which specific model labs / frontier labs are customers?",
          "Is SOC2 Type II achieved and where is the report/trust portal?",
          "Has Runloop raised beyond the $7M seed since July 2025?"
        ]
      }
    },
    {
      "rank": null,
      "focus_areas_normalised": [
        "Coding",
        "Long-Horizon"
      ],
      "slug": "general-reasoning",
      "brand_name": "General Reasoning",
      "segment": "Open-source & open environments",
      "website": "https://www.gr.inc/",
      "focus_areas": [
        "long-horizon / general reasoning",
        "evaluation / benchmarks",
        "execution infrastructure",
        "coding environments"
      ],
      "positioning_summary": "General Reasoning is an AI research lab (operating research hub in London; legal entity General Reasoning, Inc. registered in the US) building open RL environments and infrastructure for training and evaluating agents over long horizons. Its OpenReward platform and Open Reward Standard (ORS) provide an open specification for connecting language models to community-built RL environments, with 330+ environments accessible through one API.",
      "best_fit_use_case": "Teams wanting open, portable RL environments and long-horizon agent benchmarks they can run anywhere (self-hosted) or via managed/API hosting.",
      "what_they_sell": {
        "value": "environments",
        "confidence": "confirmed",
        "source": "https://www.gr.inc/releases/introducing-openreward (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "managed-hosted / self-hosted / API",
        "confidence": "confirmed",
        "source": "https://www.gr.inc/releases/introducing-openreward (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "GA",
        "confidence": "estimated",
        "source": "https://www.gr.inc/releases/introducing-openreward (accessed 2026-06-07); OpenReward live with 330+ environments and on-demand API"
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://www.gr.inc/releases/introducing-openreward (accessed 2026-06-07); Open Reward Standard described as open-source specification, openrewardstandard.io"
      },
      "license": {
        "value": "Apache-2.0 (firehorse harness repo); Open Reward Standard described as open-source, specific license not stated",
        "confidence": "reported",
        "source": "https://github.com/GeneralReasoning (accessed 2026-06-07)"
      },
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.gr.inc/careers (accessed 2026-06-07); actively hiring, recent product launches"
      },
      "founded_year": {
        "value": 2025,
        "confidence": "confirmed",
        "source": "https://uk.linkedin.com/company/general-reasoning (accessed 2026-06-07); https://www.gr.inc/careers (Chapter I Founded 2025); SEC Form D filed 2025-07-11 https://www.formds.com/issuers/general-reasoning-inc"
      },
      "hq_location": {
        "value": "London, United Kingdom (Shoreditch), operating research hub; legal entity General Reasoning, Inc. registered in San Francisco/US per SEC Form D",
        "confidence": "confirmed",
        "source": "https://www.gr.inc/careers (accessed 2026-06-07); https://uk.linkedin.com/company/general-reasoning; https://www.formds.com/issuers/general-reasoning-inc"
      },
      "other_locations": {
        "value": [],
        "confidence": "reported",
        "source": "https://www.gr.inc/careers (accessed 2026-06-07); only London 'Chapter I' currently exists, additional chapters 'coming soon'"
      },
      "distributed_remote": {
        "value": "no",
        "confidence": "estimated",
        "source": "https://www.gr.inc/careers (accessed 2026-06-07); all roles tied to London chapter, geographic-chapter model emphasizes in-person community"
      },
      "current_headcount": {
        "value": "~10 (LinkedIn public snippet; '11-50' band)",
        "confidence": "reported",
        "source": "https://uk.linkedin.com/company/general-reasoning (accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "11-50",
        "confidence": "reported",
        "source": "https://uk.linkedin.com/company/general-reasoning (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": 3,
        "confidence": "confirmed",
        "source": "https://www.gr.inc/careers (accessed 2026-06-07); 3 Member of Technical Staff roles (Head of Rollouts, Infrastructure, Research Scientist)"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://www.gr.inc/research (accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "~6 named individuals on site (founders + research/eng staff); counted from research/team page",
        "confidence": "estimated",
        "source": "https://www.gr.inc/research (accessed 2026-06-07)"
      },
      "researcher_backgrounds": {
        "value": [
          "Ross Taylor (co-founder/CEO) - ex-Meta AI/FAIR, research lead on Galactica, led reasoning for Llama 2/Llama 3; co-founded Papers with Code (acquired by Meta)",
          "Founding team previously led open language model development at Meta (Galactica, Llama 2, Llama 3)",
          "Other named staff/directors: Kip Parker, Chengxi Wang, Thomas Grady, Iliyan Zarov, Henry Course"
        ],
        "confidence": "reported",
        "source": "https://www.gr.inc/ ; https://www.linkedin.com/in/rosstaylor90/ ; https://www.interconnects.ai/p/interviewing-ross-taylor-on-llm-reasoning (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "Equity offering, $10,904,992 sold, filed 2025-07-11 (SEC Form D; stage not labeled, reported as ~2025 seed-stage by Crunchbase)",
        "confidence": "reported",
        "source": "https://www.formds.com/issuers/general-reasoning-inc (accessed 2026-06-07); Crunchbase lists $10.9M Aug 2025"
      },
      "total_raised": {
        "value": "~$10.9M (amount sold per SEC Form D 2025-07-11; corroborated by Crunchbase)",
        "confidence": "reported",
        "source": "https://www.formds.com/issuers/general-reasoning-inc (accessed 2026-06-07); Crunchbase profile"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [],
        "confidence": "unknown",
        "source": "Form D does not name investors; no credible press disclosure found (note: a $10M 'General Analysis' seed is a different, unrelated company)"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [
          {
            "name": "NVIDIA",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          },
          {
            "name": "Nebius",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Eigent",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "OpenAI",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          },
          {
            "name": "Meta",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          }
        ],
        "confidence": "reported",
        "source": "https://www.gr.inc/releases/introducing-openreward (accessed 2026-06-07); names appear as environment providers/contributors on OpenReward page (NVIDIA Nemotron envs, Nebius SWE-rebench, Eigent SETA, OpenAI MLE-bench, Meta GAIA), NOT confirmed paying customers"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "KellyBench: long-horizon sequential decision-making benchmark - https://www.gr.inc/releases/introducing-kellybench",
          "OpenReward / Open Reward Standard - https://www.gr.inc/releases/introducing-openreward",
          "Galactica, Llama 2, Llama 3 (founders' prior work at Meta)"
        ],
        "confidence": "confirmed",
        "source": "https://www.gr.inc/releases/introducing-kellybench ; https://www.gr.inc/releases/introducing-openreward (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.gr.inc/",
          "accessed_date": "2026-06-07",
          "note": "Official homepage - positioning, products, founding team ex-Meta, contact, social links"
        },
        {
          "url": "https://www.gr.inc/careers",
          "accessed_date": "2026-06-07",
          "note": "Open roles (3), Shoreditch London HQ, 'research lab in chapters' model, founders named"
        },
        {
          "url": "https://www.gr.inc/research",
          "accessed_date": "2026-06-07",
          "note": "Team members listed, KellyBench benchmark"
        },
        {
          "url": "https://www.gr.inc/releases/introducing-openreward",
          "accessed_date": "2026-06-07",
          "note": "OpenReward + ORS open-source spec, deployment models, NVIDIA/Nebius/Eigent partners"
        },
        {
          "url": "https://www.gr.inc/releases/introducing-kellybench",
          "accessed_date": "2026-06-07",
          "note": "KellyBench long-horizon sequential decision-making benchmark"
        },
        {
          "url": "https://uk.linkedin.com/company/general-reasoning",
          "accessed_date": "2026-06-07",
          "note": "Public snippet - ~10-11 headcount, 11-50 band, founded 2025, London, long-horizon RL focus"
        },
        {
          "url": "https://www.formds.com/issuers/general-reasoning-inc",
          "accessed_date": "2026-06-07",
          "note": "SEC Form D - $10,904,992 equity, filed 2025-07-11, directors: Grady, Parker, Taylor, Wang"
        },
        {
          "url": "https://github.com/GeneralReasoning",
          "accessed_date": "2026-06-07",
          "note": "GitHub org - environment repos, firehorse harness Apache-2.0"
        },
        {
          "url": "https://www.linkedin.com/in/rosstaylor90/",
          "accessed_date": "2026-06-07",
          "note": "Ross Taylor co-founder/CEO, ex-Meta AI"
        },
        {
          "url": "https://www.interconnects.ai/p/interviewing-ross-taylor-on-llm-reasoning",
          "accessed_date": "2026-06-07",
          "note": "Background: Ross Taylor led Galactica, Llama reasoning, Papers with Code"
        },
        {
          "url": "https://huggingface.co/GeneralReasoning",
          "accessed_date": "2026-06-07",
          "note": "Hugging Face org for General Reasoning datasets/models"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "total_raised",
          "was": "$10.9M (per SEC Form D 2025-07-11), confidence: confirmed",
          "now": "~$10.9M (amount sold per SEC Form D; corroborated by Crunchbase), confidence: reported",
          "reason": "Form D reports amount SOLD in one offering, not necessarily total lifetime raised; total offering amount not specified. Single regulatory source plus Crunchbase warrants 'reported', not 'confirmed'."
        },
        {
          "field": "last_round",
          "was": "Equity offering, $10,904,992, filed 2025-07-11, confidence: confirmed",
          "now": "Same figure but stage labeled as reported/inferred seed, confidence: reported",
          "reason": "Stage was not labeled in Form D; 'seed' is an inference. Downgraded from confirmed to reported."
        },
        {
          "field": "notable_customers",
          "was": "NVIDIA, Nebius, Eigent (all frontier_lab_tie:false) listed as 'customers'",
          "now": "Added OpenAI and Meta (both also environment providers on the same page); set frontier_lab_tie:true for NVIDIA, OpenAI, Meta; clarified all are self-claimed environment contributors, not paying customers",
          "reason": "The OpenReward page also lists OpenAI (MLE-bench) and Meta (GAIA) as environment providers; omitting them while including NVIDIA/Nebius/Eigent was inconsistent. NVIDIA/OpenAI/Meta carry frontier-lab ties. None are verified paying customers; all are vendor-page contributors, so they remain self-claimed."
        },
        {
          "field": "hq_location",
          "was": "London, United Kingdom (Shoreditch), confidence: confirmed",
          "now": "London operating hub noted; added that legal entity General Reasoning, Inc. is US-registered (San Francisco) per SEC Form D, confidence: confirmed",
          "reason": "SEC Form D lists the entity as based in San Francisco/US (Delaware Inc.), while LinkedIn and careers list London. Both are true; the record should disclose the dual US-legal / London-operating structure."
        },
        {
          "field": "other_locations",
          "was": "[], confidence: confirmed",
          "now": "[], confidence: reported",
          "reason": "Cannot be 'confirmed' as a negative; downgraded to reported. London is currently the only chapter but more are advertised as 'coming soon'."
        },
        {
          "field": "current_headcount",
          "was": "~10-11 (LinkedIn public snippet), confidence: reported",
          "now": "~10 (LinkedIn public snippet; 11-50 band), confidence: reported",
          "reason": "LinkedIn public snippet shows ~10 employees within an 11-50 band; minor wording cleanup, confidence unchanged."
        }
      ],
      "verification_summary": "Confirmed this is the correct entity matching the 'research grade, open-leaning' note: Ross Taylor's General Reasoning / gr.inc, building open RL environments (OpenReward, Open Reward Standard), distinct from the similarly named 'General Analysis' (a separate $10M-seed agentic-security startup) and 'General Intuition' (a $133.7M gaming-RL lab), neither of which the draft confused. Verified founding year 2025 across LinkedIn, careers page, and SEC Form D. Headcount ~10 / 11-50 band confirmed via LinkedIn snippet; the draft did not overstate scale (already small), so only the wording was cleaned up. Key downgrades: total_raised and last_round moved from 'confirmed' to 'reported' since the SEC Form D reports amount sold in a single offering (not lifetime total) with no stage label; Crunchbase corroborates ~$10.9M (Aug 2025). No investors are disclosed anywhere credible, kept unknown. notable_customers corrected: NVIDIA/Nebius/Eigent (and the previously-omitted OpenAI and Meta) appear on the vendor's own OpenReward page as environment providers/contributors, NOT verified paying customers, so all remain self-claimed; flagged frontier-lab ties for NVIDIA, OpenAI, and Meta. HQ clarified as London operating hub with US-registered legal entity (General Reasoning, Inc., San Francisco). SOC2/certifications/valuation/revenue correctly left unknown. Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Confirmed identity: gr.inc = General Reasoning, London AI research lab, founding team ex-Meta (Galactica, Llama 2/3); matches directory note (research grade, open-leaning) and tags (long horizon, reasoning, multi-agent).",
          "Products: OpenReward platform + Open Reward Standard (ORS) open-source spec; KellyBench long-horizon benchmark.",
          "Deployment: managed-hosted, self-hosted, and API access all documented.",
          "Founders: Ross Taylor, Kip Parker, Chengxi Wang (Taylor), Thomas Grady (confirmed via SEC Form D directors).",
          "Funding: SEC Form D filed 2025-07-11 for $10,904,992 equity.",
          "HQ: Shoreditch, London. Founded 2025. ~10 LinkedIn headcount (11-50 band).",
          "GitHub org GeneralReasoning with environment repos; firehorse harness under Apache-2.0.",
          "3 open roles (all Member of Technical Staff: Head of Rollouts, Infrastructure, Research Scientist)."
        ],
        "missing": [
          "Named investors (Form D does not disclose; no credible press found).",
          "Valuation, revenue signals.",
          "SOC 2 / ISO / security or trust page.",
          "Verified (vs self-claimed) customers; exact license for the ORS spec itself.",
          "Exact researcher count and detailed individual backgrounds beyond founders."
        ],
        "conflicts": [
          "Funding date appears as both 2025-07-11 (SEC Form D) and 2025-08-11 (one LinkedIn-derived snippet); SEC Form D 2025-07-11 treated as authoritative.",
          "Frequent name collisions in search with 'General Intuition' ($133.7M seed, spatial-temporal/gaming), 'General Analysis' ($10M seed, agentic security, SF), and 'General Intelligence' - none of which is this company. Investor lists from those firms were excluded."
        ],
        "stale": [],
        "open_questions": [
          "Who are the actual seed investors/angels in the $10.9M round?",
          "Is the round officially labeled 'seed' and is $10.9M the full total raised to date?",
          "What specific OSS license governs the Open Reward Standard specification?",
          "Are NVIDIA/Nebius/Eigent/OpenAI/Meta commercial customers or community/infra partners?"
        ]
      }
    },
    {
      "rank": null,
      "focus_areas_normalised": [
        "Computer Use"
      ],
      "slug": "cua",
      "brand_name": "Cua",
      "segment": "Adjacent: execution infrastructure",
      "website": "https://cua.ai/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://cua.ai/ (accessed 2026-06-07); https://github.com/trycua/cua (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2025,
        "confidence": "confirmed",
        "source": "https://www.ycombinator.com/companies/cua (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "San Francisco, CA, USA",
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/cua (lists SF; accessed 2026-06-07). Note: Tracxn lists Dover, DE (likely state of incorporation), so a single authoritative HQ is not fully resolved."
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "~3-10 (YC lists team size 3; LinkedIn shows 2-10 band)",
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/cua (team size 3; accessed 2026-06-07); https://www.linkedin.com/company/cua-ai (2-10 band; accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "1-10",
        "confidence": "reported",
        "source": "https://www.linkedin.com/company/cua-ai (2-10 band; accessed 2026-06-07); https://www.ycombinator.com/companies/cua (team size 3; accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": 2,
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/cua (accessed 2026-06-07); 2 roles (Research Intern Summer 2026; Founding Engineer, Infra & Agent Systems)"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/cua (Research Intern role; accessed 2026-06-07); https://github.com/trycua/cua (Cua-Bench RL/eval work; accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Francesco Bonacci, co-founder, previously at Xbox / Microsoft AI",
          "Alessandro Puppo, co-founder (background not detailed in public sources)"
        ],
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/cua (Bonacci, ex-Xbox/Microsoft AI; accessed 2026-06-07); Tracxn / LinkedIn list Alessandro Puppo as co-founder (accessed 2026-06-07)"
      },
      "what_they_sell": {
        "value": "infra",
        "confidence": "confirmed",
        "source": "https://cua.ai/ (accessed 2026-06-07); https://github.com/trycua/cua (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "managed-hosted (Cua Cloud) + self-hosted/on-prem (Enterprise On-Prem, Lume local) + API/SDK",
        "confidence": "confirmed",
        "source": "https://cua.ai/pricing (accessed 2026-06-07); https://cua.ai/ (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "GA (Free, Pro from $10/mo, and Enterprise plans publicly available; Cua Driver in pre-release)",
        "confidence": "confirmed",
        "source": "https://cua.ai/pricing (accessed 2026-06-07); https://github.com/trycua/cua (accessed 2026-06-07)"
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/trycua/cua (accessed 2026-06-07)"
      },
      "license": {
        "value": "MIT",
        "confidence": "confirmed",
        "source": "https://github.com/trycua/cua (MIT License; accessed 2026-06-07)"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://cua.ai/ and https://cua.ai/pricing (no compliance/SOC2 mentions found; accessed 2026-06-07)"
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "No dedicated trust/security page found on cua.ai (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "~$500K (pre-seed/seed, ~June 2025)",
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/cua-ai (aggregator; accessed 2026-06-07); https://www.linkedin.com/company/cua-ai (accessed 2026-06-07). No primary press release or official funding announcement located; figure rests on aggregators only."
      },
      "last_round": {
        "value": "Pre-seed/seed, ~$500K, ~June 2025",
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/cua-ai (lists ~$500K, round ~June 2025; accessed 2026-06-07). Round-type label (pre-seed vs seed) and exact date inconsistent across aggregator snippets; no primary announcement found."
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Y Combinator (X25 batch)"
        ],
        "confidence": "reported",
        "source": "https://www.ycombinator.com/companies/cua (YC X25 batch membership confirmed; accessed 2026-06-07). YC as a named round investor is corroborated only by aggregators; no primary round announcement located."
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [
          {
            "name": "Hugging Face",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Datadog",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Meta",
            "verification": "self-claimed",
            "frontier_lab_tie": true
          },
          {
            "name": "Elastic",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Apple",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Red Hat",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "NVIDIA",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          },
          {
            "name": "Duolingo",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://cua.ai/ ('Trusted by 50,000+ engineers at your favorite companies' logo wall, self-claimed; presented as organizations whose engineers use the OSS, not verified paying customers; accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "Cua-Bench (benchmarking suite / RL environments; runs OSWorld, ScreenSpot, Windows Arena, exports RL training trajectories), https://cuabench.ai / https://github.com/trycua/cua"
        ],
        "confidence": "reported",
        "source": "https://github.com/trycua/cua (Cua-Bench component; accessed 2026-06-07); https://cuabench.ai/ (per draft, accessed 2026-06-07). This is a vendor benchmarking/eval product, not a peer-reviewed paper."
      },
      "focus_areas": [
        "computer use environments",
        "execution infrastructure",
        "evaluation / benchmarks"
      ],
      "positioning_summary": "Cua (trycua, YC X25) is open-source MIT-licensed infrastructure for computer-use agents, providing cloud and self-hosted sandboxes across macOS, Windows, Linux, and Android plus an SDK, a virtualization layer (Lume), and a benchmarking/RL-eval suite (Cua-Bench). It positions itself as the 'Docker for computer-use agents,' giving any agent a cloud desktop.",
      "best_fit_use_case": "Teams needing on-demand sandboxed desktop/OS environments to run, evaluate, or generate RL trajectories for computer-use agents.",
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://www.ycombinator.com/companies/cua",
          "accessed_date": "2026-06-07",
          "note": "YC profile: founded 2025, SF, team size 3, founder Francesco Bonacci (ex-Xbox/Microsoft AI), YC partner Diana Hu, open roles"
        },
        {
          "url": "https://github.com/trycua/cua",
          "accessed_date": "2026-06-07",
          "note": "Repo: ~17.7k stars, MIT license, components (Sandbox, Lume, Cua-Bench, Driver), 3486 commits, latest release May 2026"
        },
        {
          "url": "https://cua.ai/",
          "accessed_date": "2026-06-07",
          "note": "Official site: cloud sandboxes any OS, hot-start <1s, 'Trusted by 50,000+ engineers' logos (HF, Datadog, Meta, Apple, Red Hat, NVIDIA, Elastic, Duolingo)"
        },
        {
          "url": "https://cua.ai/pricing",
          "accessed_date": "2026-06-07",
          "note": "Free, Pro from $10/mo, Enterprise Cloud, Enterprise On-Prem (self-hosted), Research Labs (Cua-Bench task packs/eval harnesses); per-resource pricing; no SOC2/compliance mentions"
        },
        {
          "url": "https://www.linkedin.com/company/cua-ai",
          "accessed_date": "2026-06-07",
          "note": "Public snippet: founded 2025, SF, 2-10 employees band, pre-seed $500K July 2025"
        },
        {
          "url": "https://tracxn.com/d/companies/cua-ai",
          "accessed_date": "2026-06-07",
          "note": "Funding $500K seed/pre-seed, founders Francesco Bonacci and Alessandro Puppo, founded 2025"
        },
        {
          "url": "https://cuabench.ai/",
          "accessed_date": "2026-06-07",
          "note": "Cua-Bench: runs OSWorld, ScreenSpot, Windows Arena; parallel execution, trajectory export for RL, dataset versioning"
        },
        {
          "url": "https://news.ycombinator.com/item?id=43773563",
          "accessed_date": "2026-06-07",
          "note": "Launch HN: Cua (YC X25) Open-Source Docker Container for Computer-Use Agents"
        },
        {
          "url": "https://www.linkedin.com/in/francesco-bonacci-70428a121/",
          "accessed_date": "2026-06-07",
          "note": "Co-founder Francesco Bonacci"
        },
        {
          "url": "https://www.linkedin.com/in/alessandro-puppo/",
          "accessed_date": "2026-06-07",
          "note": "Co-founder Alessandro Puppo"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "hq_location.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "YC lists San Francisco but Tracxn lists Dover, DE (likely incorporation state). With conflicting sources, a single confirmed HQ is not established; downgraded and annotated."
        },
        {
          "field": "current_headcount.value",
          "was": "~3-10 (YC lists team size 3; LinkedIn shows 2-10 band, ~10 associated as of 2026-06-07)",
          "now": "~3-10 (YC lists team size 3; LinkedIn shows 2-10 band)",
          "reason": "Removed the unverifiable '~10 associated' precision; LinkedIn public snippet shows only the 2-10 band, not a specific count."
        },
        {
          "field": "researcher_backgrounds.value",
          "was": "['Francesco Bonacci, co-founder, previously at Xbox / Microsoft AI']",
          "now": "Added Alessandro Puppo as co-founder",
          "reason": "Tracxn and LinkedIn consistently list Alessandro Puppo as a second co-founder; the draft omitted him. Puppo's specific background is not documented publicly, so noted as not detailed."
        },
        {
          "field": "total_raised.value",
          "was": "$500K (pre-seed, July 2025)",
          "now": "~$500K (pre-seed/seed, ~June 2025)",
          "reason": "No primary press/official announcement found; only aggregators (Tracxn) and LinkedIn. Tracxn dates the round ~June 2025 (one snippet labels it Seed), not July; round type and date are inconsistent across sources, so language broadened."
        },
        {
          "field": "last_round.value",
          "was": "Pre-seed, ~$500K, July 2025 (closed ~1 week after YC X25 Demo Day)",
          "now": "Pre-seed/seed, ~$500K, ~June 2025",
          "reason": "The 'closed ~1 week after Demo Day' detail is unverifiable and not in any located source; date corrected toward June 2025 per aggregator; round-type label is inconsistent across snippets."
        },
        {
          "field": "notable_investors.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "YC X25 batch membership is confirmed, but YC as a named investor in the $500K round is corroborated only by aggregators (Tracxn/LinkedIn); no primary funding announcement located. Downgraded per the rule against marking a field 'confirmed' without a primary/credible round source."
        },
        {
          "field": "notable_customers.confidence",
          "was": "reported",
          "now": "reported",
          "reason": "Unchanged confidence, but reaffirmed all entries as self-claimed (a 'Trusted by 50,000+ engineers' OSS logo wall on the vendor's own site); no third-party verification of any as paying customers."
        },
        {
          "field": "published_papers_or_benchmarks.confidence",
          "was": "confirmed",
          "now": "reported",
          "reason": "Cua-Bench is a vendor benchmarking/eval product confirmed to exist in the repo, but it is not a peer-reviewed paper or independent benchmark; downgraded to reflect it is a self-published tool."
        }
      ],
      "verification_summary": "Confirmed this is the CORRECT entity matching the directory note 'docker for computer-use agents': trycua/cua, YC X25 (Spring 2025), founder Francesco Bonacci (ex-Xbox/Microsoft AI), components Lume/Sandbox/Cua-Bench/Driver, MIT license, ~17.7k GitHub stars, cua.ai. No same-name confusion. Key downgrades: (1) Funding, the $500K figure rests only on aggregators (Tracxn) and LinkedIn with no primary press release; round date/type are inconsistent (June vs July; pre-seed vs seed), so total_raised and last_round language was broadened and notable_investors downgraded confirmed->reported. (2) HQ, YC says San Francisco, Tracxn says Dover, DE; downgraded to reported. (3) Added omitted co-founder Alessandro Puppo. (4) Customers remain self-claimed (OSS 'Trusted by 50,000+ engineers' logo wall, not verified paying customers); Meta retained as frontier-lab tie (FAIR/Llama). (5) SOC2/security confirmed as unknown, no trust/compliance page on cua.ai. (6) Cua-Bench downgraded to reported as a vendor eval product, not a peer-reviewed publication. Open-source, license (MIT), deployment model, and maturity stand as confirmed from primary sources (GitHub + cua.ai/pricing). Tracxn page itself returned 404 on direct fetch, so funding remains aggregator-grade only.",
      "research_notes": {
        "found": [
          "Confirmed correct company: trycua / Cua (cua.ai), YC X25, 'Docker for computer-use agents', matches directory note and Computer Use tag.",
          "Open-source MIT infrastructure (GitHub trycua/cua, ~17.7k stars).",
          "Product: cloud + self-hosted/on-prem sandboxes (macOS/Windows/Linux/Android), Lume virtualization, SDK, Cua-Bench (RL/eval).",
          "Pricing publicly listed incl. Research Labs tier with Cua-Bench task packs and eval harnesses.",
          "Founded 2025, SF HQ, pre-seed/seed ~$500K (~June 2025 per Tracxn; LinkedIn snippet says July 2025), backed by Y Combinator.",
          "Founders Francesco Bonacci (ex-Xbox/Microsoft AI) and Alessandro Puppo."
        ],
        "missing": [
          "Exact current headcount (YC says 3, LinkedIn band 2-10).",
          "SOC 2 / ISO 27001 status, no security/trust page found.",
          "Researcher count, valuation, revenue, headcount growth.",
          "Verified (vs self-claimed) paying customers."
        ],
        "conflicts": [
          "Founder count: YC profile lists only Francesco Bonacci; Tracxn/LinkedIn list two founders (Bonacci + Alessandro Puppo). Treated as two founders.",
          "Native-speed claim varies: GitHub/site say up to 97% native CPU on Apple Silicon; LinkedIn page says 'up to 90% native speed.'",
          "Registry HQ: Tracxn lists Dover (likely state of incorporation) vs operational HQ San Francisco."
        ],
        "stale": [
          "Funding figure (~$500K, dated ~June-July 2025 across sources) is ~11-12 months old; no newer round found as of 2026-06-07, verify before relying."
        ],
        "open_questions": [
          "Are the logo-wall companies (Meta, Apple, NVIDIA, Datadog, etc.) paying customers or just OSS users? Site frames them as part of '50,000+ engineers', treated as self-claimed/community.",
          "Any frontier-lab commercial relationship beyond model integration (Anthropic/OpenAI models supported, OSWorld author Tianbao Xie among Cua-Bench contributors)?",
          "Has Cua raised beyond the initial pre-seed?"
        ]
      }
    },
    {
      "rank": null,
      "focus_areas_normalised": [
        "Long-Horizon"
      ],
      "slug": "good-start-labs",
      "brand_name": "Good Start Labs",
      "segment": "Open-source & open environments",
      "website": "https://goodstartlabs.com",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://goodstartlabs.com/ (accessed 2026-06-07); https://github.com/GoodStartLabs (accessed 2026-06-07); https://www.inovia.vc/active-companies/good-start-labs/ (accessed 2026-06-07)"
      },
      "focus_areas": [
        "long-horizon / general reasoning",
        "evaluation / benchmarks"
      ],
      "what_they_sell": {
        "value": "environments",
        "confidence": "reported",
        "source": "https://every.to/on-every/our-new-incubation-raised-3-6-million-to-teach-ais-to-play-games (accessed 2026-06-07); https://www.inovia.vc/active-companies/good-start-labs/ (accessed 2026-06-07)"
      },
      "positioning_summary": "Good Start Labs is a 2025 Every spin-out that builds game-based environments to generate reinforcement-learning data and evaluate frontier models, using both custom games and partnerships with existing games where player behavior helps train and rank AI. It is known for AI Diplomacy / Diplomacy Arena (multi-agent long-horizon strategy) and LOL Arena (humor preference), and publishes openly on GitHub and Hugging Face.",
      "best_fit_use_case": "Buyers wanting open, game-based RL environments and benchmarks for long-horizon multi-agent reasoning and preference/humor evaluation.",
      "maturity": {
        "value": "research preview",
        "confidence": "estimated",
        "source": "https://github.com/GoodStartLabs (accessed 2026-06-07), open arenas/benchmarks (Diplomacy Arena, LOL Arena) and OSS repos; early-stage 2025 spin-out"
      },
      "deployment_model": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/GoodStartLabs (accessed 2026-06-07); https://github.com/GoodStartLabs/AI_Diplomacy (accessed 2026-06-07)"
      },
      "license": {
        "value": "Apache-2.0 (OpenTinker repo); AI_Diplomacy repo license not specified",
        "confidence": "reported",
        "source": "https://github.com/GoodStartLabs (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2025,
        "confidence": "confirmed",
        "source": "https://www.inovia.vc/active-companies/good-start-labs/ (accessed 2026-06-07); https://every.to/on-every/our-new-incubation-raised-3-6-million-to-teach-ais-to-play-games (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "Brooklyn, NY, USA",
        "confidence": "reported",
        "source": "https://www.inovia.vc/active-companies/good-start-labs/ (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [
          "Toronto, ON, Canada"
        ],
        "confidence": "reported",
        "source": "https://www.inovia.vc/active-companies/good-start-labs/ (accessed 2026-06-07)"
      },
      "distributed_remote": {
        "value": "yes",
        "confidence": "estimated",
        "source": "https://www.inovia.vc/active-companies/good-start-labs/ (accessed 2026-06-07), HQ Brooklyn with Toronto office; cross-border US/Canada team implies distributed; startups.gallery lists remote but is low-reliability"
      },
      "current_headcount": {
        "value": "~9-10 (as of 2026-06-07)",
        "confidence": "reported",
        "source": "https://huggingface.co/GoodStartLabs (9 team members, accessed 2026-06-07); LinkedIn company page public snippet (size 2-10, accessed 2026-06-07)"
      },
      "headcount_band": {
        "value": "1-10",
        "confidence": "reported",
        "source": "https://huggingface.co/GoodStartLabs (9 team members, accessed 2026-06-07); LinkedIn public snippet (size 2-10, accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://goodstartlabs.com/careers (careers page exists; direct fetch returned 403; exact count not confirmed, accessed 2026-06-07)"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://huggingface.co/GoodStartLabs (research model/dataset publishing, accessed 2026-06-07); https://github.com/GoodStartLabs (research benchmarks, accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [
          "Alex Duffy (Co-Founder & CEO), ex-Head of AI Training at Every; creator of AI Diplomacy; background in AI training/education (AI Camp); prior ML work",
          "Tyler Marques (Co-Founder & CTO), University of Waterloo; applied AI/ML/DevOps background"
        ],
        "confidence": "reported",
        "source": "https://www.inovia.vc/active-companies/good-start-labs/ (accessed 2026-06-07); https://every.to/p/diplomacy (Alex Duffy bio, accessed 2026-06-07); https://ca.linkedin.com/in/tylermarques (accessed 2026-06-07)"
      },
      "total_raised": {
        "value": "$3.6M",
        "confidence": "reported",
        "source": "https://every.to/on-every/our-new-incubation-raised-3-6-million-to-teach-ais-to-play-games (accessed 2026-06-07); https://www.inovia.vc/active-companies/good-start-labs/ (accessed 2026-06-07)"
      },
      "last_round": {
        "value": "Seed, $3.6M, October 2025 (Inovia describes it as pre-seed)",
        "confidence": "reported",
        "source": "https://every.to/on-every/our-new-incubation-raised-3-6-million-to-teach-ais-to-play-games (accessed 2026-06-07); https://www.inovia.vc/active-companies/good-start-labs/ (pre-seed, Oct 2025, accessed 2026-06-07); https://www.linkedin.com/posts/tirtavc_were-thrilled-to-announce-tirta-ventures-activity-7384641634371608576-pf7K (seed, accessed 2026-06-07)"
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "General Catalyst",
          "Inovia Capital",
          "Tirta Ventures",
          "Every",
          "angel investors from top AI labs (incl. DeepMind alumni)"
        ],
        "confidence": "reported",
        "source": "https://every.to/on-every/our-new-incubation-raised-3-6-million-to-teach-ais-to-play-games (General Catalyst, Inovia, Every, DeepMind angels, accessed 2026-06-07); https://www.linkedin.com/posts/tirtavc_were-thrilled-to-announce-tirta-ventures-activity-7384641634371608576-pf7K (Tirta Ventures co-lead, accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [
          {
            "name": "Bad Cards (Discord Cards-Against-Humanity-style game; LOL Arena partnership / data source)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://every.to/on-every/our-new-incubation-raised-3-6-million-to-teach-ais-to-play-games (accessed 2026-06-07), described as a game partnership, not a paying customer; no third-party confirmation"
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "published_papers_or_benchmarks": {
        "value": [
          "AI Diplomacy / Diplomacy Arena, frontier models playing the board game Diplomacy (github.com/GoodStartLabs/AI_Diplomacy)",
          "LOL Arena, humor preference benchmark informed by human votes",
          "GSLBenchmark / gsl-benchmark-logs (Hugging Face datasets)",
          "OpenTinker, RL infrastructure repo (Apache-2.0)"
        ],
        "confidence": "reported",
        "source": "https://github.com/GoodStartLabs (accessed 2026-06-07); https://huggingface.co/GoodStartLabs (accessed 2026-06-07); https://every.to/p/diplomacy (accessed 2026-06-07)"
      },
      "overall_confidence": "medium",
      "sources": [
        {
          "url": "https://goodstartlabs.com/about",
          "accessed_date": "2026-06-07",
          "note": "Official about page (referenced via search; direct fetch 403)"
        },
        {
          "url": "https://goodstartlabs.com/careers",
          "accessed_date": "2026-06-07",
          "note": "Official careers page; hiring research leads/scientists; direct fetch returned 403"
        },
        {
          "url": "https://github.com/GoodStartLabs",
          "accessed_date": "2026-06-07",
          "note": "GitHub org: AI_Diplomacy (669 stars), OpenTinker (Apache-2.0), .github; arenas Diplomacy/LOL"
        },
        {
          "url": "https://huggingface.co/GoodStartLabs",
          "accessed_date": "2026-06-07",
          "note": "Verified HF org, 9 team members, 47 models, 8 datasets (e.g. GSLBenchmark, gsl-benchmark-logs)"
        },
        {
          "url": "https://www.inovia.vc/active-companies/good-start-labs/",
          "accessed_date": "2026-06-07",
          "note": "Investor page: founded 2025, HQ Brooklyn NY, Toronto office, pre-seed co-led by Inovia Oct 2025, founders Alex Duffy (CEO)/Tyler Marques (CTO)"
        },
        {
          "url": "https://www.inovia.vc/founders/company-founders/tyler-marques/",
          "accessed_date": "2026-06-07",
          "note": "Tyler Marques background: Waterloo, ~7 yrs AI/ML/DevOps"
        },
        {
          "url": "https://every.to/on-every/our-new-incubation-raised-3-6-million-to-teach-ais-to-play-games",
          "accessed_date": "2026-06-07",
          "note": "Funding announcement $3.6M; investors General Catalyst, Inovia, Every, DeepMind angels; Every spin-out (direct fetch 403, content via search)"
        },
        {
          "url": "https://tracxn.com/d/companies/goodstartlabs/funding-and-investors",
          "accessed_date": "2026-06-07",
          "note": "Total $3.6M, 1 round, round date Oct 15 2025, 37 investors (4 institutional)"
        },
        {
          "url": "https://www.linkedin.com/company/good-start-labs",
          "accessed_date": "2026-06-07",
          "note": "Public snippet: founded 2025, ~10 employees / size 2-10, ~535 followers, specialties AI/Games/Data/RL/LLM training"
        },
        {
          "url": "https://startups.gallery/companies/good-start-labs",
          "accessed_date": "2026-06-07",
          "note": "Lists remote, 1-10 employees, NY; investor list appears to be boilerplate/inaccurate (conflicts with credible sources)"
        },
        {
          "url": "https://every.to/diplomacy",
          "accessed_date": "2026-06-07",
          "note": "AI Diplomacy project writeup / Alex Duffy background (ex-Salt AI, AI Camp, Every Head of AI Training)"
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "focus_areas",
          "was": "[long-horizon / general reasoning, evaluation / benchmarks, enterprise workflows]",
          "now": "[long-horizon / general reasoning, evaluation / benchmarks]",
          "reason": "Removed 'enterprise workflows', the company's core product is game-based RL environments and benchmarks for frontier labs. The only enterprise-workflow signal is a single Every blog post about a customer-support agent experiment, which is not a product line. Unsupported as a focus area."
        },
        {
          "field": "notable_investors",
          "was": "[General Catalyst, Inovia Capital, Every, angel investors from frontier/top AI labs (incl. DeepMind alumni)]",
          "now": "[General Catalyst, Inovia Capital, Tirta Ventures, Every, angel investors from top AI labs (incl. DeepMind alumni)]",
          "reason": "Added Tirta Ventures, which publicly announced co-leading the $3.6M round (Tirta Ventures LinkedIn post). Draft omitted a co-lead investor confirmed by a primary source."
        },
        {
          "field": "last_round",
          "was": "Seed/pre-seed, $3.6M, announced 2025-10-15",
          "now": "Seed, $3.6M, October 2025 (Inovia describes it as pre-seed)",
          "reason": "Sources conflict on round label: Every and Tirta call it 'seed'; Inovia's own page calls it 'pre-seed'. Documented the discrepancy. Kept the exact Oct-15-2025 date out of the headline value because it was sourced primarily from Tracxn (aggregator); month/year is firm across primary sources."
        },
        {
          "field": "distributed_remote",
          "was": "value yes / confidence reported",
          "now": "value yes / confidence estimated",
          "reason": "Downgraded to estimated. The 'remote' label came from startups.gallery, flagged in the draft's own sources as low-reliability/boilerplate. Distributed status is inferred from Brooklyn HQ + Toronto office, not directly stated by a credible source."
        },
        {
          "field": "license",
          "was": "confidence confirmed",
          "now": "confidence reported",
          "reason": "Downgraded. License only verified via GitHub org browsing for one repo (OpenTinker Apache-2.0); AI_Diplomacy license unspecified. Not independently re-confirmed at file level, so 'reported' is more appropriate than 'confirmed'."
        },
        {
          "field": "published_papers_or_benchmarks",
          "was": "confidence confirmed",
          "now": "confidence reported",
          "reason": "These are open benchmarks/repos/datasets (arenas, HF datasets), not peer-reviewed papers. Existence is well-supported but 'confirmed' overstates; downgraded to 'reported'."
        },
        {
          "field": "researcher_backgrounds",
          "was": "Alex Duffy ... previously led a team of ML PhDs at Salt AI (drug discovery); ... Tyler Marques ... ~7 years applied AI/ML/DevOps; built terabyte-scale data pipelines; previously co-founded an engineering/infra consulting firm",
          "now": "Trimmed to verifiable claims (Duffy: ex-Head of AI Training at Every, AI Diplomacy creator, AI Camp; Marques: Waterloo, applied AI/ML/DevOps)",
          "reason": "Removed specific unverifiable details (Salt AI 'team of ML PhDs', 'terabyte-scale pipelines', specific prior consulting firm) that could not be independently re-confirmed in this pass. Kept the well-supported core."
        },
        {
          "field": "current_headcount",
          "was": "source cites LinkedIn '~10 employees' as primary",
          "now": "source reordered to lead with Hugging Face (9 team members) with LinkedIn size 2-10 as corroboration",
          "reason": "LinkedIn public snippet could not be directly read in re-verification; HF org team count (9) is the more directly observable figure. Value and band unchanged; confidence stays 'reported'."
        },
        {
          "field": "status",
          "was": "source: about + github",
          "now": "source: homepage + github + Inovia",
          "reason": "goodstartlabs.com/about returned 403 in the draft; replaced with the reachable homepage and added the Inovia active-companies page as corroboration that the company is active."
        }
      ],
      "verification_summary": "Confirmed this is the CORRECT company matching the directory note ('game environments for frontier labs'): Good Start Labs is a 2025 Every spin-out building game-based RL environments (AI Diplomacy/Diplomacy Arena, LOL Arena) to generate training data and evaluate frontier models. Identity, founders (Alex Duffy CEO, Tyler Marques CTO), 2025 founding, Brooklyn HQ + Toronto office, and the $3.6M raise are all corroborated by multiple sources including the Every announcement and Inovia's investor page. Key corrections: added Tirta Ventures as a confirmed co-lead investor (missed in draft); flagged seed vs pre-seed labeling conflict between sources; removed 'enterprise workflows' focus area as unsupported; downgraded distributed_remote, license, and published_papers confidence; trimmed unverifiable founder-background specifics. Funding/headcount kept at 'reported' (aggregator/snippet-dependent, not primary-confirmed). Customers: Bad Cards is a game/data partnership (self-claimed, no frontier-lab tie) rather than a paying customer. No SOC2/trust page found, kept unknown. Overall confidence: medium.",
      "research_notes": {
        "found": [
          "Confirmed correct company via directory note (game environments for frontier labs) + tags (Long Horizon, Open Source): Good Start Labs, goodstartlabs.com, Every spin-out.",
          "Founders Alex Duffy (CEO, ex-Every Head of AI Training, ex-Salt AI, AI Camp) and Tyler Marques (CTO, Waterloo, ~7yrs AI/ML/DevOps).",
          "Founded 2025; HQ Brooklyn NY with Toronto, ON team; remote.",
          "$3.6M seed/pre-seed announced 2025-10-15 led by Inovia + General Catalyst, with Every and frontier-lab angels (DeepMind alumni).",
          "Open source confirmed: GitHub org (AI_Diplomacy ~669 stars, OpenTinker Apache-2.0) and Hugging Face org (9 members, 47 models, 8 datasets).",
          "Public benchmarks/arenas: Diplomacy Arena (long-horizon multi-agent), LOL Arena (humor preference), GSLBenchmark."
        ],
        "missing": [
          "Exact open-roles count (careers page returned 403).",
          "SOC 2 / ISO / security/trust page (none found; likely none for an early-stage startup).",
          "Valuation, revenue signals.",
          "Named frontier-lab customers (described as 'for frontier labs' / angels from labs, but no verified named customer)."
        ],
        "conflicts": [
          "startups.gallery lists a generic VC roster (Sequoia, a16z, YC, Thrive, etc.) as investors, this conflicts with all credible sources (Every, Inovia, Tracxn) and appears to be template boilerplate; disregarded.",
          "Headcount: LinkedIn snippet shows ~10 / size 2-10; Hugging Face shows 9 team members; both consistent with 1-10 band.",
          "Funding stage labeled 'pre-seed' by Inovia vs 'seed' by Tracxn/startups.gallery, amount ($3.6M, Oct 2025) consistent."
        ],
        "stale": [],
        "open_questions": [
          "Are there contracted/paid frontier-lab customers (vs. angels who happen to work at labs)?",
          "Is the product offered as a managed service/API or purely open environments + research?",
          "What is the AI_Diplomacy repo license (not specified on org page)?"
        ]
      }
    },
    {
      "rank": null,
      "focus_areas_normalised": [
        "Coding"
      ],
      "slug": "morph",
      "brand_name": "Morph",
      "segment": "Adjacent: execution infrastructure",
      "overall_confidence": "medium",
      "website": "https://www.morph.so",
      "focus_areas": [
        "execution infrastructure",
        "coding environments",
        "evaluation / benchmarks"
      ],
      "positioning_summary": "Morph (Morph Labs) provides snapshot-based VM compute for AI agents via its Infinibranch / Liquid Metal technology, which can snapshot, branch, and restore entire computational environments in roughly 100-250ms to enable massively parallel, reversible ('Git for compute') agent rollouts, evaluations, and reasoning-time branching. It markets the platform (Morph Cloud) as infrastructure for running and scaling agent/RL verification environments rather than as an RL-environment dataset vendor itself.",
      "best_fit_use_case": "Teams needing to fork, snapshot, and run thousands of parallel/reversible sandboxed VM environments for agent rollouts, evals, and verification at scale.",
      "what_they_sell": {
        "value": "infra",
        "confidence": "confirmed",
        "source": "https://cloud.morph.so/docs/developers (accessed 2026-06-07); https://www.morph.so/blog/infinibranch/ (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "managed-hosted (cloud platform, accessed via Python/TypeScript SDK, CLI, and API keys)",
        "confidence": "confirmed",
        "source": "https://cloud.morph.so/docs/developers (accessed 2026-06-07)"
      },
      "maturity": {
        "value": "GA",
        "confidence": "estimated",
        "source": "https://cloud.morph.so/docs/developers (accessed 2026-06-07), public self-serve signup, SDKs, and docs available; no explicit GA label found"
      },
      "open_source": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://github.com/morph-labs (accessed 2026-06-07), SDKs (morph-python-sdk, morph-typescript-sdk) and examples are Apache-2.0/MIT; core Infinibranch/Liquid Metal platform is proprietary"
      },
      "license": {
        "value": "Apache-2.0 (SDKs and morphcloud-examples-public); MIT (some example repos). Core platform proprietary.",
        "confidence": "confirmed",
        "source": "https://github.com/morph-labs (accessed 2026-06-07)"
      },
      "founded_year": {
        "value": 2023,
        "confidence": "reported",
        "source": "https://tracxn.com/d/companies/morph-labs (accessed 2026-06-07); https://www.cbinsights.com/company/morph-labs (accessed 2026-06-07)"
      },
      "hq_location": {
        "value": "San Francisco, USA",
        "confidence": "reported",
        "source": "https://www.cbinsights.com/company/morph-labs (accessed 2026-06-07); https://tracxn.com/d/companies/morph-labs (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "distributed_remote": {
        "value": "yes",
        "confidence": "reported",
        "source": "https://www.morph.so/careers (accessed 2026-06-07), remote roles indicated"
      },
      "current_headcount": {
        "value": "~9-11 employees (as of 2026)",
        "confidence": "estimated",
        "source": "https://rocketreach.co/morph-labs-management_b7384e18c7e2a4d3 (accessed 2026-06-07, ~9); https://pitchbook.com/profiles/company/44794-09 (accessed 2026-06-07, ~11)"
      },
      "headcount_band": {
        "value": "1-10",
        "confidence": "estimated",
        "source": "https://rocketreach.co/morph-labs-management_b7384e18c7e2a4d3 (accessed 2026-06-07), borderline with 11-50 per PitchBook (~11)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "https://www.morph.so/careers (accessed 2026-06-07), careers page exists and is actively recruiting researcher-engineers, but exact count not extractable (JS-rendered)"
      },
      "total_raised": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Tracxn lists Morph Labs (Jesse Han) as 'Unfunded / has not raised any funding rounds yet' (accessed 2026-06-07). A seed/angel investment by Christian Szegedy is reported but amount undisclosed. NOTE: the widely-cited '$19-20M seed' (Crunchbase round 875f78ba, led by Dragonfly Capital, 2024-03-20) belongs to the unrelated Morph Ethereum L2 / blockchain project, NOT this company."
      },
      "last_round": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [
          "Christian Szegedy (reported seed/angel investor; also Chief Scientist), amount undisclosed"
        ],
        "confidence": "reported",
        "source": "https://www.cogniscendo.com/p/the-superhuman-mathematician-5d1b (accessed 2026-06-07)"
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [
          {
            "name": "Math Inc (Math, Inc.), used Morph Cloud / Infinibranch to scale Lean verification environments for its Gauss autoformalization agent; company incubated at Morph (closely tied entity, not arm's-length customer)",
            "verification": "self-claimed",
            "frontier_lab_tie": false
          }
        ],
        "confidence": "reported",
        "source": "https://x.com/morph_labs/status/1966223824078479667 (accessed 2026-06-07); https://www.math.inc/gauss (accessed 2026-06-07)"
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "confirmed",
        "source": "https://www.morph.so/blog/liquid-metal (accessed 2026-06-07); https://github.com/morph-labs (accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "unknown (small team; at least CEO Jesse Han and Chief Scientist Christian Szegedy plus members of technical staff)",
        "confidence": "estimated",
        "source": "https://rocketreach.co/morph-labs-management_b7384e18c7e2a4d3 (accessed 2026-06-07)"
      },
      "researcher_backgrounds": {
        "value": [
          "Jesse Han, founder/CEO; PhD work in formal mathematics (formalized independence of the continuum hypothesis in Lean); ex-OpenAI / Microsoft Research / AWS per profiles",
          "Christian Szegedy, Chief Scientist; ex-xAI co-founder (Grok-3 code reasoning lead), ex-Google Research (deep learning/Inception, BatchNorm, adversarial examples, autoformalization)"
        ],
        "confidence": "reported",
        "source": "https://www.theinformation.com/briefings/xai-cofounder-joins-ai-code-startup-morph-chief-scientist (accessed 2026-06-07); https://www.startuphub.ai/ai-news/ai-video/2025/morph-labs-unveils-liquid-metal-powering-the-agentic-future (accessed 2026-06-07)"
      },
      "published_papers_or_benchmarks": {
        "value": [
          "Trinity, autoformalization system for verified superintelligence (Morph Labs blog)",
          "Morph Prover v0 7B, open-source model (early Morph Labs work)",
          "SWELancer-Benchmark setup on Morph Cloud (github.com/morph-labs/SWELancer-Benchmark)",
          "open-r1 reproduction (github.com/morph-labs/open-r1)"
        ],
        "confidence": "reported",
        "source": "https://github.com/morph-labs (accessed 2026-06-07); https://www.math.inc/gauss (accessed 2026-06-07)"
      },
      "soc2": {
        "value": "claimed-unverified",
        "confidence": "reported",
        "source": "https://trust.delve.co/morph (accessed 2026-06-07), trust portal snippet indicates a SOC 2 Type 2 report for 2025, but the portal is hosted by Delve, which in March 2026 was the subject of a TechCrunch/Substack investigation alleging fabrication of ~494 SOC 2 reports (allegations denied/unproven). Direct fetch returned 403/429; no independent audit-firm confirmation obtained. Treat as claimed-unverified pending independent verification."
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": "https://trust.delve.co/morph (accessed 2026-06-07), no ISO 27001 / HIPAA confirmed"
      },
      "security_page": {
        "value": "https://trust.delve.co/morph",
        "confidence": "reported",
        "source": "https://trust.delve.co/morph (accessed 2026-06-07)"
      },
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://cloud.morph.so/docs/developers (accessed 2026-06-07); https://x.com/morph_labs/status/1966223824078479667 (Math Inc incubation announcement, accessed 2026-06-07)"
      },
      "sources": [
        {
          "url": "https://www.morph.so/",
          "accessed_date": "2026-06-07",
          "note": "Official site (JS-rendered; minimal extractable text)"
        },
        {
          "url": "https://cloud.morph.so/docs/developers",
          "accessed_date": "2026-06-07",
          "note": "Morph Cloud docs, Infinibranch snapshot/branch/restore <250ms; SDKs (Python/TS), CLI, API keys; '50x faster AI agent evaluations'; identifies org as Morph Labs"
        },
        {
          "url": "https://www.morph.so/blog/infinibranch/",
          "accessed_date": "2026-06-07",
          "note": "Infinibranch blog (title only extractable)"
        },
        {
          "url": "https://www.morph.so/blog/liquid-metal",
          "accessed_date": "2026-06-07",
          "note": "Liquid Metal announcement, <100ms startup, <200ms snapshot, 'Git for compute', Szegedy as Chief Scientist, Magi-1 reasoning agent Q1 2026"
        },
        {
          "url": "https://www.startuphub.ai/ai-news/ai-video/2025/morph-labs-unveils-liquid-metal-powering-the-agentic-future",
          "accessed_date": "2026-06-07",
          "note": "Third-party coverage of Liquid Metal launch and Szegedy hire"
        },
        {
          "url": "https://github.com/morph-labs",
          "accessed_date": "2026-06-07",
          "note": "GitHub org, morphcloud SDKs (Apache-2.0), examples (Apache-2.0), SWELancer-Benchmark, open-r1, Lean repos"
        },
        {
          "url": "https://x.com/morph_labs/status/1966223824078479667",
          "accessed_date": "2026-06-07",
          "note": "Morph announces Math, Inc. incubated at Morph; Infinibranch-native compute powers Gauss"
        },
        {
          "url": "https://www.math.inc/gauss",
          "accessed_date": "2026-06-07",
          "note": "Math Inc Gauss, Trinity environments infra in partnership with Morph Labs; Infinibranch on Morph Cloud critical for scaling Lean verification environments"
        },
        {
          "url": "https://www.cogniscendo.com/p/the-superhuman-mathematician-5d1b",
          "accessed_date": "2026-06-07",
          "note": "Profile, Morph 'cloud for superintelligence'; Szegedy left xAI to join Morph Labs, was seed investor; Trinity"
        },
        {
          "url": "https://www.theinformation.com/briefings/xai-cofounder-joins-ai-code-startup-morph-chief-scientist",
          "accessed_date": "2026-06-07",
          "note": "Headline: xAI co-founder Szegedy joins Morph as Chief Scientist (paywalled; headline only)"
        },
        {
          "url": "https://trust.delve.co/morph",
          "accessed_date": "2026-06-07",
          "note": "Trust portal, SOC 2 Type 2 (2025), vendor risk program (rate-limited on direct fetch; via search snippet)"
        },
        {
          "url": "https://tracxn.com/d/companies/morph-labs/__8ZZWlPNWnRct4CJf-JJjU940HzzVxh1CEZxBQBG6tXg",
          "accessed_date": "2026-06-07",
          "note": "Founded 2023, SF, Jesse Han; lists company as unfunded / funding redacted"
        },
        {
          "url": "https://pitchbook.com/profiles/company/44794-09",
          "accessed_date": "2026-06-07",
          "note": "Morphlabs profile, ~11 employees"
        },
        {
          "url": "https://rocketreach.co/morph-labs-management_b7384e18c7e2a4d3",
          "accessed_date": "2026-06-07",
          "note": "~9 employees; mgmt: Jesse Han (CEO), Christian Szegedy (Chief Scientist), Chiraag Balu (MTS)"
        },
        {
          "url": "https://www.cbinsights.com/company/morph-labs",
          "accessed_date": "2026-06-07",
          "note": "HQ San Francisco; founded 2023"
        },
        {
          "url": "https://www.morph.so/careers",
          "accessed_date": "2026-06-07",
          "note": "Hiring researcher-engineers (systems research + AI post-training + agent interface); JS-rendered, count not extractable"
        },
        {
          "url": "https://www.morphllm.com/",
          "accessed_date": "2026-06-07",
          "note": "DIFFERENT company, AutoInfra, Inc. (YC), Fast Apply coding-inference models, founder Tejas Bhakta. NOT this Morph; name collision."
        }
      ],
      "last_updated": "2026-06-07",
      "corrections": [
        {
          "field": "focus_areas",
          "was": "[\"execution infrastructure\",\"computer use environments\",\"coding environments\",\"evaluation / benchmarks\"]",
          "now": "[\"execution infrastructure\",\"coding environments\",\"evaluation / benchmarks\"]",
          "reason": "Removed 'computer use environments'. Morph sells general snapshot/branch VM sandbox infrastructure (Infinibranch/Liquid Metal) and devboxes; there is no evidence it specifically provides computer-use (GUI/OS-control) environments as a focus. The remaining three are supported by docs (execution infra, coding/devbox, '50x faster AI agent evaluations')."
        },
        {
          "field": "soc2",
          "was": "Type II (confidence: reported)",
          "now": "claimed-unverified (confidence: reported)",
          "reason": "The only evidence is a self-hosted Delve trust-portal snippet; direct fetch returned 403/429 (uninspectable). Delve was the subject of a March 2026 investigation alleging fabrication of ~494 SOC 2 reports, materially undermining the credibility of a Delve-issued attestation. No independent audit firm or registry confirmation. Per rules, downgrade to claimed-unverified."
        },
        {
          "field": "total_raised.source",
          "was": "NOTE: widely-cited '$20M seed' figures belong to the unrelated Morph blockchain L2 (Singapore)",
          "now": "NOTE: the '$19-20M seed' (Crunchbase round 875f78ba, led by Dragonfly Capital, 2024-03-20) belongs to the unrelated Morph Ethereum L2 / blockchain project",
          "reason": "Verified the specific Crunchbase round: $19M seed led by Dragonfly Capital on 2024-03-20 is the Morph Ethereum layer-2, not Morph Labs. Tightened the note with the confirmed source and lead investor; value remains 'unknown' (correct). Tracxn independently lists Morph Labs as Unfunded."
        },
        {
          "field": "notable_customers.value[0].name",
          "was": "Math Inc ... company incubated at Morph",
          "now": "Math Inc ... company incubated at Morph (closely tied entity, not arm's-length customer)",
          "reason": "Added qualifier. Math Inc was incubated at Morph and CEO Jesse Han leads both entities, so it is not an independent third-party customer reference; verification correctly remains self-claimed and frontier_lab_tie false."
        },
        {
          "field": "researcher_backgrounds.value[0]",
          "was": "Jesse Han, founder/CEO; PhD work in formal mathematics",
          "now": "Jesse Han, founder/CEO; PhD work in formal mathematics; ex-OpenAI / Microsoft Research / AWS per profiles",
          "reason": "Added prior-org background found on CB Insights/RocketReach profiles; kept confidence 'reported' as it comes from aggregator profiles."
        },
        {
          "field": "researcher_backgrounds.source",
          "was": "startuphub.ai ...; theinformation.com ...",
          "now": "theinformation.com (lead) ...; startuphub.ai ...",
          "reason": "Reordered to lead with the most credible source (The Information headline confirming Szegedy joined Morph as Chief Scientist); no value change beyond Han background note."
        }
      ],
      "verification_summary": "Confirmed company identity matches the directory note exactly: Morph Labs (morph.so, Jesse Han CEO) provides snapshot-based compute for agent rollouts via Infinibranch/Liquid Metal (snapshot/branch/restore VMs in ~100-250ms). Distinct from two same-named entities the draft already flagged: Morph Ethereum L2 (the real owner of the $19M Dragonfly seed, Crunchbase 875f78ba / 2024-03-20) and morphllm.com (AutoInfra). Funding: total_raised/last_round/valuation correctly left 'unknown', Tracxn lists Morph Labs as unfunded; only an undisclosed Szegedy seed/angel is reported. Founded 2023 (Tracxn + CB Insights). Headcount ~9-11 (1-10 band, estimated, borderline). Key downgrade: SOC 2 changed from 'Type II/reported' to 'claimed-unverified', sole evidence is an uninspectable Delve-hosted trust portal, and Delve is under a March 2026 SOC 2 fabrication investigation, so the attestation cannot be relied on without independent confirmation. Trimmed focus_areas to remove unsupported 'computer use environments'. notable_customers (Math Inc) kept self-claimed and flagged as an incubated, non-arm's-length entity; no frontier-lab ties verified. Status: active (confirmed). Overall confidence: medium.",
      "research_notes": {}
    },
    {
      "rank": null,
      "focus_areas_normalised": [
        "Coding",
        "Enterprise Workflows",
        "Private Codebases"
      ],
      "slug": "turing",
      "brand_name": "Turing",
      "segment": "Incumbents also building RL environments",
      "website": "https://www.turing.com/",
      "status": {
        "value": "active",
        "confidence": "confirmed",
        "source": "https://www.turing.com/ (accessed 2026-06-07)"
      },
      "focus_areas": [
        "coding environments",
        "enterprise workflows"
      ],
      "founded_year": {
        "value": 2018,
        "confidence": "reported",
        "source": "Widely reported founding year (Jonathan Siddharth & Vijay Krishnan); accessed 2026-06-07"
      },
      "hq_location": {
        "value": "Palo Alto, USA",
        "confidence": "reported",
        "source": "Public company profiles (accessed 2026-06-07)"
      },
      "other_locations": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "distributed_remote": {
        "value": "yes",
        "confidence": "reported",
        "source": "Turing operates a global remote engineering/expert network (accessed 2026-06-07)"
      },
      "what_they_sell": {
        "value": "human data",
        "confidence": "reported",
        "source": "https://www.turing.com/, supplies frontier labs with coding/reasoning data, human expertise, and evaluations (accessed 2026-06-07)"
      },
      "deployment_model": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "maturity": {
        "value": "GA",
        "confidence": "reported",
        "source": "Established commercial company (accessed 2026-06-07)"
      },
      "open_source": {
        "value": "no",
        "confidence": "estimated",
        "source": "No public RL-environment OSS identified (accessed 2026-06-07)"
      },
      "license": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "current_headcount": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "headcount_band": {
        "value": "200+",
        "confidence": "reported",
        "source": "Large organization with a global contractor/expert network (accessed 2026-06-07)"
      },
      "headcount_growth": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "open_roles_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "total_raised": {
        "value": "unknown",
        "confidence": "unknown",
        "source": "Added 2026-06-07; funding figures pending verification, not estimated here to avoid an unsourced number"
      },
      "last_round": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "valuation": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "notable_investors": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "revenue_signals": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "soc2": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "other_certifications": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "security_page": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "has_researchers": {
        "value": "yes",
        "confidence": "reported",
        "source": "Turing AGI/research org and expert network (accessed 2026-06-07)"
      },
      "researcher_count": {
        "value": "unknown",
        "confidence": "unknown",
        "source": ""
      },
      "researcher_backgrounds": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "published_papers_or_benchmarks": {
        "value": [],
        "confidence": "unknown",
        "source": ""
      },
      "notable_customers": {
        "value": [],
        "confidence": "unknown",
        "source": "Turing publicly states it works with leading AI labs but does not name them in citable form (accessed 2026-06-07)"
      },
      "positioning_summary": "Turing is a large AGI-infrastructure and engineering-services company that supplies frontier AI labs with coding data, human expertise, and RL/evaluation data at scale, drawing on a global vetted developer and domain-expert network. Originally a remote-engineering talent marketplace, it now positions itself around 'AGI advancement' through code and reasoning data, including work over private/real codebases.",
      "best_fit_use_case": "Buyers needing very large-scale human coding and reasoning data, expert labor, and evaluations over real codebases from an established incumbent rather than a focused environment startup.",
      "overall_confidence": "low",
      "sources": [
        {
          "url": "https://www.turing.com/",
          "accessed_date": "2026-06-07",
          "note": "Official site, AGI/coding data, human expertise, evaluations for frontier labs"
        }
      ],
      "last_updated": "2026-06-07",
      "verification_summary": "Added 2026-06-07 at user request as an incumbent also building RL/coding-data offerings. Identity and category confirmed; most quantitative fields are left unknown pending full primary research rather than estimated. Turing is an incumbent and is therefore not ranked.",
      "corrections": [],
      "research_notes": {
        "open_questions": [
          "Verify Turing's funding, valuation, headcount, SOC 2/compliance, and named frontier-lab customers from primary sources."
        ]
      }
    }
  ]
}