{"version":1,"pages":[{"id":"LThc2RqOxBKU56Qt3TMy","title":"About this playbook","pathname":"/","siteSpaceId":"sitesp_RfN9m","description":"","breadcrumbs":[{"label":"Introduction"},{"label":"Overview"}]},{"id":"kS34pEYSYWdhs0HAZNfq","title":"The Process Behind it","pathname":"/overview/the-process-behind-this-playbook","siteSpaceId":"sitesp_RfN9m","description":"","breadcrumbs":[{"label":"Introduction"},{"label":"Overview"}]},{"id":"kgYBGvTEQoMsRq5tvdMB","title":"How to Contribute to the Playbook","pathname":"/overview/how-to-contribute-to-the-playbook","siteSpaceId":"sitesp_RfN9m","description":"How to provide feedback, suggest edits, or contribute to the AI Evaluation Playbook.","breadcrumbs":[{"label":"Introduction"},{"label":"Overview"}]},{"id":"kyEWO2HWslN7yUGvThoO","title":"Building Blocks for GenAI Evaluation","pathname":"/getting-started/building-blocks-for-genai-evaluation","siteSpaceId":"sitesp_RfN9m","description":"","breadcrumbs":[{"label":"Introduction"},{"label":"Getting Started"}]},{"id":"V91mgmS1QmGOVyIVTszQ","title":"Building the Team","pathname":"/getting-started/building-the-team","siteSpaceId":"sitesp_RfN9m","description":"","breadcrumbs":[{"label":"Introduction"},{"label":"Getting Started"}]},{"id":"tSN6S6uJ2o6t9Y6o4LYF","title":"Building the Infrastructure","pathname":"/getting-started/building-the-infrastructure","siteSpaceId":"sitesp_RfN9m","description":"","breadcrumbs":[{"label":"Introduction"},{"label":"Getting Started"}]},{"id":"JtfTrm3LzjUqtluF8JJQ","title":"Frequently Asked Questions","pathname":"/additional-resources/frequently-asked-questions","siteSpaceId":"sitesp_RfN9m","description":"","breadcrumbs":[{"label":"Introduction"},{"label":"Additional Resources"}]},{"id":"CwyZmCqxDC5Ykit1ZWMV","title":"Tools & Templates","pathname":"/additional-resources/additional-resources","siteSpaceId":"sitesp_RfN9m","description":"","breadcrumbs":[{"label":"Introduction"},{"label":"Additional Resources"}]},{"id":"hsji8mMpBGjhnX4znVFF","title":"Minimum Viable Evaluations","pathname":"/additional-resources/minimum-viable-evaluations","siteSpaceId":"sitesp_RfN9m","description":"","breadcrumbs":[{"label":"Introduction"},{"label":"Additional Resources"}]},{"id":"zhN8SrebpP67jSDWsqTw","title":"Glossary","pathname":"/additional-resources/glossary","siteSpaceId":"sitesp_RfN9m","description":"","breadcrumbs":[{"label":"Introduction"},{"label":"Additional Resources"}]},{"id":"ZkaZfWdOyVRNT582TKdA","title":"Using the Playbook with AI Tools","pathname":"/additional-resources/using-the-playbook-with-ai-tools","siteSpaceId":"sitesp_RfN9m","description":"","breadcrumbs":[{"label":"Introduction"},{"label":"Additional Resources"}]},{"id":"DeMcUC7YhehF7wXhEazC","title":"Overview","pathname":"/model-behaviour","siteSpaceId":"sitesp_BbWiA","description":"Does the AI system perform as intended?","breadcrumbs":[{"label":"L1 - Model Evaluation","icon":"head-side-circuit"},{"label":"Level 1 - Module Evaluation"}]},{"id":"IdLwEHGlXwxGEMOg6fjm","title":"Who is most involved in this level of evaluation?","pathname":"/model-behaviour/level-1-module-evaluation/why-is-this-level-of-evaluation-important","siteSpaceId":"sitesp_BbWiA","breadcrumbs":[{"label":"L1 - Model Evaluation","icon":"head-side-circuit"},{"label":"Level 1 - Module Evaluation"}]},{"id":"YmiMvD8Yb54OQKNEVToa","title":"What is the “AI system” being evaluated?","pathname":"/model-behaviour/level-1-module-evaluation/what-is-the-ai-system-being-evaluated","siteSpaceId":"sitesp_BbWiA","breadcrumbs":[{"label":"L1 - Model Evaluation","icon":"head-side-circuit"},{"label":"Level 1 - Module Evaluation"}]},{"id":"Go9plv2ee7tJSZEoFVef","title":"What is the Minimum Viable Evaluation for Level 1?","pathname":"/model-behaviour/level-1-module-evaluation/what-is-the-minimum-viable-evaluation-for-level-1","siteSpaceId":"sitesp_BbWiA","breadcrumbs":[{"label":"L1 - Model Evaluation","icon":"head-side-circuit"},{"label":"Level 1 - Module Evaluation"}]},{"id":"hYtXrs8BLrgmkpbiFUiy","title":"How is Level 1 evaluation performed?","pathname":"/model-behaviour/how-to-evaluate/how-is-level-1-evaluation-performed","siteSpaceId":"sitesp_BbWiA","breadcrumbs":[{"label":"L1 - Model Evaluation","icon":"head-side-circuit"},{"label":"How to evaluate"}]},{"id":"SL1IaRB5Qi3xYvSC6V1M","title":"Decide on an evaluation rubric","pathname":"/model-behaviour/how-to-evaluate/1.-decide-on-an-evaluation-rubric","siteSpaceId":"sitesp_BbWiA","breadcrumbs":[{"label":"L1 - Model Evaluation","icon":"head-side-circuit"},{"label":"How to evaluate"}]},{"id":"pMp8WTGTVjysMbVi5Lmw","title":"Decide on metrics","pathname":"/model-behaviour/how-to-evaluate/2.-decide-on-metrics","siteSpaceId":"sitesp_BbWiA","breadcrumbs":[{"label":"L1 - Model Evaluation","icon":"head-side-circuit"},{"label":"How to evaluate"}]},{"id":"nlovOPA1IMARfE9MwQau","title":"Develop a golden dataset","pathname":"/model-behaviour/how-to-evaluate/3.-develop-a-golden-dataset","siteSpaceId":"sitesp_BbWiA","breadcrumbs":[{"label":"L1 - Model Evaluation","icon":"head-side-circuit"},{"label":"How to evaluate"}]},{"id":"dmxTilPPMauBPlkqtQJw","title":"Scoring & error analysis","pathname":"/model-behaviour/how-to-evaluate/4.-scoring-and-error-analysis","siteSpaceId":"sitesp_BbWiA","breadcrumbs":[{"label":"L1 - Model Evaluation","icon":"head-side-circuit"},{"label":"How to evaluate"}]},{"id":"rYvNTDC4aUQc5SmnfyB3","title":"Automate your evaluations","pathname":"/model-behaviour/how-to-evaluate/5.-automate-your-evaluations","siteSpaceId":"sitesp_BbWiA","breadcrumbs":[{"label":"L1 - Model Evaluation","icon":"head-side-circuit"},{"label":"How to evaluate"}]},{"id":"4GNTwdxlyVtQ5GBbubwv","title":"Red-teaming","pathname":"/model-behaviour/how-to-evaluate/6.-red-teaming","siteSpaceId":"sitesp_BbWiA","breadcrumbs":[{"label":"L1 - Model Evaluation","icon":"head-side-circuit"},{"label":"How to evaluate"}]},{"id":"BRhAcSDI4fzmQttWpxZl","title":"Overview","pathname":"/product-analytics","siteSpaceId":"sitesp_vztWH","description":"Does the overall product engage and retain users?","breadcrumbs":[{"label":"L2 - Product Evaluation","icon":"laptop-code"},{"label":"LEVEL 2 - Product Evaluation"}]},{"id":"E03jNZhP6xVQj9iyqtpQ","title":"Who is most involved in this level of evaluation?","pathname":"/product-analytics/level-2-product-evaluation/why-is-this-level-of-evaluation-important","siteSpaceId":"sitesp_vztWH","breadcrumbs":[{"label":"L2 - Product Evaluation","icon":"laptop-code"},{"label":"LEVEL 2 - Product Evaluation"}]},{"id":"EqhXdpLbzDck30Xp4yln","title":"What is the “Product” being evaluated?","pathname":"/product-analytics/level-2-product-evaluation/what-is-the-product-being-evaluated","siteSpaceId":"sitesp_vztWH","breadcrumbs":[{"label":"L2 - Product Evaluation","icon":"laptop-code"},{"label":"LEVEL 2 - Product Evaluation"}]},{"id":"bZLOh6lK78exeiHL7G9D","title":"What is the Minimum Viable Evaluation?","pathname":"/product-analytics/level-2-product-evaluation/what-is-the-minimum-viable-evaluation","siteSpaceId":"sitesp_vztWH","description":"We recommend using commercial platforms, when feasible, to track user metrics and automate experiments.","breadcrumbs":[{"label":"L2 - Product Evaluation","icon":"laptop-code"},{"label":"LEVEL 2 - Product Evaluation"}]},{"id":"E2tuvaDI9APvHRvWJyRS","title":"How is Level 2 evaluation performed?","pathname":"/product-analytics/how-to-evaluate/how-is-level-2-evaluation-performed","siteSpaceId":"sitesp_vztWH","breadcrumbs":[{"label":"L2 - Product Evaluation","icon":"laptop-code"},{"label":"How to evaluate"}]},{"id":"d9XH592tdYjLHb8oEy9i","title":"Methods for experimentation: A/B testing and beyond","pathname":"/product-analytics/how-to-evaluate/methods-for-experimentation-a-b-testing-and-beyond","siteSpaceId":"sitesp_vztWH","breadcrumbs":[{"label":"L2 - Product Evaluation","icon":"laptop-code"},{"label":"How to evaluate"}]},{"id":"aY9cqzZf1NFeBpqgeNEN","title":"Connection with other levels","pathname":"/product-analytics/how-to-evaluate/connection-with-other-levels","siteSpaceId":"sitesp_vztWH","breadcrumbs":[{"label":"L2 - Product Evaluation","icon":"laptop-code"},{"label":"How to evaluate"}]},{"id":"B8vqPg2GhHFUqIolxWRS","title":"Why Aren’t Users Engaging?","pathname":"/product-analytics/how-to-evaluate/why-arent-users-engaging","siteSpaceId":"sitesp_vztWH","breadcrumbs":[{"label":"L2 - Product Evaluation","icon":"laptop-code"},{"label":"How to evaluate"}]},{"id":"wcgHi9eru7seyBhXPjew","title":"Overview","pathname":"/user-expereince","siteSpaceId":"sitesp_Kxdge","description":"Does the product change users' thoughts, feelings, knowledge and behaviour towards the development outcome?","breadcrumbs":[{"label":"L3 - User Evaluation","icon":"user-gear"},{"label":"LEVEL 3 - User Evaluation"}]},{"id":"ul2a65TvdgqoIC9bEwF4","title":"Who is most involved in this level of evaluation?","pathname":"/user-expereince/level-3-user-evaluation/why-is-this-level-of-evaluation-important","siteSpaceId":"sitesp_Kxdge","breadcrumbs":[{"label":"L3 - User Evaluation","icon":"user-gear"},{"label":"LEVEL 3 - User Evaluation"}]},{"id":"z0S08pUtqzummCSqeNjA","title":"Who is the “User” being evaluated?","pathname":"/user-expereince/level-3-user-evaluation/who-is-the-user-being-evaluated","siteSpaceId":"sitesp_Kxdge","breadcrumbs":[{"label":"L3 - User Evaluation","icon":"user-gear"},{"label":"LEVEL 3 - User Evaluation"}]},{"id":"UYMLtf0yhV2QCrffSnOB","title":"What is the Minimum Viable Evaluation?","pathname":"/user-expereince/level-3-user-evaluation/what-is-the-minimum-viable-evaluation","siteSpaceId":"sitesp_Kxdge","breadcrumbs":[{"label":"L3 - User Evaluation","icon":"user-gear"},{"label":"LEVEL 3 - User Evaluation"}]},{"id":"iUTTFDo3W4jyTh7ZI6s3","title":"How is Level 3 evaluation performed?","pathname":"/user-expereince/how-to-evaluate/how-is-level-3-evaluation-performed","siteSpaceId":"sitesp_Kxdge","description":"","breadcrumbs":[{"label":"L3 - User Evaluation","icon":"user-gear"},{"label":"How to evaluate"}]},{"id":"mmnsDIJ2m98MmPatssMA","title":"Identify outcome metrics","pathname":"/user-expereince/how-to-evaluate/descriptive-analysis","siteSpaceId":"sitesp_Kxdge","description":"","breadcrumbs":[{"label":"L3 - User Evaluation","icon":"user-gear"},{"label":"How to evaluate"}]},{"id":"YS7PH4iamJQdKhqbTsOV","title":"Define guardrail metrics and measure potential harm","pathname":"/user-expereince/how-to-evaluate/defining-guardrail-metrics-measuring-potential-harm","siteSpaceId":"sitesp_Kxdge","description":"","breadcrumbs":[{"label":"L3 - User Evaluation","icon":"user-gear"},{"label":"How to evaluate"}]},{"id":"wDurugvCHKU063ijmTAd","title":"Consider conducting experiments to improve the selected key metrics and running process evaluations","pathname":"/user-expereince/how-to-evaluate/why-arent-thoughts-feelings-and-behavior-changing","siteSpaceId":"sitesp_Kxdge","description":"","breadcrumbs":[{"label":"L3 - User Evaluation","icon":"user-gear"},{"label":"How to evaluate"}]},{"id":"bQOxJgjU75xZixE5Y89s","title":"Why Aren’t Thoughts, Feelings, and Behavior Changing?","pathname":"/user-expereince/how-to-evaluate/user-privacy-and-security","siteSpaceId":"sitesp_Kxdge","description":"","breadcrumbs":[{"label":"L3 - User Evaluation","icon":"user-gear"},{"label":"How to evaluate"}]},{"id":"YnZKseJWPCqdwrTYVLTE","title":"Overview","pathname":"/social-impact","siteSpaceId":"sitesp_vzHWt","description":"Do users with access to the product improve development outcomes?","breadcrumbs":[{"label":"L4 - Impact Evaluation","icon":"hand-holding-seedling"},{"label":"Level 4 - Impact Evaluation"}]},{"id":"nDEp5z31imLnvAXYixVk","title":"Who is involved in this evaluation?","pathname":"/social-impact/level-4-impact-evaluation/why-is-this-level-of-evaluation-important","siteSpaceId":"sitesp_vzHWt","breadcrumbs":[{"label":"L4 - Impact Evaluation","icon":"hand-holding-seedling"},{"label":"Level 4 - Impact Evaluation"}]},{"id":"tVhuFj0GRCyKNwFJGA1l","title":"What is the “intervention” being evaluated?","pathname":"/social-impact/level-4-impact-evaluation/what-is-the-intervention-being-evaluated","siteSpaceId":"sitesp_vzHWt","breadcrumbs":[{"label":"L4 - Impact Evaluation","icon":"hand-holding-seedling"},{"label":"Level 4 - Impact Evaluation"}]},{"id":"iqIda5QLHf5UmqbUT2bV","title":"Minimum Viable Evaluation","pathname":"/social-impact/level-4-impact-evaluation/minimum-viable-evaluation","siteSpaceId":"sitesp_vzHWt","breadcrumbs":[{"label":"L4 - Impact Evaluation","icon":"hand-holding-seedling"},{"label":"Level 4 - Impact Evaluation"}]},{"id":"gnarBxNy7gjgKIjcRcSH","title":"How is Level 4 evaluation performed?","pathname":"/social-impact/how-to-evaluate/how-is-level-4-evaluation-performed","siteSpaceId":"sitesp_vzHWt","breadcrumbs":[{"label":"L4 - Impact Evaluation","icon":"hand-holding-seedling"},{"label":"How to evaluate"}]},{"id":"TU7OBFRR4zkKykpyLagI","title":"A Quick Primer on Impact Evaluation Methods","pathname":"/social-impact/how-to-evaluate/a-quick-primer-on-impact-evaluation-methods","siteSpaceId":"sitesp_vzHWt","breadcrumbs":[{"label":"L4 - Impact Evaluation","icon":"hand-holding-seedling"},{"label":"How to evaluate"}]},{"id":"RVNxdLBXgZfwL2MoYuss","title":"Key design considerations for AI-specific impact evaluations","pathname":"/social-impact/how-to-evaluate/key-design-considerations-for-ai-specific-impact-evaluations","siteSpaceId":"sitesp_vzHWt","breadcrumbs":[{"label":"L4 - Impact Evaluation","icon":"hand-holding-seedling"},{"label":"How to evaluate"}]},{"id":"IDBlz7DbfAPcsHfHFfq8","title":"Common pitfalls to avoid","pathname":"/social-impact/how-to-evaluate/common-pitfalls-to-avoid","siteSpaceId":"sitesp_vzHWt","breadcrumbs":[{"label":"L4 - Impact Evaluation","icon":"hand-holding-seedling"},{"label":"How to evaluate"}]},{"id":"2lV42tr38uxDWuDS5uji","title":"Process Evaluation: Why Aren’t Outcomes Changing?","pathname":"/social-impact/how-to-evaluate/process-evaluation-why-arent-outcomes-changing","siteSpaceId":"sitesp_vzHWt","breadcrumbs":[{"label":"L4 - Impact Evaluation","icon":"hand-holding-seedling"},{"label":"How to evaluate"}]},{"id":"NbLDRb1iBkUj8YFdMIiG","title":"Overview","pathname":"/level-linkages","siteSpaceId":"sitesp_rhY5J","breadcrumbs":[{"label":"Level Linkages","icon":"link"},{"label":"Linkage across levels"}]},{"id":"vYDoefUa2xHvtdC2esJ5","title":"Risk assessment and mitigation","pathname":"/level-linkages/linkage-across-levels/risk-assessment-and-mitigation","siteSpaceId":"sitesp_rhY5J","breadcrumbs":[{"label":"Level Linkages","icon":"link"},{"label":"Linkage across levels"}]},{"id":"iq3YQpeiHmbR2vQFdUnc","title":"Data protection","pathname":"/level-linkages/linkage-across-levels/data-protection","siteSpaceId":"sitesp_rhY5J","breadcrumbs":[{"label":"Level Linkages","icon":"link"},{"label":"Linkage across levels"}]},{"id":"4U7O8W1Ryyfg0LN2zY11","title":"Process Evaluations","pathname":"/level-linkages/linkage-across-levels/process-evaluations","siteSpaceId":"sitesp_rhY5J","breadcrumbs":[{"label":"Level Linkages","icon":"link"},{"label":"Linkage across levels"}]},{"id":"cMgwa28RpHrSaKaREEKP","title":"Do I need a Process Evaluation?","pathname":"/level-linkages/linkage-across-levels/process-evaluations/do-i-need-a-process-evaluation","siteSpaceId":"sitesp_rhY5J","breadcrumbs":[{"label":"Level Linkages","icon":"link"},{"label":"Linkage across levels"},{"label":"Process Evaluations"}]},{"id":"bwrt3WDa4cnesP6G3Byz","title":"What does it take to do a process evaluation?","pathname":"/level-linkages/linkage-across-levels/process-evaluations/what-does-it-take-to-do-a-process-evaluation","siteSpaceId":"sitesp_rhY5J","breadcrumbs":[{"label":"Level Linkages","icon":"link"},{"label":"Linkage across levels"},{"label":"Process Evaluations"}]}]}