Accelerating Your Model Evaluation and Fine-tuning with SFR-Judge

1.0Salesforcehttps://www.salesforce.com/blogCaiming Xionghttps://www.salesforce.com/blog/author/caiming-xiong/Accelerating Your Model Evaluation and Fine-tuning with SFR-Judgerich600338<blockquote class="wp-embedded-content" data-secret="wLwcNJDNWS"><a href="https://www.salesforce.com/blog/sfr-judge/">Accelerating Your Model Evaluation and Fine-tuning with SFR-Judge</a></blockquote><iframe sandbox="allow-scripts" security="restricted" src="https://www.salesforce.com/blog/sfr-judge/embed/#?secret=wLwcNJDNWS" width="600" height="338" title="“Accelerating Your Model Evaluation and Fine-tuning with SFR-Judge” — Salesforce" data-secret="wLwcNJDNWS" frameborder="0" marginwidth="0" marginheight="0" scrolling="no" class="wp-embedded-content"></iframe><script> /*! This file is auto-generated */ !function(d,l){"use strict";l.querySelector&&d.addEventListener&&"undefined"!=typeof URL&&(d.wp=d.wp||{},d.wp.receiveEmbedMessage||(d.wp.receiveEmbedMessage=function(e){var t=e.data;if((t||t.secret||t.message||t.value)&&!/[^a-zA-Z0-9]/.test(t.secret)){for(var s,r,n,a=l.querySelectorAll('iframe[data-secret="'+t.secret+'"]'),o=l.querySelectorAll('blockquote[data-secret="'+t.secret+'"]'),c=new RegExp("^https?:$","i"),i=0;i<o.length;i++)o[i].style.display="none";for(i=0;i<a.length;i++)s=a[i],e.source===s.contentWindow&&(s.removeAttribute("style"),"height"===t.message?(1e3<(r=parseInt(t.value,10))?r=1e3:~~r<200&&(r=200),s.height=r):"link"===t.message&&(r=new URL(s.getAttribute("src")),n=new URL(t.value),c.test(n.protocol))&&n.host===r.host&&l.activeElement===s&&(d.top.location.href=t.value))}},d.addEventListener("message",d.wp.receiveEmbedMessage,!1),l.addEventListener("DOMContentLoaded",function(){for(var e,t,s=l.querySelectorAll("iframe.wp-embedded-content"),r=0;r<s.length;r++)(t=(e=s[r]).getAttribute("data-secret"))||(t=Math.random().toString(36).substring(2,12),e.src+="#?secret="+t,e.setAttribute("data-secret",t)),e.contentWindow.postMessage({message:"ready",secret:t},"*")},!1)))}(window,document); //# sourceURL=https://wp-bn.salesforce.com/blog/wp-includes/js/wp-embed.min.js </script> As the development and deployment of large language models (LLMs) accelerates, evaluating model outputs has become increasingly important. The established method of evaluating responses typically involves recruiting and training human evaluators, having them…https://blog.salesforceairesearch.com/content/images/2024/09/pairwise_avg-1.png