{"id":245284,"date":"2025-10-15T10:49:54","date_gmt":"2025-10-15T02:49:54","guid":{"rendered":"https:\/\/www.hkubs.hku.hk\/media\/hku-business-school-releases-latest-report-on-ais-advanced-reasoning-capabilities\/"},"modified":"2025-10-15T10:54:23","modified_gmt":"2025-10-15T02:54:23","slug":"hku-business-school-releases-latest-report-on-ais-advanced-reasoning-capabilities","status":"publish","type":"hkubs-media","link":"https:\/\/www.hkubs.hku.hk\/tc\/media\/press-release\/hku-business-school-releases-latest-report-on-ais-advanced-reasoning-capabilities\/","title":{"rendered":"\u6e2f\u5927\u7d93\u7ba1\u5b78\u9662\u6700\u65b0\u8a55\u6e2c\uff1aAI\u9ad8\u968e\u63a8\u7406\u80fd\u529b\uff0c\u54ea\u5bb6\u6700\u5f37\uff1f"},"content":{"rendered":"<p>\u6e2f\u5927\u7d93\u7ba1\u5b78\u9662\u4eca\u65e5\u767c\u8868<strong>\u300a<\/strong><strong>AI<\/strong><strong>\u9ad8\u968e\u63a8\u7406\u80fd\u529b\u8a55\u6e2c\u5831\u544a\u300b<\/strong>\uff0c\u91dd\u5c0d\u9078\u5b9a\u7684AI\u5927\u8a9e\u8a00\u6a21\u578b\uff08LLM\uff09\u5728\u4e2d\u6587\u8a9e\u5883\u4e0b\u7684\u9ad8\u968e\u63a8\u7406\u80fd\u529b\u9032\u884c\u8a55\u6e2c\uff0c\u63ed\u793aAI\u9ad8\u968e\u63a8\u7406\u80fd\u529b\u7684\u767c\u5c55\u73fe\u72c0\u3002<\/p>\n<p>2025\u5e74\u4ee5\u4f86\uff0cAI\u5927\u8a9e\u8a00\u6a21\u578b\u5f9e\u300c\u80fd\u5c0d\u8a71\u300d\u5411\u300c\u6703\u601d\u8003\u300d\u5feb\u901f\u6f14\u9032\u3002\u7136\u800c\u5728\u9700\u8981\u8907\u96dc\u63a8\u7406\u80fd\u529b\u7684\u5834\u666f\u4e2d\uff0cAI\u7684\u63a8\u7406\u80fd\u529b\u53c3\u5dee\u4e0d\u9f4a\uff0c\u5305\u62ec\u5c0d\u5716\u7247\u548c\u6587\u5b57\u7b49\u8de8\u6a21\u614b\u8a0a\u606f\u7684\u6574\u5408\u5206\u6790\u80fd\u529b\uff0c\u6216\u9762\u5c0d\u975e\u5e38\u898f\u6027\u8907\u96dc\u984c\u76ee\u6642\u7684\u5275\u65b0\u63a8\u7406\u80fd\u529b\u3002\u56e0\u6b64\uff0c<strong>\u6e2f\u5927\u7d93\u7ba1\u5b78\u9662\u5275\u65b0\u53ca\u8cc7\u8a0a\u7ba1\u7406\u5b78\u6559\u6388\u517c\u590f\u5229\u840a\u4f09\u5137\u57fa\u91d1\u6559\u6388\u8523\u93ae\u8f1
d<\/strong>\u7387\u9818\u4eba\u5de5\u667a\u80fd\u8a55\u4f30\u5be6\u9a57\u5ba4\uff08AIEL\uff09\uff08<span><a href=\"https:\/\/hkubs.hku.hk\/aimodelrankings\">https:\/\/hkubs.hku.hk\/aimodelrankings<\/a><\/span>\uff09\u7814\u7a76\u4eba\u54e1\uff0c\u9996\u6b21\u69cb\u5efa\u591a\u6a21\u614b\u63a8\u7406\u548c\u570b\u969b\u6578\u5b78\u5967\u6797\u5339\u514b\u7af6\u8cfd\uff08\u5967\u8cfd\uff09\u63a8\u7406\u7d9c\u5408\u8a55\u6e2c\u9ad4\u7cfb\uff0c\u4ee5\u7cfb\u7d71\u6027\u3001\u5b9a\u91cf\u5316\u7684\u65b9\u5f0f\u91dd\u5c0d2025\u5e74\u4eba\u5de5\u667a\u80fd\u6280\u8853\u7684\u6838\u5fc3\u767c\u5c55\u74f6\u9838\u2014\u2014\u9ad8\u968e\u63a8\u7406\u80fd\u529b\uff0c\u9032\u884c\u5168\u9762\u6df1\u5165\u7684\u73fe\u72c0\u8a55\u6e2c\u3002\u8a55\u6e2c\u5c0d\u8c61\u6db5\u84cb\u4e2d\u7f8e\u5169\u570b\u622a\u81f32025\u5e7410\u6708\u4e2d\u65ec\u767c\u4f48\u768437\u6b3e\u5927\u8a9e\u8a00\u6a21\u578b\uff08\u5305\u62ec14\u500b\u63a8\u7406\u6a21\u578b\u300120\u500b\u901a\u7528\u6a21\u578b\u548c3\u500b\u4e00\u9ad4\u5316\u7cfb\u7d71\uff09\u3002<\/p>\n<p><strong><u>\u8a55\u6e2c\u7d50\u679c<\/u><\/strong>\u986f\u793a\uff1a<\/p>\n<ul>\n<li>\u5728\u300c\u591a\u6a21\u614b\u63a8\u7406\u300d\u65b9\u9762\uff0cGPT\u7cfb\u5217\u6301\u7e8c\u9818\u5148\uff0c\u4ee5\u300c\u8c46\u53051.5 Pro\u300d\uff08\u601d\u8003\u6a21\u5f0f\uff09\u70ba\u9996\u7684\u9802\u5c16\u570b\u7522\u6a21\u578b\uff0c\u4ea6\u5df2\u6210\u529f\u8e8b\u8eab\u5168\u7403\u7b2c\u4e00\u68af\u968a\u3002<\/li>\n<li>\u5728\u66f4\u9ad8\u96e3\u5ea6\u7684\u300c\u5967\u8cfd\u63a8\u7406\u300d\u65b9\u9762\uff0c\u5247\u7531\u7f8e\u570b\u6a21\u578b\u6574\u9ad4\u4e3b\u5c0e\uff0c\u5176\u4e2d GPT-5\uff08\u601d\u8003\u6a21\u5f0f\uff09\u7684\u512a\u52e2\u66f4\u53ef\u8b02\u8868\u73fe\u7a81\u51fa\uff0c\u5927\u5e45\u62cb\u96e2\u5c0d\u624b\uff0c\u800cGemini 2.5 
Pro\u4ea6\u7dca\u96a8\u5176\u5f8c\u3002<\/li>\n<li>\u6574\u9ad4\u800c\u8a00\uff0c\u662f\u6b21\u8a55\u6e2c\u63ed\u793a\u4e86\u4e00\u500b\u6e05\u6670\u7684\u884c\u696d\u8da8\u52e2\uff1a\u5c08\u70ba\u9ad8\u968e\u4efb\u52d9\u800c\u8a2d\u7684\u300c\u63a8\u7406\u6a21\u578b\u300d\uff0c\u8868\u73fe\u9060\u52dd\u300c\u901a\u7528\u6a21\u578b\u300d\uff0c\u5169\u8005\u68af\u5ea6\u5dee\u7570\u660e\u986f\u3002\u9019\u8da8\u52e2\u6b63\u6b63\u543b\u5408AI\u7522\u696d\u7684\u767c\u5c55\u898f\u5f8b\u2014\u2014\u884c\u696d\u7126\u9ede\u6b63\u7531\u4ee5\u5f80\u300c\u8ffd\u6c42\u529f\u80fd\u5920\u591a\u5920\u5ee3\u300d\uff0c\u8f49\u5411\u300c\u805a\u7126\u7279\u5b9a\u5834\u666f\u7684\u6df1\u5ea6\u512a\u5316\u300d\u3002\u9019\u6a19\u8a8c\u8457AI\u6280\u8853\u7684\u767c\u5c55\uff0c\u5df2\u7531\u300c\u5ee3\u5ea6\u64f4\u5f35\u300d\uff0c\u6b63\u5f0f\u9081\u5411\u300c\u6df1\u5ea6\u7cbe\u8015\u300d\u7684\u65b0\u6642\u4ee3\u3002<\/li>\n<\/ul>\n<p><strong>\u00a0<\/strong><\/p>\n<p><strong>\u8523\u93ae\u8f1d\u6559\u6388<\/strong>\u8868\u793a\uff1a\u300c\u9ad8\u968e\u63a8\u7406\u80fd\u529b\u5c0dAI\u5728\u6559\u80b2\u3001\u79d1\u7814\u53ca\u5546\u696d\u6c7a\u7b56\u7b49\u9818\u57df\u7684\u61c9\u7528\u62d3\u5c55\u81f3\u95dc\u91cd\u8981\u3002\u6b64\u7814\u7a76\u65e8\u5728\u63ed\u793a\u9ad8\u968e\u4eba\u5de5\u667a\u80fd\u6280\u8853\u7684\u767c\u5c55\u73fe\u72c0\uff0c\u4ee4\u884c\u696d\u80fd\u5920\u7cbe\u6e96\u5b9a\u4f4d\u6280\u8853\u74f6\u9838\uff0c\u52a0\u901f\u901a\u7528\u4eba\u5de5\u667a\u80fd\u5728\u9ad8\u8981\u6c42\u9818\u57df\u7684\u843d\u5730\u61c9\u7528\uff0c\u6700\u7d42\u63a8\u52d5AI\u5f9e\u300e\u5c0d\u8a71\u52a9\u624b\u300f\u8f49\u578b\u6210\u70ba\u66f4\u52a0\u9ad8\u7d1a\u7684\u300e\u667a\u80fd\u4f19\u4f34\u300f\u3002 
\u300d<\/p>\n<p><strong><u>\u8a55\u6e2c\u65b9\u6cd5<\/u><\/strong><\/p>\n<p>\u70ba\u5168\u9762\u8a55\u6e2c\u5927\u578b\u8a9e\u8a00\u6a21\u578b\uff08LLM\uff09\u7684\u9ad8\u968e\u63a8\u7406\u80fd\u529b\uff0c\u672c\u7814\u7a76\u8a2d\u8a08\u4e86\u5169\u5927\u6838\u5fc3\u6e2c\u8a66\u7dad\u5ea6\uff0c\u65e8\u5728\u8a55\u4f30AI\u5728\u61c9\u5c0d\u771f\u5be6\u4e16\u754c\u6311\u6230\u6642\u7684\u8868\u73fe\u3002<\/p>\n<ul>\n<li><strong>\u591a\u6a21\u614b\u63a8\u7406\u80fd\u529b<\/strong>\u6307\u6a21\u578b\u80fd\u5426\u6253\u901a\u6587\u672c\u3001\u5716\u50cf\u3001\u6578\u64da\u5716\u8868\u7b49\u4e0d\u540c\u683c\u5f0f\u7684\u8cc7\u8a0a\uff0c\u9032\u884c\u8de8\u6a21\u614b\u95dc\u806f\u5206\u6790\u8207\u908f\u8f2f\u63a8\u7406\u3002 \u4f8b\u5982\u5728\u6559\u5b78\u4e0a\uff0cAI\u9700\u8981\u7d50\u5408\u6559\u79d1\u66f8\u6587\u5b57\u8207\u7269\u7406\u5716\u89e3\uff0c\u624d\u80fd\u900f\u5fb9\u89e3\u91cb\u529b\u5b78\u539f\u7406\uff1b\u5728\u5546\u696d\u5206\u6790\u4e0a\uff0c\u5206\u6790\u5e2b\u4ea6\u8981\u9760AI\u540c\u6642\u89e3\u8b80\u5e02\u5834\u6587\u5b57\u5831\u544a\u548c\u92b7\u552e\u6578\u64da\u5716\u8868\uff0c\u624d\u80fd\u6e96\u78ba\u9810\u6e2c\u6d88\u8cbb\u8da8\u52e2\u3002\u9019\u662fAI\u8655\u7406\u73fe\u5be6\u4e16\u754c\u8907\u96dc\u4efb\u52d9\u7684\u300c\u57fa\u672c\u529f\u300d\u3002<\/li>\n<li><strong>\u5967\u8cfd\u63a8\u7406\u80fd\u529b<\/strong>\u63a1\u7528\u300c\u570b\u969b\u6578\u5b78\u5967\u6797\u5339\u514b\uff08IMO\uff09\u300d\u7b49\u9802\u7d1a\u5b78\u8853\u7af6\u8cfd\u7684\u9ad8\u96e3\u5ea6\u984c\u76ee\uff0c\u53bb\u6e2c\u8a66\u6a21\u578b\u5728\u8907\u96dc\u908f\u8f2f\u3001\u591a\u6b65\u9a5f\u63a8\u7406\u548c\u5275\u610f\u601d\u7dad\u4e0a\u7684\u6975\u9650\u3002\u9019\u985e\u554f\u984c\u5f80\u5f80\u6c92\u6709\u6a19\u6e96\u7b54\u6848\uff0c\u6975\u5ea6\u8003\u9a57AI\u80fd\u5426\u8df3\u51fa\u56fa\u6709\u6846\u67b6\u3001\u5c0b\u627e\u6700\u4f73\u89e3\u6c7a\u65b9\u6848\u7684\u5275\u9020\u529b\uff0c\u662f\u8861\u91cf\u5176\u300c\u9ad8\u968e\u667a\u80fd\u300d\u7684\u7d42
\u6975\u8a66\u7149\u3002<\/li>\n<\/ul>\n<p><strong><u>\u591a\u6a21\u614b\u63a8\u7406\u80fd\u529b\u6392\u540d<\/u><\/strong><\/p>\n<p>\u6839\u64da\u8a55\u6e2c\u7d50\u679c\uff0c\u5404\u5927\u8a9e\u8a00\u6a21\u578b\u5728\u300c\u591a\u6a21\u614b\u63a8\u7406\u80fd\u529b\u300d\u4e0a\u7684\u8868\u73fe\u51fa\u73fe\u660e\u986f\u5206\u5c64\u3002\u5728\u6700\u9802\u5c16\u7684\u68af\u968a\uff0885\u5206\u4ee5\u4e0a\u5340\u9593\uff09\uff0cGPT\u7cfb\u5217\u5e7e\u4e4e\u58df\u65b7\u4e86\u4e94\u500b\u5e2d\u4f4d\u4e2d\u7684\u56db\u5e2d\uff0c\u5c55\u73fe\u51fa\u5168\u9762\u7684\u9818\u5148\u512a\u52e2\u3002\u800c\u5167\u5730\u7814\u767c\u7684\u300c\u8c46\u53051.5 Pro\u300d\uff08\u601d\u8003\u6a21\u5f0f\uff09\u6210\u70ba\u552f\u4e00\u6253\u5165\u524d\u4e94\u540d\u7684\u570b\u7522\u6a21\u578b\uff0c\u8868\u73fe\u76f8\u7576\u4eae\u773c\u3002\u503c\u5f97\u7559\u610f\u7684\u662f\uff0c\u5176\u300c\u901a\u7528\u6a21\u5f0f\u300d\u8207\u300c\u601d\u8003\u6a21\u5f0f\u300d\u7684\u8a55\u5206\u5dee\u8ddd\u6975\u5c0f\uff0c\u8b49\u660e\u5176\u591a\u6a21\u614b\u63a8\u7406\u7684\u5e95\u5c64\u5be6\u529b\u5df2\u9054\u5230\u570b\u969b\u9802\u5c16\u6c34\u5e73\u3002<\/p>\n<p>\u5f9e\u6a21\u578b\u985e\u578b\u770b\uff0c\u548c\u901a\u7528\u6a21\u578b\u76f8\u6bd4\uff0c\u540c\u516c\u53f8\u7684\u63a8\u7406\u6a21\u578b\u5728\u8907\u96dc\u4efb\u52d9\u4e2d\u5c55\u73fe\u51fa\u4e00\u5b9a\u7684\u512a\u52e2\u3002<\/p>\n<table width=\"491\">\n<tbody>\n<tr>\n<td width=\"75\"><strong>\u6392\u540d<\/strong><\/td>\n<td width=\"236\"><strong>\u6a21\u578b\u540d\u7a31<\/strong><\/td>\n<td width=\"180\"><strong>\u591a\u6a21\u614b\u63a8\u7406\u80fd\u529b<\/strong><\/p>\n<p><strong>\u6e96\u78ba\u7387\u5f97\u5206<\/strong><\/td>\n<\/tr>\n<tr>\n<td width=\"75\">1<\/td>\n<td width=\"236\">GPT-5\uff08\u601d\u8003\u6a21\u5f0f\uff09<\/td>\n<td width=\"180\">91<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">2<\/td>\n<td width=\"236\">GPT-4.1<\/td>\n<td width=\"180\">90<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">3<\/td>\n<td 
width=\"236\">GPT-o3<\/td>\n<td width=\"180\">87<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">4<\/td>\n<td width=\"236\">\u8c46\u5305<span>1.5 Pro<\/span><span>\uff08\u601d\u8003\u6a21\u5f0f\uff09<\/span><\/td>\n<td width=\"180\">85<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">4<\/td>\n<td width=\"236\">GPT<span>-5\uff08\u81ea\u52a8\u6a21\u5f0f\uff09<\/span><\/td>\n<td width=\"180\">85<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">6<\/td>\n<td width=\"236\">GPT-4o<\/td>\n<td width=\"180\">84<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">7<\/td>\n<td width=\"236\">Claude 4 Opus<span>\uff08\u601d\u8003\u6a21\u5f0f\uff09<\/span><\/td>\n<td width=\"180\">83<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">8<\/td>\n<td width=\"236\">\u8c46\u5305<span>1.5 Pro<\/span><\/td>\n<td width=\"180\">82<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">8<\/td>\n<td width=\"236\">Grok 3<span>\uff08\u601d\u8003\u6a21\u5f0f\uff09<\/span><\/td>\n<td width=\"180\">82<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">10<\/td>\n<td width=\"236\">\u901a\u7fa9\u5343\u554f<span>3<\/span><\/td>\n<td width=\"180\">81<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">11<\/td>\n<td width=\"236\">Kimi-k1.5<\/td>\n<td width=\"180\">80<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">11<\/td>\n<td width=\"236\">\u65e5\u65e5\u65b0<span> V6<\/span><span>\u63a8\u7406<\/span><\/td>\n<td width=\"180\">80<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">11<\/td>\n<td width=\"236\">Step R1-V-Mini\uff08\u968e\u8e8d R1-V-Mini\uff09<\/td>\n<td width=\"180\">80<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">14<\/td>\n<td width=\"236\">Grok<span> 4<\/span><\/td>\n<td width=\"180\">79<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">14<\/td>\n<td width=\"236\">GPT-o4 mini<\/td>\n<td width=\"180\">79<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">14<\/td>\n<td width=\"236\">\u6df7\u5143<span>-T1<\/span><\/td>\n<td width=\"180\">79<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">17<\/td>\n<td width=\"236\">GLM-4-plus\uff08\u667a\u8b5c-4-Plus\uff09<\/td>\n<td width=\"180\">78<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">17<\/td>\n<td 
width=\"236\">\u901a\u7fa9\u5343\u554f<span>3<\/span><span>\uff08\u601d\u8003\u6a21\u5f0f\uff09<\/span><\/td>\n<td width=\"180\">78<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">19<\/td>\n<td width=\"236\">Gemini 2.5 Flash<\/td>\n<td width=\"180\">77<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">19<\/td>\n<td width=\"236\">GLM-Z1-Air\uff08\u667a\u8b5c-Z1-Air\uff09<\/td>\n<td width=\"180\">77<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">21<\/td>\n<td width=\"236\">Llama 3.3 70B<\/td>\n<td width=\"180\">76<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">22<\/td>\n<td width=\"236\">\u65e5\u65e5\u65b0<span> V6 Pro<\/span><\/td>\n<td width=\"180\">75<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">22<\/td>\n<td width=\"236\">Gemini 2.5 Pro<\/td>\n<td width=\"180\">75<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">24<\/td>\n<td width=\"236\">\u6587\u5fc3\u4e00\u8a00<span>4.5-Turbo<\/span><\/td>\n<td width=\"180\">74<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">25<\/td>\n<td width=\"236\">Step 2\uff08\u968e\u8e8d2\uff09<\/td>\n<td width=\"180\">73<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">26<\/td>\n<td width=\"236\">\u6df7\u5143<span>-TurboS<\/span><\/td>\n<td width=\"180\">71<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">26<\/td>\n<td width=\"236\">Claude 4 Opus<\/td>\n<td width=\"180\">71<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">28<\/td>\n<td width=\"236\">Spark 4.0 Ultra\uff08\u8a0a\u98db\u661f\u706b 4.0 Ultra\uff09<\/td>\n<td width=\"180\">68<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">28<\/td>\n<td width=\"236\">MiniMax-01<\/td>\n<td width=\"180\">68<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">30<\/td>\n<td width=\"236\">Baichuan4-Turbo\uff08\u767e\u5ddd4-Turbo\uff09<\/td>\n<td width=\"180\">67<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">31<\/td>\n<td width=\"236\">Grok 3<\/td>\n<td width=\"180\">66<\/td>\n<\/tr>\n<tr>\n<td width=\"75\">32<\/td>\n<td width=\"236\">Kimi<\/td>\n<td width=\"180\">63<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>\u5716\u8868 
1\uff1a\u591a\u6a21\u614b\u63a8\u7406\u80fd\u529b\u6392\u540d<\/p>\n<p><strong><u>\u5967\u8cfd\u63a8\u7406\u80fd\u529b\u6392\u540d<\/u><\/strong><\/p>\n<p>\u6839\u64da\u8a55\u6e2c\u7d50\u679c\uff0c\u5728\u300c\u5967\u8cfd\u63a8\u7406\u80fd\u529b\u300d\u65b9\u9762\uff0c\u7f8e\u570b\u7684\u5927\u578b\u8a9e\u8a00\u6a21\u578b\u65bc\u6b63\u78ba\u6027\u3001\u908f\u8f2f\u9023\u8cab\u6027\u3001\u89e3\u984c\u65b9\u6cd5\u5275\u65b0\u6027\u7b49\u591a\u500b\u7dad\u5ea6\uff0c\u5747\u5448\u73fe\u51fa\u5168\u9762\u7684\u9818\u5148\u614b\u52e2\u3002<\/p>\n<p>\u8655\u65bc\u9f8d\u982d\u4f4d\u7f6e\u7684\u662fGPT-5\uff08\u601d\u8003\u6a21\u5f0f\uff09\u548cGemini 2.5 Pro\uff0c\u5b83\u5011\u4ee5\u7d55\u5c0d\u512a\u52e2\u9059\u9059\u9818\u5148\uff0c\u8207\u5176\u4ed6\u6a21\u578b\u5f62\u6210\u5de8\u5927\u5dee\u8ddd\u3002\u7dca\u96a8\u5176\u5f8c\u69cb\u6210\u7b2c\u4e8c\u68af\u968a\u7684\uff0c\u5247\u662fGPT-o3\u53caClaude 4 Opus\uff08\u601d\u8003\u6a21\u5f0f\uff09\uff0c\u5206\u5225\u4f4d\u5217\u7b2c\u4e09\u53ca\u7b2c\u56db\u540d\u3002\u81f3\u65bc\u570b\u7522\u6a21\u578b\u7684\u8868\u73fe\uff0c\u7576\u4e2d\u50c5\u6709\u300c\u901a\u7fa9\u5343\u554f3\u300d\uff08\u601d\u8003\u6a21\u5f0f\uff09\u548cStep R1_V_mini\u8868\u73fe\u5c1a\u53ef\uff0c\u53cd\u6620\u570b\u7522\u6a21\u578b\u5728\u8655\u7406\u9802\u5c16\u8907\u96dc\u63a8\u7406\u4efb\u52d9\u4e0a\uff0c\u4ecd\u6709\u76f8\u7576\u5927\u7684\u9032\u6b65\u7a7a\u9593\u3002<\/p>\n<p>\u6b64\u5916\uff0c\u5373\u4f7f\u662f\u540c\u4e00\u9593\u516c\u53f8\u65d7\u4e0b\u7684\u6a21\u578b\uff0c\u601d\u8003\u6a21\u5f0f\u4e0b\u7684\u6a21\u578b\u5728\u5404\u9805\u5967\u8cfd\u63a8\u7406\u7dad\u5ea6\u4e0a\u7684\u8868\u73fe\uff0c\u666e\u904d\u512a\u65bc\u5176\u901a\u7528\u6a21\u578b\u3002<\/p>\n<table width=\"568\">\n<tbody>\n<tr>\n<td width=\"48\"><strong>\u6392\u540d<\/strong><\/td>\n<td width=\"208\"><strong>\u6a21\u578b\u540d\u79f0<\/strong><\/td>\n<td width=\"68\"><strong>\u6b63\u786e\u6027<\/strong><\/td>\n<td 
width=\"80\"><strong>\u903b\u8f91\u8fde\u8d2f\u6027<\/strong><\/td>\n<td width=\"77\"><strong>\u65b9\u6cd5\u521b\u65b0\u6027<\/strong><\/td>\n<td width=\"87\"><strong>\u5965\u8d5b\u63a8\u7406\u80fd\u529b<\/strong><\/p>\n<p><strong>\u52a0\u6743\u5f97\u5206<\/strong><\/td>\n<\/tr>\n<tr>\n<td width=\"48\">1<\/td>\n<td width=\"208\">GPT-5\uff08\u601d\u8003\u6a21\u5f0f\uff09<\/td>\n<td width=\"68\">48<\/td>\n<td width=\"80\">47<\/td>\n<td width=\"77\">44<\/td>\n<td width=\"87\">48<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">2<\/td>\n<td width=\"208\">Gemini 2.5 Pro<\/td>\n<td width=\"68\">48<\/td>\n<td width=\"80\">39<\/td>\n<td width=\"77\">36<\/td>\n<td width=\"87\">44<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">3<\/td>\n<td width=\"208\">GPT-o3<\/td>\n<td width=\"68\">36<\/td>\n<td width=\"80\">42<\/td>\n<td width=\"77\">39<\/td>\n<td width=\"87\">38<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">4<\/td>\n<td width=\"208\">Claude 4 Opus\uff08\u601d\u8003\u6a21\u5f0f\uff09<\/td>\n<td width=\"68\">30<\/td>\n<td width=\"80\">36<\/td>\n<td width=\"77\">39<\/td>\n<td width=\"87\">33<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">5<\/td>\n<td width=\"208\">Gemini 2.5 Flash<\/td>\n<td width=\"68\">35<\/td>\n<td width=\"80\">28<\/td>\n<td width=\"77\">31<\/td>\n<td width=\"87\">32<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">5<\/td>\n<td width=\"208\">GPT-o4 mini<\/td>\n<td width=\"68\">32<\/td>\n<td width=\"80\">33<\/td>\n<td width=\"77\">33<\/td>\n<td width=\"87\">32<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">7<\/td>\n<td width=\"208\">\u901a\u7fa9\u5343\u554f3\uff08\u601d\u8003\u6a21\u5f0f\uff09<\/td>\n<td width=\"68\">29<\/td>\n<td width=\"80\">25<\/td>\n<td width=\"77\">28<\/td>\n<td width=\"87\">28<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">7<\/td>\n<td width=\"208\">Step R1_V_mini<\/td>\n<td width=\"68\">26<\/td>\n<td width=\"80\">33<\/td>\n<td width=\"77\">22<\/td>\n<td width=\"87\">28<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">9<\/td>\n<td width=\"208\">GLM_Z1_Air<\/td>\n<td width=\"68\">27<\/td>\n<td 
width=\"80\">31<\/td>\n<td width=\"77\">22<\/td>\n<td width=\"87\">27<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">9<\/td>\n<td width=\"208\">\u65e5\u65e5\u65b0 V6\u63a8\u7406<\/td>\n<td width=\"68\">27<\/td>\n<td width=\"80\">28<\/td>\n<td width=\"77\">22<\/td>\n<td width=\"87\">27<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">11<\/td>\n<td width=\"208\">\u901a\u7fa9\u5343\u554f3<\/td>\n<td width=\"68\">25<\/td>\n<td width=\"80\">31<\/td>\n<td width=\"77\">17<\/td>\n<td width=\"87\">26<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">12<\/td>\n<td width=\"208\">\u6587\u5fc3\u4e00\u8a004.5-Turbo<\/td>\n<td width=\"68\">25<\/td>\n<td width=\"80\">25<\/td>\n<td width=\"77\">19<\/td>\n<td width=\"87\">24<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">13<\/td>\n<td width=\"208\">Grok 3\uff08\u601d\u8003\u6a21\u5f0f\uff09<\/td>\n<td width=\"68\">21<\/td>\n<td width=\"80\">28<\/td>\n<td width=\"77\">25<\/td>\n<td width=\"87\">23<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">14<\/td>\n<td width=\"208\">GPT-5\uff08\u81ea\u52a8\u6a21\u5f0f\uff09<\/td>\n<td width=\"68\">22<\/td>\n<td width=\"80\">22<\/td>\n<td width=\"77\">28<\/td>\n<td width=\"87\">22<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">14<\/td>\n<td width=\"208\">DeepSeek-V3\uff08\u6df1\u5ea6\u6c42\u7d22-V3\uff09<\/td>\n<td width=\"68\">26<\/td>\n<td width=\"80\">14<\/td>\n<td width=\"77\">22<\/td>\n<td width=\"87\">22<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">16<\/td>\n<td width=\"208\">Claude 4 Opus<\/td>\n<td width=\"68\">22<\/td>\n<td width=\"80\">17<\/td>\n<td width=\"77\">31<\/td>\n<td width=\"87\">21<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">17<\/td>\n<td width=\"208\">\u8c46\u53051.5 Pro\uff08\u601d\u8003\u6a21\u5f0f\uff09<\/td>\n<td width=\"68\">22<\/td>\n<td width=\"80\">17<\/td>\n<td width=\"77\">22<\/td>\n<td width=\"87\">20<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">17<\/td>\n<td width=\"208\">DeepSeek-R1\uff08\u6df1\u5ea6\u6c42\u7d22-R1\uff09<\/td>\n<td width=\"68\">17<\/td>\n<td width=\"80\">25<\/td>\n<td width=\"77\">22<\/td>\n<td 
width=\"87\">20<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">19<\/td>\n<td width=\"208\">Grok 3<\/td>\n<td width=\"68\">20<\/td>\n<td width=\"80\">19<\/td>\n<td width=\"77\">17<\/td>\n<td width=\"87\">19<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">19<\/td>\n<td width=\"208\">Grok 4<\/td>\n<td width=\"68\">19<\/td>\n<td width=\"80\">17<\/td>\n<td width=\"77\">25<\/td>\n<td width=\"87\">19<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">21<\/td>\n<td width=\"208\">\u6587\u5fc3\u4e00\u8a00 X1-Turbo<\/td>\n<td width=\"68\">17<\/td>\n<td width=\"80\">19<\/td>\n<td width=\"77\">14<\/td>\n<td width=\"87\">17<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">21<\/td>\n<td width=\"208\">\u6df7\u5143-T1<\/td>\n<td width=\"68\">17<\/td>\n<td width=\"80\">17<\/td>\n<td width=\"77\">19<\/td>\n<td width=\"87\">17<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">21<\/td>\n<td width=\"208\">\u6df7\u5143-TurboS<\/td>\n<td width=\"68\">17<\/td>\n<td width=\"80\">17<\/td>\n<td width=\"77\">19<\/td>\n<td width=\"87\">17<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">21<\/td>\n<td width=\"208\">Kimi-k1.5<\/td>\n<td width=\"68\">17<\/td>\n<td width=\"80\">19<\/td>\n<td width=\"77\">11<\/td>\n<td width=\"87\">17<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">25<\/td>\n<td width=\"208\">\u8c46\u53051.5 Pro<\/td>\n<td width=\"68\">16<\/td>\n<td width=\"80\">17<\/td>\n<td width=\"77\">19<\/td>\n<td width=\"87\">16<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">26<\/td>\n<td width=\"208\">GLM-4-plus\uff08\u667a\u8b5c-4-Plus\uff09<\/td>\n<td width=\"68\">12<\/td>\n<td width=\"80\">17<\/td>\n<td width=\"77\">8<\/td>\n<td width=\"87\">13<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">27<\/td>\n<td width=\"208\">GPT-4o<\/td>\n<td width=\"68\">13<\/td>\n<td width=\"80\">8<\/td>\n<td width=\"77\">19<\/td>\n<td width=\"87\">12<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">27<\/td>\n<td width=\"208\">Spark 4.0 Ultra\uff08\u8a0a\u98db\u661f\u706b 4.0 Ultra\uff09<\/td>\n<td width=\"68\">13<\/td>\n<td width=\"80\">11<\/td>\n<td width=\"77\">14<\/td>\n<td 
width=\"87\">12<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">29<\/td>\n<td width=\"208\">Baichuan4-Turbo\uff08\u767e\u5ddd4-Turbo\uff09<\/td>\n<td width=\"68\">8<\/td>\n<td width=\"80\">19<\/td>\n<td width=\"77\">11<\/td>\n<td width=\"87\">11<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">29<\/td>\n<td width=\"208\">GPT-4.1<\/td>\n<td width=\"68\">11<\/td>\n<td width=\"80\">8<\/td>\n<td width=\"77\">17<\/td>\n<td width=\"87\">11<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">31<\/td>\n<td width=\"208\">Kimi<\/td>\n<td width=\"68\">6<\/td>\n<td width=\"80\">14<\/td>\n<td width=\"77\">17<\/td>\n<td width=\"87\">9<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">31<\/td>\n<td width=\"208\">Llama 3.3 70B<\/td>\n<td width=\"68\">7<\/td>\n<td width=\"80\">14<\/td>\n<td width=\"77\">6<\/td>\n<td width=\"87\">9<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">33<\/td>\n<td width=\"208\">Yi-Lightning\uff08\u96f6\u4e00-Lightning\uff09<\/td>\n<td width=\"68\">6<\/td>\n<td width=\"80\">11<\/td>\n<td width=\"77\">14<\/td>\n<td width=\"87\">8<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">33<\/td>\n<td width=\"208\">\u65e5\u65e5\u65b0 V6 Pro<\/td>\n<td width=\"68\">8<\/td>\n<td width=\"80\">8<\/td>\n<td width=\"77\">6<\/td>\n<td width=\"87\">8<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">35<\/td>\n<td width=\"208\">MiniMax-01<\/td>\n<td width=\"68\">5<\/td>\n<td width=\"80\">11<\/td>\n<td width=\"77\">8<\/td>\n<td width=\"87\">7<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">35<\/td>\n<td width=\"208\">Step2<\/td>\n<td width=\"68\">6<\/td>\n<td width=\"80\">8<\/td>\n<td width=\"77\">8<\/td>\n<td width=\"87\">7<\/td>\n<\/tr>\n<tr>\n<td width=\"48\">35<\/td>\n<td width=\"208\">360\u667a\u81662-o1<\/td>\n<td width=\"68\">7<\/td>\n<td width=\"80\">6<\/td>\n<td width=\"77\">8<\/td>\n<td width=\"87\">7<\/td>\n<\/tr>\n<tr>\n<td colspan=\"6\" width=\"568\">Note: \u6240\u6709\u5206\u6570\u5747\u4e3a\u56db\u820d\u4e94\u5165\u5f97\u5206\u7ed3\u679c\u3002<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>&nbsp;<\/p>\n<p>\u5716\u8868 
2\uff1a\u5967\u8cfd\u63a8\u7406\u80fd\u529b\u6392\u540d<\/p>\n<p>\u8acb<span><a href=\"https:\/\/www.hkubs.hku.hk\/aimodelrankings\/leaderboards\/advancedreasoningCapabilities.html\">\u6309\u6b64<\/a><\/span>\u700f\u89bd\u300aAI\u9ad8\u968e\u63a8\u7406\u80fd\u529b\u8a55\u6e2c\u5831\u544a\u300b\u5168\u6587<\/p>\n<p>\u7d9c\u5408\u4ee5\u4e0a\u6392\u540d\u53ef\u77e5\uff0c\u5728\u5730\u57df\u4e0a\uff0c\u7f8e\u570b\u6a21\u578b\u5728\u591a\u6a21\u614b\u548c\u5967\u8cfd\u63a8\u7406\u4e2d\u8868\u73fe\u7a81\u51fa\uff0c\u512a\u52e2\u660e\u986f\uff1b\u4e2d\u570b\u6a21\u578b\u4ecd\u9700\u5728\u6df1\u5c64\u8a9e\u5883\u7406\u89e3\u3001\u8907\u96dc\u63a8\u7406\u93c8\u6216\u5275\u9020\u6027\u89e3\u6c7a\u554f\u984c\u4e0a\u5f4c\u88dc\u5dee\u8ddd\u3002 \u6b64\u5916\uff0c\u63a8\u7406\u6a21\u578b\u5728\u9ad8\u968e\u63a8\u7406\u80fd\u529b\u9818\u57df\u7684\u8868\u73fe\u666e\u904d\u512a\u65bc\u901a\u7528\u6a21\u578b\u3002<\/p>\n<p>\u672a\u4f86\uff0c\u4eba\u5de5\u667a\u6167\u9700\u5728\u8de8\u6a21\u614b\u6df1\u5ea6\u878d\u5408\u3001\u6975\u7aef\u8907\u96dc\u554f\u984c\u5275\u9020\u6027\u89e3\u6c7a\u4e0a\u6301\u7e8c\u7a81\u7834\u3002 \u800c\u4e2d\u570b\u6a21\u578b\u53ef\u4f9d\u8a17\u672c\u571f\u5834\u666f\u7406\u89e3\u512a\u52e2\uff0c\u91dd\u5c0d\u6027\u88dc\u8db3\u9ad8\u968e\u63a8\u7406\u77ed\u677f\uff0c\u63a8\u52d5\u300c\u771f\u667a\u6167\u300d \u5411\u66f4\u5ee3\u95ca\u7684\u61c9\u7528\u5834\u666f\u9081\u9032\u3002<\/p>\n<p><strong><u>\u5716\u7247<\/u><\/strong><\/p>\n<p><img fetchpriority=\"high\" decoding=\"async\" class=\"alignnone wp-image-245275 size-full\" src=\"https:\/\/www.hkubs.hku.hk\/wp-content\/uploads\/2025\/10\/JackJiang01.jpg\" alt=\"\" width=\"868\" height=\"1258\" srcset=\"https:\/\/www.hkubs.hku.hk\/wp-content\/uploads\/2025\/10\/JackJiang01.jpg 868w, https:\/\/www.hkubs.hku.hk\/wp-content\/uploads\/2025\/10\/JackJiang01-207x300.jpg 207w, https:\/\/www.hkubs.hku.hk\/wp-content\/uploads\/2025\/10\/JackJiang01-707x1024.jpg 707w, 
https:\/\/www.hkubs.hku.hk\/wp-content\/uploads\/2025\/10\/JackJiang01-768x1113.jpg 768w\" sizes=\"(max-width: 868px) 100vw, 868px\" \/><\/p>\n<p>\u6e2f\u5927\u7d93\u7ba1\u5b78\u9662\u5275\u65b0\u53ca\u8cc7\u8a0a\u7ba1\u7406\u5b78\u6559\u6388\u517c\u590f\u5229\u840a\u4f09\u5137\u57fa\u91d1\u6559\u6388<a href=\"https:\/\/www.hkubs.hku.hk\/tc\/people\/zhenhui-jack-jiang\/\"><strong>\u8523\u93ae\u8f1d\u6559\u6388<\/strong><\/a><\/p>\n<p>\u8acb<span><a href=\"https:\/\/drive.google.com\/drive\/folders\/1AQs5bn8JVDpdqfM50EWmYbcZl5pwEQ5p?usp=sharing\">\u6309\u6b64<\/a><\/span>\u4e0b\u8f09\u9ad8\u6e05\u5716\u7247\u3002<\/p>\n","protected":false},"featured_media":0,"template":"","meta":{"_lmt_disableupdate":"no","_lmt_disable":"","footnotes":""},"class_list":["post-245284","hkubs-media","type-hkubs-media","status-publish","hentry","media-categories-press-release-tc","media-topic-categories-research-tc"],"aioseo_notices":[],"_links":{"self":[{"href":"https:\/\/www.hkubs.hku.hk\/tc\/wp-json\/wp\/v2\/hkubs-media\/245284","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.hkubs.hku.hk\/tc\/wp-json\/wp\/v2\/hkubs-media"}],"about":[{"href":"https:\/\/www.hkubs.hku.hk\/tc\/wp-json\/wp\/v2\/types\/hkubs-media"}],"version-history":[{"count":1,"href":"https:\/\/www.hkubs.hku.hk\/tc\/wp-json\/wp\/v2\/hkubs-media\/245284\/revisions"}],"predecessor-version":[{"id":245292,"href":"https:\/\/www.hkubs.hku.hk\/tc\/wp-json\/wp\/v2\/hkubs-media\/245284\/revisions\/245292"}],"wp:attachment":[{"href":"https:\/\/www.hkubs.hku.hk\/tc\/wp-json\/wp\/v2\/media?parent=245284"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}