{"id":2063,"date":"2025-11-15T12:39:51","date_gmt":"2025-11-15T04:39:51","guid":{"rendered":"https:\/\/www.loganblog.com\/?p=2063"},"modified":"2025-11-15T12:50:38","modified_gmt":"2025-11-15T04:50:38","slug":"%e5%92%a8%e8%af%a2%e5%a4%84%e7%90%86%e7%9a%84%e6%9e%b6%e6%9e%84%e8%ae%be%e8%ae%a1","status":"publish","type":"post","link":"https:\/\/www.loganblog.com\/index.php\/2025\/11\/15\/%e5%92%a8%e8%af%a2%e5%a4%84%e7%90%86%e7%9a%84%e6%9e%b6%e6%9e%84%e8%ae%be%e8%ae%a1\/","title":{"rendered":"\u901a\u7528\u5185\u5bb9\u805a\u5408\u7cfb\u7edf\u8bbe\u8ba1"},"content":{"rendered":"\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th>\u76ee\u5f55\u540d<\/th><th>\u6027\u8d28<\/th><th>\u6838\u5fc3\u804c\u8d23<\/th><th>\u5305\u542b\u4ec0\u4e48<\/th><th>\u63d0\u4ea4Git?<\/th><th>\u8c01\u4f1a\u4fee\u6539<\/th><\/tr><\/thead><tbody><tr><td><strong>core\/<\/strong><\/td><td>\u4ee3\u7801-\u6846\u67b6\u5c42<\/td><td>\u63d0\u4f9b\u5e95\u5c42\u80fd\u529b\uff0c\u652f\u6491\u6574\u4e2a\u7cfb\u7edf\u8fd0\u8f6c<\/td><td>workflow\u3001context\u3001llm\u3001storage\u3001cache\u3001models<\/td><td>\u2705 \u662f<\/td><td>\u67b6\u6784\u5e08\/\u6846\u67b6\u5f00\u53d1\u8005<\/td><\/tr><tr><td><strong>plugins\/<\/strong><\/td><td>\u4ee3\u7801-\u4e1a\u52a1\u5c42<\/td><td>\u63d0\u4f9b\u53ef\u63d2\u62d4\u7684\u4e1a\u52a1\u529f\u80fd\u6269\u5c55<\/td><td>sources\u3001processors\u3001outputs\u4e09\u7c7b\u63d2\u4ef6<\/td><td>\u2705 \u662f<\/td><td>\u4e1a\u52a1\u5f00\u53d1\u8005(\u6700\u5e38\u6539)<\/td><\/tr><tr><td><strong>quality\/<\/strong><\/td><td>\u4ee3\u7801-\u4e1a\u52a1\u5c42<\/td><td>\u8d28\u91cf\u4fdd\u8bc1\u548c\u4f18\u5316\u5efa\u8bae\u7cfb\u7edf<\/td><td>evaluator\u3001rules\u3001optimizer<\/td><td>\u2705 
\u662f<\/td><td>\u4e1a\u52a1\u5f00\u53d1\u8005<\/td><\/tr><tr><td><strong>config\/<\/strong><\/td><td>\u914d\u7f6e\u6587\u4ef6<\/td><td>\u5b9a\u4e49\u7cfb\u7edf\u884c\u4e3a\u548c\u4e1a\u52a1\u89c4\u5219<\/td><td>workflows\u914d\u7f6e\u3001\u6570\u636e\u6e90\u914d\u7f6e\u3001prompt\u6a21\u677f<\/td><td>\u2705 \u662f<\/td><td>\u8fd0\u8425\u4eba\u5458\/\u4e1a\u52a1\u5f00\u53d1\u8005<\/td><\/tr><tr><td><strong>data\/<\/strong><\/td><td>\u8fd0\u884c\u65f6\u6570\u636e<\/td><td>\u5b58\u50a8\u7a0b\u5e8f\u8fd0\u884c\u4ea7\u751f\u7684\u6240\u6709\u6570\u636e<\/td><td>raw\u3001processed\u3001published\u3001cache<\/td><td>\u274c \u5426<\/td><td>\u7a0b\u5e8f\u81ea\u52a8\u751f\u6210<\/td><\/tr><tr><td><strong>scripts\/<\/strong><\/td><td>\u5de5\u5177\u811a\u672c<\/td><td>\u8f85\u52a9\u5f00\u53d1\u548c\u8fd0\u7ef4\u7684\u4fbf\u6377\u5de5\u5177<\/td><td>\u8fd0\u884c\u811a\u672c\u3001\u9a8c\u8bc1\u5de5\u5177<\/td><td>\u2705 \u662f<\/td><td>\u5f00\u53d1\u8005<\/td><\/tr><tr><td><strong>tests\/<\/strong><\/td><td>\u6d4b\u8bd5\u4ee3\u7801<\/td><td>\u4fdd\u8bc1\u4ee3\u7801\u8d28\u91cf\u7684\u5355\u5143\u6d4b\u8bd5\u548c\u96c6\u6210\u6d4b\u8bd5<\/td><td>\u5404\u79cdtest_*.py\u6587\u4ef6<\/td><td>\u2705 \u662f<\/td><td>\u5f00\u53d1\u8005<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<h1 class=\"wp-block-heading\">\u901a\u7528\u5185\u5bb9\u805a\u5408\u7cfb\u7edf &#8211; \u8be6\u7ec6\u7cfb\u7edf\u8bbe\u8ba1\u6587\u6863<\/h1>\n\n\n\n<p><strong>\u9879\u76ee\u540d\u79f0<\/strong>: Universal Content Pipeline<br><strong>\u7248\u672c<\/strong>: v1.0<br><strong>\u521b\u5efa\u65e5\u671f<\/strong>: 2024-01-15<br><strong>\u76ee\u6807<\/strong>: \u5f00\u53d1\u4e00\u4e2a\u901a\u7528\u3001\u4f4e\u8026\u5408\u7684Python\u6846\u67b6\uff0c\u5b9e\u73b0\u667a\u80fd\u5185\u5bb9\u805a\u5408\u3001\u8d28\u91cf\u8bc4\u4f30\u548c\u81ea\u52a8\u53d1\u5e03<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 
class=\"wp-block-heading\">\u4e00\u3001\u9879\u76ee\u80cc\u666f\u4e0e\u76ee\u6807<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">1.1 \u4e1a\u52a1\u80cc\u666f<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u75db\u70b9<\/strong>: \u73b0\u6709\u7f51\u7ad9\u5185\u5bb9\u540c\u8d28\u5316\u4e25\u91cd\uff0c\u7528\u6237\u7559\u5b58\u7387\u4f4e<\/li>\n\n\n\n<li><strong>\u673a\u4f1a<\/strong>: \u4e2d\u56fd\u5c31\u4e1a\u73af\u5883\u4e25\u5cfb\uff0c\u526f\u4e1a\u9700\u6c42\u65fa\u76db<\/li>\n\n\n\n<li><strong>\u65b9\u5411<\/strong>: \u4ece\u6d77\u91cf\u4fe1\u606f\u4e2d\u7b5b\u9009\u51fa\u6700\u7cbe\u534e\u7684\u526f\u4e1a\u76f8\u5173\u5185\u5bb9\uff0c\u6bcf\u5929\u63a8\u835010\u7bc7\u6587\u7ae0<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">1.2 \u6838\u5fc3\u76ee\u6807<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u9ad8\u8d28\u91cf\u5185\u5bb9<\/strong>: \u901a\u8fc7AI\u8bc4\u4f30\u786e\u4fdd\u6bcf\u7bc7\u6587\u7ae0\u8d28\u91cf\u8fbe\u6807\uff08\u22657.0\u5206\uff09<\/li>\n\n\n\n<li><strong>\u81ea\u52a8\u5316\u8fd0\u884c<\/strong>: \u652f\u6301\u672c\u5730\u548cGitHub Action\u81ea\u52a8\u6267\u884c<\/li>\n\n\n\n<li><strong>\u53ef\u6269\u5c55\u6027<\/strong>: \u8f7b\u677e\u6dfb\u52a0\u65b0\u7684\u6570\u636e\u6e90\u3001\u5904\u7406\u5668\u3001\u8f93\u51fa\u683c\u5f0f<\/li>\n\n\n\n<li><strong>\u667a\u80fd\u91cd\u8bd5<\/strong>: \u8d28\u91cf\u4e0d\u8fbe\u6807\u65f6\u81ea\u52a8\u4f18\u5316\u91cd\u8bd5\uff0c\u6700\u591a3\u6b21<\/li>\n\n\n\n<li><strong>\u9ad8\u6027\u80fd<\/strong>: \u652f\u6301100+\u6570\u636e\u6e90\u5e76\u53d1\u6293\u53d6\uff0c500+\u6587\u7ae0\u6279\u91cf\u5904\u7406<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading\">1.3 \u6280\u672f\u8981\u6c42<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Python 
3.10+<\/li>\n\n\n\n<li>\u6a21\u5757\u5316\u3001\u4f4e\u8026\u5408\u3001\u9ad8\u5185\u805a<\/li>\n\n\n\n<li>\u652f\u6301\u914d\u7f6e\u9a71\u52a8\uff0c\u65e0\u9700\u6539\u4ee3\u7801\u5373\u53ef\u8c03\u6574\u884c\u4e3a<\/li>\n\n\n\n<li>\u5b8c\u6574\u7684\u9519\u8bef\u5904\u7406\u548c\u65e5\u5fd7\u8bb0\u5f55<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e8c\u3001\u7cfb\u7edf\u67b6\u6784\u8bbe\u8ba1<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">2.1 \u6574\u4f53\u67b6\u6784\u56fe<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\n\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502                         \u7528\u6237\u5c42                               \u2502\n\u2502  python main.py run daily  \u2502  GitHub Action \u5b9a\u65f6\u89e6\u53d1        \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n                     
\u2193\n\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502                      \u5de5\u4f5c\u6d41\u7f16\u6392\u5c42                             \u2502\n\u2502              core\/workflow.py (WorkflowEngine)              \u2502\n\u2502  - \u8bfb\u53d6\u914d\u7f6e \u2192 \u7f16\u6392\u6267\u884c \u2192 \u91cd\u8bd5\u5faa\u73af \u2192 \u7ed3\u679c\u8f93\u51fa                 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n                     \u2193\n        \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n        \u2193                         \u2193\n\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510          \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502  \u6570\u636e\u91c7\u96c6\u5c42   \u2502          \u2502  \u8d28\u91cf\u4fdd\u8bc1\u5c42   \u2502\n\u2502  plugins\/    \u2502          \u2502  quality\/    \u2502\n\u2502  sources\/    \u2502          \u2502  evaluator   \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518          \u2514\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n       
\u2193                         \u2193\n\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510          \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502  \u5185\u5bb9\u5904\u7406\u5c42   \u2502          \u2502  \u4f18\u5316\u5efa\u8bae\u5c42   \u2502\n\u2502  plugins\/    \u2502          \u2502  quality\/    \u2502\n\u2502  processors\/ \u2502          \u2502  optimizer   \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518          \u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n       \u2193\n\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502            \u8f93\u51fa\u53d1\u5e03\u5c42                    \u2502\n\u2502         plugins\/outputs\/                \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n       \u2193\n\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502            \u5b58\u50a8\u5c42                        \u2502\n\u2502    data\/ (raw\/processed\/published)      
\u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n\n<\/pre><\/div>\n\n\n<h3 class=\"wp-block-heading\">2.2 \u76ee\u5f55\u7ed3\u6784\u8bbe\u8ba1<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\nuniversal-content-pipeline\/\n\u251c\u2500\u2500 core\/                          # \u6838\u5fc3\u6846\u67b6\u5c42\n\u2502   \u251c\u2500\u2500 __init__.py\n\u2502   \u251c\u2500\u2500 workflow.py               # \u5de5\u4f5c\u6d41\u7f16\u6392\u5f15\u64ce\n\u2502   \u251c\u2500\u2500 context.py                # \u6267\u884c\u4e0a\u4e0b\u6587\n\u2502   \u251c\u2500\u2500 llm.py                    # LLM\u5ba2\u6237\u7aef\u5c01\u88c5\n\u2502   \u251c\u2500\u2500 storage.py                # \u6570\u636e\u6301\u4e45\u5316\n\u2502   \u251c\u2500\u2500 cache.py                  # \u7f13\u5b58\u7ba1\u7406\n\u2502   \u251c\u2500\u2500 models.py                 # \u9886\u57df\u6a21\u578b\u5b9a\u4e49\n\u2502   \u2514\u2500\u2500 exceptions.py             # \u5f02\u5e38\u5b9a\u4e49\n\u2502\n\u251c\u2500\u2500 plugins\/                       # \u63d2\u4ef6\u5c42\n\u2502   \u251c\u2500\u2500 sources\/                  # \u6570\u636e\u6e90\u63d2\u4ef6\n\u2502   \u2502   \u251c\u2500\u2500 __init__.py\n\u2502   \u2502   \u251c\u2500\u2500 base.py               # \u6570\u636e\u6e90\u57fa\u7c7b\n\u2502   \u2502   \u251c\u2500\u2500 github.py             # GitHub Trending\n\u2502   \u2502   \u251c\u2500\u2500 producthunt.py        # Product Hunt\n\u2502   \u2502   \u251c\u2500\u2500 hackernews.py         # Hacker News\n\u2502   \u2502   \u2514\u2500\u2500 rss.py                # RSS\u805a\u5408\n\u2502   \u251c\u2500\u2500 processors\/               # \u5904\u7406\u5668\u63d2\u4ef6\n\u2502   \u2502   
\u251c\u2500\u2500 __init__.py\n\u2502   \u2502   \u251c\u2500\u2500 base.py               # \u5904\u7406\u5668\u57fa\u7c7b\n\u2502   \u2502   \u251c\u2500\u2500 dedup.py              # \u53bb\u91cd\u5904\u7406\u5668\n\u2502   \u2502   \u251c\u2500\u2500 translate.py          # \u7ffb\u8bd1\u5904\u7406\u5668\n\u2502   \u2502   \u2514\u2500\u2500 summarize.py          # \u6458\u8981\u751f\u6210\u5668\n\u2502   \u2514\u2500\u2500 outputs\/                  # \u8f93\u51fa\u63d2\u4ef6\n\u2502       \u251c\u2500\u2500 __init__.py\n\u2502       \u251c\u2500\u2500 base.py               # \u8f93\u51fa\u57fa\u7c7b\n\u2502       \u251c\u2500\u2500 vitepress.py          # VitePress\u8f93\u51fa\n\u2502       \u2514\u2500\u2500 json.py               # JSON\u8f93\u51fa\n\u2502\n\u251c\u2500\u2500 quality\/                       # \u8d28\u91cf\u4fdd\u8bc1\u5c42\n\u2502   \u251c\u2500\u2500 __init__.py\n\u2502   \u251c\u2500\u2500 evaluator.py              # \u8d28\u91cf\u8bc4\u4f30\u5668\n\u2502   \u251c\u2500\u2500 rules.py                  # \u8d28\u91cf\u89c4\u5219\u96c6\u5408\n\u2502   \u2514\u2500\u2500 optimizer.py              # \u4f18\u5316\u5efa\u8bae\u751f\u6210\u5668\n\u2502\n\u251c\u2500\u2500 config\/                        # \u914d\u7f6e\u5c42\n\u2502   \u251c\u2500\u2500 workflows\/                # \u5de5\u4f5c\u6d41\u914d\u7f6e\n\u2502   \u2502   \u2514\u2500\u2500 daily.yaml\n\u2502   \u251c\u2500\u2500 sources.yaml              # \u6570\u636e\u6e90\u914d\u7f6e\n\u2502   \u251c\u2500\u2500 quality.yaml              # \u8d28\u91cf\u89c4\u5219\u914d\u7f6e\n\u2502   \u2514\u2500\u2500 prompts\/                  # Prompt\u6a21\u677f\n\u2502       \u251c\u2500\u2500 judge.txt\n\u2502       \u251c\u2500\u2500 summarize.txt\n\u2502       \u2514\u2500\u2500 translate.txt\n\u2502\n\u251c\u2500\u2500 data\/                          # \u6570\u636e\u5c42\uff08\u4e0d\u63d0\u4ea4Git\uff09\n\u2502   \u251c\u2500\u2500 raw\/                      # 
\u539f\u59cb\u6570\u636e\n\u2502   \u251c\u2500\u2500 processed\/                # \u5904\u7406\u540e\u6570\u636e\n\u2502   \u251c\u2500\u2500 published\/                # \u53d1\u5e03\u5185\u5bb9\n\u2502   \u2514\u2500\u2500 cache\/                    # \u7f13\u5b58\u6570\u636e\n\u2502\n\u251c\u2500\u2500 scripts\/                       # \u5de5\u5177\u811a\u672c\n\u2502   \u251c\u2500\u2500 run.py                    # \u672c\u5730\u8fd0\u884c\u811a\u672c\n\u2502   \u2514\u2500\u2500 validate.py               # \u914d\u7f6e\u9a8c\u8bc1\u811a\u672c\n\u2502\n\u251c\u2500\u2500 tests\/                         # \u6d4b\u8bd5\n\u2502   \u251c\u2500\u2500 test_core.py\n\u2502   \u251c\u2500\u2500 test_plugins.py\n\u2502   \u2514\u2500\u2500 test_quality.py\n\u2502\n\u251c\u2500\u2500 .github\/\n\u2502   \u2514\u2500\u2500 workflows\/\n\u2502       \u2514\u2500\u2500 daily.yml             # GitHub Action\u914d\u7f6e\n\u2502\n\u251c\u2500\u2500 main.py                        # \u547d\u4ee4\u884c\u5165\u53e3\n\u251c\u2500\u2500 pyproject.toml                 # \u9879\u76ee\u914d\u7f6e\n\u251c\u2500\u2500 .env.example                   # \u73af\u5883\u53d8\u91cf\u6a21\u677f\n\u251c\u2500\u2500 .gitignore\n\u2514\u2500\u2500 README.md\n\n<\/pre><\/div>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e09\u3001\u6838\u5fc3\u6a21\u5757\u8be6\u7ec6\u8bbe\u8ba1<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">3.1 core\/models.py &#8211; \u9886\u57df\u6a21\u578b<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">3.1.1 Article \u6587\u7ae0\u6a21\u578b<\/h4>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: python; title: ; notranslate\" title=\"\">\nfrom dataclasses import dataclass, field\nfrom datetime import datetime\nfrom typing import List, Dict, Optional\n\n@dataclass\nclass Article:\n    &quot;&quot;&quot;\u6587\u7ae0\u9886\u57df\u6a21\u578b - 
\u6240\u6709\u6a21\u5757\u64cd\u4f5c\u7684\u7edf\u4e00\u6570\u636e\u7ed3\u6784&quot;&quot;&quot;\n    \n    # \u57fa\u7840\u5b57\u6bb5\n    id: str                              # \u552f\u4e00\u6807\u8bc6\uff08\u683c\u5f0f: source_\u539f\u59cbid\uff09\n    source: str                          # \u6765\u6e90\u6807\u8bc6\uff08\u5982 'github', 'producthunt'\uff09\n    raw_data: dict                       # \u539f\u59cb\u6570\u636e\uff08\u4fdd\u7559\u7528\u4e8e\u8c03\u8bd5\uff09\n    \n    # \u5185\u5bb9\u5b57\u6bb5\n    title: str                           # \u6807\u9898\n    content: str                         # \u6b63\u6587\u5185\u5bb9\n    url: str                             # \u539f\u6587\u94fe\u63a5\n    published_at: datetime               # \u53d1\u5e03\u65f6\u95f4\n    \n    # \u5143\u6570\u636e\n    keywords: List&#x5B;str] = field(default_factory=list)  # \u5173\u952e\u8bcd\n    metadata: Dict = field(default_factory=dict)       # \u5176\u4ed6\u5143\u6570\u636e\n    \n    # \u5904\u7406\u72b6\u6001\n    quality_score: Optional&#x5B;'QualityScore'] = None     # \u8d28\u91cf\u8bc4\u5206\n    processing_history: List&#x5B;Dict] = field(default_factory=list)  # \u5904\u7406\u5386\u53f2\n    \n    def add_processing_record(self, processor_name: str, changes: Dict):\n        &quot;&quot;&quot;\u8bb0\u5f55\u5904\u7406\u5386\u53f2&quot;&quot;&quot;\n        self.processing_history.append({\n            'processor': processor_name,\n            'timestamp': datetime.now().isoformat(),\n            'changes': changes\n        })\n\n<\/pre><\/div>\n\n\n<h4 class=\"wp-block-heading\">3.1.2 QualityScore \u8d28\u91cf\u8bc4\u5206\u6a21\u578b<\/h4>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: python; title: ; notranslate\" title=\"\">\n@dataclass\nclass QualityScore:\n    &quot;&quot;&quot;\u8d28\u91cf\u8bc4\u5206\u6a21\u578b&quot;&quot;&quot;\n    \n    total: float                         # \u603b\u5206\uff080-10\uff09\n    dimensions: 
Dict&#x5B;str, float]         # \u5404\u7ef4\u5ea6\u5f97\u5206\n    feedback: List&#x5B;str]                  # \u8bc4\u5206\u53cd\u9988\n    passed: bool                         # \u662f\u5426\u901a\u8fc7\u9608\u503c\n    \n    # \u793a\u4f8b\uff1a\n    # {\n    #   'total': 7.5,\n    #   'dimensions': {'length': 8.0, 'freshness': 9.0, 'relevance': 7.0, 'llm_quality': 6.0},\n    #   'feedback': &#x5B;'\u76f8\u5173\u6027\u8f83\u597d', 'LLM\u5224\u65ad\u8d28\u91cf\u7565\u4f4e'],\n    #   'passed': True\n    # }\n\n<\/pre><\/div>\n\n\n<h4 class=\"wp-block-heading\">3.1.3 Context \u6267\u884c\u4e0a\u4e0b\u6587<\/h4>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: python; title: ; notranslate\" title=\"\">\n@dataclass\nclass RetryState:\n    &quot;&quot;&quot;\u91cd\u8bd5\u72b6\u6001&quot;&quot;&quot;\n    attempt: int = 0                     # \u5f53\u524d\u7b2c\u51e0\u6b21\u5c1d\u8bd5\uff080\u5f00\u59cb\uff09\n    max_attempts: int = 3                # \u6700\u5927\u91cd\u8bd5\u6b21\u6570\n    history: List&#x5B;Dict] = field(default_factory=list)  # \u5386\u53f2\u8bb0\u5f55\n    \n    def can_retry(self) -&gt; bool:\n        return self.attempt &lt; self.max_attempts\n    \n    def record_attempt(self, avg_score: float, reason: str, hints: Dict):\n        &quot;&quot;&quot;\u8bb0\u5f55\u672c\u6b21\u5c1d\u8bd5&quot;&quot;&quot;\n        self.history.append({\n            'attempt': self.attempt,\n            'avg_score': avg_score,\n            'reason': reason,\n            'hints': hints,\n            'timestamp': datetime.now().isoformat()\n        })\n        self.attempt += 1\n\n\n@dataclass\nclass Context:\n    &quot;&quot;&quot;\u5de5\u4f5c\u6d41\u6267\u884c\u4e0a\u4e0b\u6587 - \u643a\u5e26\u6240\u6709\u8fd0\u884c\u65f6\u72b6\u6001&quot;&quot;&quot;\n    \n    # \u57fa\u7840\u4fe1\u606f\n    workflow_name: str                   # \u5de5\u4f5c\u6d41\u540d\u79f0\n    run_date: str                        # 
\u8fd0\u884c\u65e5\u671f\uff08YYYY-MM-DD\uff09\n    output_dir: str                      # \u8f93\u51fa\u76ee\u5f55\n    \n    # \u91cd\u8bd5\u76f8\u5173\n    retry_state: RetryState = field(default_factory=RetryState)\n    optimization_hints: Dict = field(default_factory=dict)  # \u4f18\u5316\u5efa\u8bae\n    \n    # \u914d\u7f6e\n    config: Dict = field(default_factory=dict)  # \u5de5\u4f5c\u6d41\u914d\u7f6e\n    \n    def should_retry(self, avg_score: float) -&gt; bool:\n        &quot;&quot;&quot;\u5224\u65ad\u662f\u5426\u9700\u8981\u91cd\u8bd5&quot;&quot;&quot;\n        threshold = self.config.get('quality_threshold', 7.0)\n        \n        if avg_score &gt;= threshold:\n            return False  # \u8fbe\u6807\u4e0d\u9700\u8981\u91cd\u8bd5\n        \n        if not self.retry_state.can_retry():\n            return False  # \u65e0\u91cd\u8bd5\u673a\u4f1a\n        \n        return True\n\n<\/pre><\/div>\n\n\n<h4 class=\"wp-block-heading\">3.1.4 OptimizationHints \u4f18\u5316\u5efa\u8bae\u6a21\u578b<\/h4>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: python; title: ; notranslate\" title=\"\">\n@dataclass\nclass OptimizationHints:\n    &quot;&quot;&quot;\u4f18\u5316\u5efa\u8bae\u6a21\u578b - \u4f20\u9012\u7ed9processors&quot;&quot;&quot;\n    \n    should_retry: bool                   # \u662f\u5426\u5efa\u8bae\u91cd\u8bd5\n    failed_dimensions: List&#x5B;str]         # \u5931\u8d25\u7684\u7ef4\u5ea6\n    processor_hints: Dict&#x5B;str, Dict]     # \u7ed9\u5404processor\u7684\u5efa\u8bae\n    \n    # \u793a\u4f8b\uff1a\n    # {\n    #   'should_retry': True,\n    #   'failed_dimensions': &#x5B;'relevance', 'llm_quality'],\n    #   'processor_hints': {\n    #       'summarize': {\n    #           'issue': '\u76f8\u5173\u6027\u4e0d\u8db3',\n    #           'suggestion': '\u5f3a\u8c03\u526f\u4e1a\u76f8\u5173\u6027',\n    #           'emphasize_keywords': &#x5B;'\u526f\u4e1a', '\u72ec\u7acb\u5f00\u53d1', '\u521b\u4e1a']\n    #       },\n  
  #       'translate': {\n    #           'suggestion': '\u4f7f\u7528\u66f4\u51c6\u786e\u7684\u7ffb\u8bd1'\n    #       }\n    #   }\n    # }\n\n<\/pre><\/div>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">3.2 core\/workflow.py &#8211; \u5de5\u4f5c\u6d41\u7f16\u6392\u5f15\u64ce<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">3.2.1 WorkflowEngine \u7c7b\u8bbe\u8ba1<\/h4>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: python; title: ; notranslate\" title=\"\">\nfrom concurrent.futures import ThreadPoolExecutor, as_completed\nfrom typing import List, Dict\nimport yaml\nimport logging\n\nclass WorkflowEngine:\n    &quot;&quot;&quot;\u5de5\u4f5c\u6d41\u7f16\u6392\u5f15\u64ce - \u6838\u5fc3\u6267\u884c\u5668&quot;&quot;&quot;\n    \n    def __init__(self, config_path: str, context: Context):\n        &quot;&quot;&quot;\n        \u521d\u59cb\u5316\u5de5\u4f5c\u6d41\u5f15\u64ce\n        \n        Args:\n            config_path: \u5de5\u4f5c\u6d41\u914d\u7f6e\u6587\u4ef6\u8def\u5f84\uff08\u5982 config\/workflows\/daily.yaml\uff09\n            context: \u6267\u884c\u4e0a\u4e0b\u6587\n        &quot;&quot;&quot;\n        self.config = self._load_config(config_path)\n        self.context = context\n        self.context.config = self.config\n        self.logger = logging.getLogger(__name__)\n        \n    def run(self) -&gt; 'WorkflowResult':\n        &quot;&quot;&quot;\n        \u6267\u884c\u5de5\u4f5c\u6d41 - \u652f\u6301\u91cd\u8bd5\u5faa\u73af\n        \n        Returns:\n            WorkflowResult: \u5305\u542b\u6700\u7ec8\u6587\u7ae0\u5217\u8868\u548c\u6267\u884c\u7edf\u8ba1\n        &quot;&quot;&quot;\n        self.logger.info(f&quot;\ud83d\ude80 \u5f00\u59cb\u6267\u884c\u5de5\u4f5c\u6d41: {self.context.workflow_name}&quot;)\n        \n        # \u91cd\u8bd5\u5faa\u73af\n        while True:\n            attempt = self.context.retry_state.attempt + 1\n            
self.logger.info(f&quot;{'='*60}&quot;)\n            self.logger.info(f&quot;\ud83d\udd04 \u7b2c {attempt} \u6b21\u5c1d\u8bd5&quot;)\n            self.logger.info(f&quot;{'='*60}&quot;)\n            \n            # \u6267\u884c\u5b8c\u6574\u6d41\u7a0b\n            articles = self._execute_pipeline()\n            \n            # \u8ba1\u7b97\u5e73\u5747\u5206\n            avg_score = self._calculate_avg_score(articles)\n            threshold = self.config.get('quality_threshold', 7.0)\n            \n            self.logger.info(f&quot;\\n\ud83d\udcca \u672c\u8f6e\u7ed3\u679c: \u6587\u7ae0={len(articles)}, \u5e73\u5747\u5206={avg_score:.2f}, \u9608\u503c={threshold}&quot;)\n            \n            # \u5224\u65ad\u662f\u5426\u9700\u8981\u91cd\u8bd5\n            if not self.context.should_retry(avg_score):\n                if avg_score &gt;= threshold:\n                    self.logger.info(&quot;\u2705 \u8d28\u91cf\u8fbe\u6807\uff0c\u6d41\u7a0b\u7ed3\u675f\uff01&quot;)\n                else:\n                    self.logger.warning(&quot;\u26a0\ufe0f \u8d28\u91cf\u672a\u8fbe\u6807\u4f46\u65e0\u91cd\u8bd5\u673a\u4f1a\uff0c\u4f7f\u7528\u5f53\u524d\u7ed3\u679c&quot;)\n                \n                # \u8f93\u51fa\u6700\u7ec8\u7ed3\u679c\n                self._output_results(articles)\n                return WorkflowResult(articles=articles, stats=self._get_stats())\n            \n            # \u9700\u8981\u91cd\u8bd5\uff1a\u751f\u6210\u4f18\u5316\u5efa\u8bae\n            self.logger.info(&quot;\u274c \u8d28\u91cf\u672a\u8fbe\u6807\uff0c\u51c6\u5907\u91cd\u8bd5...&quot;)\n            hints = self._generate_optimization_hints(articles)\n            self.context.optimization_hints = hints.processor_hints\n            self.context.retry_state.record_attempt(avg_score, &quot;\u8d28\u91cf\u4e0d\u8fbe\u6807&quot;, hints.processor_hints)\n    \n    def _execute_pipeline(self) -&gt; List&#x5B;Article]:\n        
&quot;&quot;&quot;\u6267\u884c\u5b8c\u6574\u6d41\u7a0b&quot;&quot;&quot;\n        # \u9636\u6bb51: \u6570\u636e\u91c7\u96c6\n        self.logger.info(&quot;\\n\ud83d\udce5 &#x5B;\u9636\u6bb51] \u6570\u636e\u91c7\u96c6...&quot;)\n        raw_articles = self._run_sources()\n        \n        # \u9636\u6bb52: \u5185\u5bb9\u5904\u7406\n        self.logger.info(&quot;\\n\ud83d\udd27 &#x5B;\u9636\u6bb52] \u5185\u5bb9\u5904\u7406...&quot;)\n        processed_articles = self._run_processors(raw_articles)\n        \n        # \u9636\u6bb53: \u8d28\u91cf\u8bc4\u4f30\n        self.logger.info(&quot;\\n\u2b50 &#x5B;\u9636\u6bb53] \u8d28\u91cf\u8bc4\u4f30...&quot;)\n        scored_articles = self._run_quality_evaluation(processed_articles)\n        \n        # \u9636\u6bb54: \u7b5b\u9009Top N\n        self.logger.info(&quot;\\n\ud83c\udfaf &#x5B;\u9636\u6bb54] \u7b5b\u9009Top\u6587\u7ae0...&quot;)\n        top_articles = self._select_top_n(scored_articles)\n        \n        return top_articles\n    \n    def _run_sources(self) -&gt; List&#x5B;Article]:\n        &quot;&quot;&quot;\n        \u5e76\u53d1\u6267\u884c\u6240\u6709\u6570\u636e\u6e90\n        \n        Returns:\n            \u6240\u6709\u6570\u636e\u6e90\u6293\u53d6\u7684\u6587\u7ae0\u5217\u8868\n        &quot;&quot;&quot;\n        sources_config = self.config&#x5B;'stages']&#x5B;0]&#x5B;'sources']\n        max_workers = self.config.get('concurrency', {}).get('sources', 10)\n        \n        all_articles = &#x5B;]\n        source_instances = self._create_source_instances(sources_config)\n        \n        # \u5e76\u53d1\u6267\u884c\n        with ThreadPoolExecutor(max_workers=max_workers) as executor:\n            future_to_source = {\n                executor.submit(source.fetch, self.context): source \n                for source in source_instances\n            }\n            \n            for future in as_completed(future_to_source):\n                source = future_to_source&#x5B;future]\n                try:\n   
                 articles = future.result(timeout=30)  # 30\u79d2\u8d85\u65f6\n                    all_articles.extend(articles)\n                    self.logger.info(f&quot;  \u2713 {source.name}: {len(articles)} \u6761&quot;)\n                except Exception as e:\n                    self.logger.error(f&quot;  \u2717 {source.name} \u5931\u8d25: {e}&quot;)\n        \n        # \u4fdd\u5b58\u539f\u59cb\u6570\u636e\n        self._save_raw_data(all_articles)\n        \n        self.logger.info(f&quot;  \u603b\u8ba1: {len(all_articles)} \u6761&quot;)\n        return all_articles\n    \n    def _run_processors(self, articles: List&#x5B;Article]) -&gt; List&#x5B;Article]:\n        &quot;&quot;&quot;\n        \u4f9d\u6b21\u6267\u884c\u5904\u7406\u5668\u94fe\n        \n        Args:\n            articles: \u8f93\u5165\u6587\u7ae0\u5217\u8868\n            \n        Returns:\n            \u5904\u7406\u540e\u7684\u6587\u7ae0\u5217\u8868\n        &quot;&quot;&quot;\n        processors_config = self.config&#x5B;'stages']&#x5B;1]&#x5B;'processors']\n        processor_instances = self._create_processor_instances(processors_config)\n        \n        for processor in processor_instances:\n            self.logger.info(f&quot;  \u6267\u884c: {processor.name}&quot;)\n            articles = processor.process(articles, self.context)\n            self.logger.info(f&quot;    \u5269\u4f59: {len(articles)} \u6761&quot;)\n        \n        # \u4fdd\u5b58\u5904\u7406\u540e\u6570\u636e\n        self._save_processed_data(articles)\n        \n        return articles\n    \n    def _run_quality_evaluation(self, articles: List&#x5B;Article]) -&gt; List&#x5B;Article]:\n        &quot;&quot;&quot;\n        \u8d28\u91cf\u8bc4\u4f30\n        \n        Args:\n            articles: \u5f85\u8bc4\u4f30\u6587\u7ae0\u5217\u8868\n            \n        Returns:\n            \u5e26\u8bc4\u5206\u7684\u6587\u7ae0\u5217\u8868\n        &quot;&quot;&quot;\n        from quality.evaluator import QualityEvaluator\n 
       \n        evaluator = QualityEvaluator(self.config.get('quality_config'))\n        \n        for i, article in enumerate(articles, 1):\n            score = evaluator.evaluate(article, self.context)\n            article.quality_score = score\n            self.logger.info(f&quot;  &#x5B;{i}\/{len(articles)}] {article.title&#x5B;:30]}... \u5f97\u5206: {score.total:.1f}&quot;)\n        \n        return articles\n    \n    def _select_top_n(self, articles: List&#x5B;Article]) -&gt; List&#x5B;Article]:\n        &quot;&quot;&quot;\u9009\u62e9\u5f97\u5206\u6700\u9ad8\u7684N\u7bc7\u6587\u7ae0&quot;&quot;&quot;\n        top_n = self.config.get('output_config', {}).get('top_n', 10)\n        \n        sorted_articles = sorted(\n            articles,\n            key=lambda x: x.quality_score.total,\n            reverse=True\n        )\n        \n        selected = sorted_articles&#x5B;:top_n]\n        self.logger.info(f&quot;  \u9009\u4e2d\u524d {top_n} \u7bc7&quot;)\n        return selected\n    \n    def _generate_optimization_hints(self, articles: List&#x5B;Article]) -&gt; OptimizationHints:\n        &quot;&quot;&quot;\u751f\u6210\u4f18\u5316\u5efa\u8bae&quot;&quot;&quot;\n        from quality.optimizer import QualityOptimizer\n        \n        optimizer = QualityOptimizer()\n        evaluation_result = EvaluationResult(\n            articles=articles,\n            avg_score=self._calculate_avg_score(articles)\n        )\n        \n        hints = optimizer.generate_hints(evaluation_result, self.context)\n        self.logger.info(f&quot;  \u4f18\u5316\u5efa\u8bae: {hints.processor_hints}&quot;)\n        return hints\n    \n    def _output_results(self, articles: List&#x5B;Article]):\n        &quot;&quot;&quot;\u8f93\u51fa\u6700\u7ec8\u7ed3\u679c&quot;&quot;&quot;\n        outputs_config = self.config&#x5B;'stages']&#x5B;-1]&#x5B;'outputs']\n        output_instances = self._create_output_instances(outputs_config)\n        \n        for output in output_instances:\n   
         self.logger.info(f&quot;  \u8f93\u51fa\u5230: {output.name}&quot;)\n            output.write(articles, self.context)\n\n<\/pre><\/div>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">3.3 plugins\/sources\/base.py &#8211; \u6570\u636e\u6e90\u57fa\u7c7b<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: python; title: ; notranslate\" title=\"\">\nfrom abc import ABC, abstractmethod\nfrom typing import List\nfrom core.models import Article, Context\n\nclass BaseSource(ABC):\n    &quot;&quot;&quot;\u6570\u636e\u6e90\u57fa\u7c7b - \u6240\u6709\u6570\u636e\u6e90\u5fc5\u987b\u7ee7\u627f\u6b64\u7c7b&quot;&quot;&quot;\n    \n    def __init__(self, config: dict):\n        &quot;&quot;&quot;\n        \u521d\u59cb\u5316\u6570\u636e\u6e90\n        \n        Args:\n            config: \u6570\u636e\u6e90\u914d\u7f6e\uff08\u6765\u81ea config\/sources.yaml\uff09\n        &quot;&quot;&quot;\n        self.config = config\n        self.name = self.__class__.__name__\n    \n    @abstractmethod\n    def fetch(self, context: Context) -&gt; List&#x5B;Article]:\n        &quot;&quot;&quot;\n        \u6293\u53d6\u6570\u636e - \u5b50\u7c7b\u5fc5\u987b\u5b9e\u73b0\n        \n        Args:\n            context: \u6267\u884c\u4e0a\u4e0b\u6587\n            \n        Returns:\n            \u6587\u7ae0\u5217\u8868\n            \n        Raises:\n            SourceException: \u6293\u53d6\u5931\u8d25\u65f6\u629b\u51fa\n        &quot;&quot;&quot;\n        pass\n    \n    def _create_article_id(self, source_name: str, original_id: str) -&gt; str:\n        &quot;&quot;&quot;\u751f\u6210\u7edf\u4e00\u7684\u6587\u7ae0ID&quot;&quot;&quot;\n        return f&quot;{source_name}_{original_id}&quot;\n\n<\/pre><\/div>\n\n\n<h4 class=\"wp-block-heading\">3.3.1 GitHub \u6570\u636e\u6e90\u5b9e\u73b0\u793a\u4f8b<\/h4>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: python; title: ; notranslate\" 
title=\"\">\nimport os\nimport requests\nfrom datetime import datetime, timedelta\nfrom typing import List\nfrom plugins.sources.base import BaseSource\nfrom core.models import Article, Context\nfrom core.exceptions import SourceException\n\nclass GitHubSource(BaseSource):\n    &quot;&quot;&quot;GitHub Trending \u6570\u636e\u6e90&quot;&quot;&quot;\n    \n    def fetch(self, context: Context) -&gt; List&#x5B;Article]:\n        &quot;&quot;&quot;\u4eceGitHub\u6293\u53d6trending\u9879\u76ee&quot;&quot;&quot;\n        api_url = &quot;https:\/\/api.github.com\/search\/repositories&quot;\n        \n        # \u8ba1\u7b97\u65f6\u95f4\u8303\u56f4\uff08\u6700\u8fd124\u5c0f\u65f6\uff09\n        since_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')\n        \n        params = {\n            'q': f'created:&gt;{since_date}',\n            'sort': 'stars',\n            'order': 'desc',\n            'per_page': self.config.get('per_page', 30)\n        }\n        \n        headers = {}\n        if token := os.getenv('GITHUB_TOKEN'):\n            headers&#x5B;'Authorization'] = f'token {token}'\n        \n        try:\n            response = requests.get(api_url, params=params, headers=headers, timeout=10)\n            response.raise_for_status()\n            data = response.json()\n        except Exception as e:\n            raise SourceException(f&quot;GitHub API\u8c03\u7528\u5931\u8d25: {e}&quot;)\n        \n        articles = &#x5B;]\n        for repo in data.get('items', &#x5B;]):\n            article = Article(\n                id=self._create_article_id('github', str(repo&#x5B;'id'])),\n                source='github',\n                raw_data=repo,\n                title=repo&#x5B;'full_name'],\n                content=repo&#x5B;'description'] or '',\n                url=repo&#x5B;'html_url'],\n                published_at=datetime.fromisoformat(repo&#x5B;'created_at'].replace('Z', '+00:00')),\n                keywords=&#x5B;],\n                metadata={\n                    'stars': 
repo&#x5B;'stargazers_count'],\n                    'language': repo&#x5B;'language'],\n                    'topics': repo.get('topics', &#x5B;])\n                }\n            )\n            articles.append(article)\n        \n        return articles\n\n<\/pre><\/div>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">3.4 plugins\/processors\/base.py &#8211; \u5904\u7406\u5668\u57fa\u7c7b<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: python; title: ; notranslate\" title=\"\">\nfrom abc import ABC, abstractmethod\nfrom typing import List\nfrom core.models import Article, Context\n\nclass BaseProcessor(ABC):\n    &quot;&quot;&quot;\u5904\u7406\u5668\u57fa\u7c7b - \u6240\u6709\u5904\u7406\u5668\u5fc5\u987b\u7ee7\u627f\u6b64\u7c7b&quot;&quot;&quot;\n    \n    def __init__(self, config: dict = None):\n        &quot;&quot;&quot;\n        \u521d\u59cb\u5316\u5904\u7406\u5668\n        \n        Args:\n            config: \u5904\u7406\u5668\u914d\u7f6e\n        &quot;&quot;&quot;\n        self.config = config or {}\n        self.name = self.__class__.__name__\n    \n    @abstractmethod\n    def process(self, articles: List&#x5B;Article], context: Context) -&gt; List&#x5B;Article]:\n        &quot;&quot;&quot;\n        \u5904\u7406\u6587\u7ae0\u5217\u8868 - \u5b50\u7c7b\u5fc5\u987b\u5b9e\u73b0\n        \n        Args:\n            articles: \u8f93\u5165\u6587\u7ae0\u5217\u8868\n            context: \u6267\u884c\u4e0a\u4e0b\u6587\uff08\u5305\u542boptimization_hints\uff09\n            \n        Returns:\n            \u5904\u7406\u540e\u7684\u6587\u7ae0\u5217\u8868\n        &quot;&quot;&quot;\n        pass\n    \n    def _get_hints(self, context: Context) -&gt; dict:\n        &quot;&quot;&quot;\u4ece\u4e0a\u4e0b\u6587\u83b7\u53d6\u9488\u5bf9\u672c\u5904\u7406\u5668\u7684\u4f18\u5316\u5efa\u8bae&quot;&quot;&quot;\n        return context.optimization_hints.get(self.name, 
{})\n\n<\/pre><\/div>\n\n\n<h4 class=\"wp-block-heading\">3.4.1 \u53bb\u91cd\u5904\u7406\u5668\u5b9e\u73b0<\/h4>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\nfrom plugins.processors.base import BaseProcessor\n\nclass DedupProcessor(BaseProcessor):\n    &quot;&quot;&quot;\u53bb\u91cd\u5904\u7406\u5668 - \u57fa\u4e8eURL\u548c\u6807\u9898\u76f8\u4f3c\u5ea6&quot;&quot;&quot;\n    \n    def process(self, articles: List&#x5B;Article], context: Context) -&gt; List&#x5B;Article]:\n        &quot;&quot;&quot;\u53bb\u9664\u91cd\u590d\u6587\u7ae0&quot;&quot;&quot;\n        seen_urls = set()\n        seen_titles = set()\n        unique_articles = &#x5B;]\n        \n        for article in articles:\n            # URL\u53bb\u91cd\n            if article.url in seen_urls:\n                continue\n            \n            # \u6807\u9898\u76f8\u4f3c\u5ea6\u53bb\u91cd\uff08\u7b80\u5316\u7248\uff1a\u5b8c\u5168\u76f8\u540c\uff09\n            if article.title in seen_titles:\n                continue\n            \n            seen_urls.add(article.url)\n            seen_titles.add(article.title)\n            unique_articles.append(article)\n            \n            # \u8bb0\u5f55\u5904\u7406\u5386\u53f2\n            article.add_processing_record('dedup', {'status': 'kept'})\n        \n        removed_count = len(articles) - len(unique_articles)\n        if removed_count &gt; 0:\n            context.logger.info(f&quot;    \u53bb\u91cd\u79fb\u9664: {removed_count} \u6761&quot;)\n        \n        return unique_articles\n\n<\/pre><\/div>\n\n\n<h4 class=\"wp-block-heading\">3.4.2 \u7ffb\u8bd1\u5904\u7406\u5668\u5b9e\u73b0<\/h4>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: python; title: ; notranslate\" title=\"\">\nfrom plugins.processors.base import BaseProcessor\nfrom core.llm import LLMClient\nfrom core.cache import Cache\n\nclass TranslateProcessor(BaseProcessor):\n    
&quot;&quot;&quot;\u7ffb\u8bd1\u5904\u7406\u5668 - \u5c06\u82f1\u6587\u5185\u5bb9\u7ffb\u8bd1\u4e3a\u4e2d\u6587&quot;&quot;&quot;\n    \n    def __init__(self, config: dict = None):\n        super().__init__(config)\n        self.llm = LLMClient()\n        self.cache = Cache()\n    \n    def process(self, articles: List&#x5B;Article], context: Context) -&gt; List&#x5B;Article]:\n        &quot;&quot;&quot;\u7ffb\u8bd1\u6587\u7ae0\u6807\u9898\u548c\u5185\u5bb9&quot;&quot;&quot;\n        hints = self._get_hints(context)\n        \n        # \u8bfb\u53d6prompt\u6a21\u677f\n        with open('config\/prompts\/translate.txt', 'r', encoding='utf-8') as f:\n            prompt_template = f.read()\n        \n        # \u6279\u91cf\u5904\u7406\n        batch_size = context.config.get('concurrency', {}).get('processors', 5)\n        \n        for i in range(0, len(articles), batch_size):\n            batch = articles&#x5B;i:i+batch_size]\n            self._translate_batch(batch, prompt_template, hints)\n        \n        return articles\n    \n    def _translate_batch(self, articles: List&#x5B;Article], prompt_template: str, hints: dict):\n        &quot;&quot;&quot;\u6279\u91cf\u7ffb\u8bd1&quot;&quot;&quot;\n        for article in articles:\n            # \u68c0\u67e5\u7f13\u5b58\n            cache_key = f&quot;translate_{article.id}&quot;\n            cached = self.cache.get(cache_key)\n            \n            if cached:\n                article.title = cached&#x5B;'title']\n                article.content = cached&#x5B;'content']\n                continue\n            \n            # \u6839\u636ehints\u8c03\u6574prompt\n            template = prompt_template\n            if hints.get('be_more_accurate'):\n                template += &quot;\\n\u6ce8\u610f\uff1a\u4f7f\u7528\u66f4\u51c6\u786e\u3001\u4e13\u4e1a\u7684\u7ffb\u8bd1\u3002&quot;\n            \n            # \u8c03\u7528LLM\n            prompt = template.format(\n                title=article.title,\n                content=article.content\n 
           )\n            \n            result = self.llm.chat(prompt, temperature=hints.get('temperature', 0.3))\n            translated = self._parse_result(result)\n            \n            # \u66f4\u65b0\u6587\u7ae0\n            article.title = translated&#x5B;'title']\n            article.content = translated&#x5B;'content']\n            \n            # \u7f13\u5b58\n            self.cache.set(cache_key, translated)\n            \n            # \u8bb0\u5f55\n            article.add_processing_record('translate', {\n                'original_title': article.raw_data.get('title'),\n                'translated_title': article.title\n            })\n    \n    def _parse_result(self, result: str) -&gt; dict:\n        &quot;&quot;&quot;\u89e3\u6790LLM\u8fd4\u56de\u7ed3\u679c&quot;&quot;&quot;\n        # \u5047\u8bbeLLM\u8fd4\u56de\u683c\u5f0f\u4e3a\uff1a\u6807\u9898: xxx\\n\u5185\u5bb9: xxx\n        lines = result.strip().split('\\n', 1)\n        return {\n            'title': lines&#x5B;0].replace('\u6807\u9898:', '').strip(),\n            'content': lines&#x5B;1].replace('\u5185\u5bb9:', '').strip() if len(lines) &gt; 1 else ''\n        }\n\n<\/pre><\/div>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">3.5 quality\/evaluator.py &#8211; \u8d28\u91cf\u8bc4\u4f30\u5668<\/h3>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: python; title: ; notranslate\" title=\"\">\nfrom typing import List\nfrom core.models import Article, QualityScore, Context\nfrom quality.rules import LengthRule, FreshnessRule, RelevanceRule, LLMQualityRule\n\nclass QualityEvaluator:\n    &quot;&quot;&quot;\u8d28\u91cf\u8bc4\u4f30\u5668 - \u7efc\u5408\u8bc4\u4f30\u6587\u7ae0\u8d28\u91cf&quot;&quot;&quot;\n    \n    def __init__(self, config: dict = None):\n        &quot;&quot;&quot;\n        \u521d\u59cb\u5316\u8bc4\u4f30\u5668\n        \n        Args:\n            config: \u8d28\u91cf\u914d\u7f6e\uff08\u6765\u81ea 
config\/quality.yaml\uff09\n        &quot;&quot;&quot;\n        self.config = config or {}\n        self.rules = self._init_rules()\n        self.weights = self.config.get('weights', {\n            'length': 0.1,\n            'fresh\n\n<\/pre><\/div>","protected":false},"excerpt":{"rendered":"<p>\u76ee\u5f55\u540d \u6027\u8d28 \u6838\u5fc3\u804c\u8d23 \u5305\u542b\u4ec0\u4e48 \u63d0\u4ea4Git? \u8c01\u4f1a\u4fee\u6539 core\/ \u4ee3\u7801-\u6846\u67b6\u5c42 \u63d0\u4f9b\u5e95\u5c42\u80fd\u529b\uff0c\u652f\u6491\u6574\u4e2a\u7cfb\u7edf\u8fd0\u8f6c workflow\u3001context\u3001llm\u3001s&#8230;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"_monsterinsights_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"footnotes":""},"categories":[10],"tags":[],"table_tags":[],"class_list":["post-2063","post","type-post","status-publish","format-standard","hentry","category-program"],"_links":{"self":[{"href":"https:\/\/www.loganblog.com\/index.php\/wp-json\/wp\/v2\/posts\/2063","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.loganblog.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.loganblog.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.loganblog.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.loganblog.com\/index.php\/wp-json\/wp\/v2\/comments?post=2063"}],"version-history":[{"count":9,"href":"https:\/\/www.loganblog.com\/index.php\/wp-json\/wp\/v2\/posts\/2063\/revisions"}],"predecessor-version":[{"id":2087,"href":"https:\/\/www.loganblog.com\/index.php\/wp-json\/wp\/v2\/posts\/2063\/revisions\/2087"}],"wp:attachment":[{"href":"https:\/\/www.loganblog.com\/index.php\/wp-json\/wp\/v2\/media?parent=2063"}],"wp:term":[{"taxonomy":"category
","embeddable":true,"href":"https:\/\/www.loganblog.com\/index.php\/wp-json\/wp\/v2\/categories?post=2063"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.loganblog.com\/index.php\/wp-json\/wp\/v2\/tags?post=2063"},{"taxonomy":"table_tags","embeddable":true,"href":"https:\/\/www.loganblog.com\/index.php\/wp-json\/wp\/v2\/table_tags?post=2063"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}