{"id":787,"date":"2016-02-08T21:54:18","date_gmt":"2016-02-08T12:54:18","guid":{"rendered":"http:\/\/frsw.net\/blog\/?p=787"},"modified":"2016-02-08T21:54:18","modified_gmt":"2016-02-08T12:54:18","slug":"mecab-python%e3%81%8c%e7%84%a1%e7%90%86%e3%81%a0%e3%81%a3%e3%81%9f%e4%ba%ba%e5%90%91%e3%81%91%e3%81%ae%e3%83%a2%e3%82%b8%e3%83%a5%e3%83%bc%e3%83%abwindows%e7%b7%a8","status":"publish","type":"post","link":"http:\/\/frsw.net\/blog\/mecab-python%e3%81%8c%e7%84%a1%e7%90%86%e3%81%a0%e3%81%a3%e3%81%9f%e4%ba%ba%e5%90%91%e3%81%91%e3%81%ae%e3%83%a2%e3%82%b8%e3%83%a5%e3%83%bc%e3%83%abwindows%e7%b7%a8\/","title":{"rendered":"mecab-python\u304c\u7121\u7406\u3060\u3063\u305f\u4eba\u5411\u3051\u306e\u30e2\u30b8\u30e5\u30fc\u30eb[Windows\u7de8]"},"content":{"rendered":"<p>Python\u3067mecab\u3092\u4f7f\u3044\u305f\u304b\u3063\u305f\u306e\u3067\u3059\u304c\u3001\u9762\u5012\u306a\u3053\u3068\u304c\u591a\u3059\u304e\u3066\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306b\u632b\u6298\u3057\u307e\u3057\u305f\u3002<\/p>\n<p>\u3068\u3044\u3046\u3053\u3068\u3067\u3001\u79c1\u307f\u305f\u3044\u306a\u300cmecab-python\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306b\u632b\u6298\u3057\u305f\u4eba\u5411\u3051\u300d\u306e\u30af\u30e9\u30b9\u3092\u66f8\u304d\u307e\u3057\u305f\u306e\u3067\u3001\u4f7f\u3044\u305f\u3044\u4eba\u306f\u4f7f\u3063\u3066\u307f\u3066\u304f\u3060\u3055\u3044\u3002<br \/>\n\u6700\u4f4e\u9650\u306e\u6a5f\u80fd\u3057\u304b\u306a\u3044\u3067\u3059\u304c\u3001\u4e00\u5fdc\u3084\u308a\u305f\u3044\u3053\u3068\u306f\u3067\u304d\u308b\u306e\u3067\u306f\u306a\u3044\u3067\u3057\u3087\u3046\u304b\u3002<\/p>\n<p><!--more--><\/p>\n<p>\u3053\u306e\u8a18\u4e8b\u672b\u5c3e\u306e\u30b3\u30fc\u30c9\u3092\u30b3\u30d4\u30da\u3057\u3066\u300cmecab.py\u300d\u3068\u304b\u540d\u524d\u3092\u4ed8\u3051\u3066\u4fdd\u5b58\u3057\u3066\u3001\u30e2\u30b8\u30e5\u30fc\u30eb\u3068\u3057\u3066\u8aad\u307f\u8fbc\u3093\u3067\u3082\u3089\u3046\u3068\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u4f7f\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n<p>[python]<br \/>\nimport mecab<br \/>\nstring = &quot;\u56fd\u5883\u306e\u9577\u3044\u30c8\u30f3\u30cd\u30eb\u3092\u629c\u3051\u308b\u3068\u305d\u3053\u306f\u96ea\u56fd\u3060\u3063\u305f\u3002&quot;<br \/>\nm = mecab.Mecab(string)<\/p>\n<p># \u305f\u3060\u306e\u5206\u304b\u3061\u66f8\u304d\u306e\u30ea\u30b9\u30c8\u3092\u8fd4\u3059<br \/>\nm.wakachi()<br \/>\n#=&gt; [&#8216;\u56fd\u5883&#8217;, &#8216;\u306e&#8217;, &#8216;\u9577\u3044&#8217;, &#8216;\u30c8\u30f3\u30cd\u30eb&#8217;, &#8216;\u3092&#8217;, &#8216;\u629c\u3051\u308b&#8217;, &#8216;\u3068&#8217;, &#8216;\u305d\u3053&#8217;, &#8216;\u306f&#8217;, &#8216;\u96ea\u56fd&#8217;, &#8216;\u3060\u3063&#8217;, &#8216;\u305f&#8217;, &#8216;\u3002&#8217;]<\/p>\n<p># \u666e\u901a\u8868\u8a18\u306e\u30ea\u30b9\u30c8\u3092\u8fd4\u3059<br \/>\nm.futsu_hyouki()<br \/>\n#=&gt; [&#8216;\u56fd\u5883&#8217;, &#8216;\u306e&#8217;, &#8216;\u9577\u3044&#8217;, &#8216;\u30c8\u30f3\u30cd\u30eb&#8217;, &#8216;\u3092&#8217;, &#8216;\u629c\u3051\u308b&#8217;, &#8216;\u3068&#8217;, &#8216;\u305d\u3053&#8217;, &#8216;\u306f&#8217;, &#8216;\u96ea\u56fd&#8217;, &#8216;\u3060&#8217;, &#8216;\u305f&#8217;, &#8216;\u3002&#8217;]<br \/>\n[\/python]<\/p>\n<p>\u3068\u3044\u3046\u611f\u3058\u3067\u3059\u3002<br \/>\n\u4eca\u5f8c\u6539\u826f\u3057\u3066\u3044\u3053\u3046\u3068\u601d\u3044\u307e\u3059\u3002<\/p>\n<h3>Mecab\u30af\u30e9\u30b9\u306e\u30bd\u30fc\u30b9\u30b3\u30fc\u30c9<\/h3>\n<p>[python]<br \/>\n# -*- coding: utf-8 -*-<\/p>\n<p>class Mecab:<br \/>\n    &quot;&quot;&quot;mecab-python\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306b\u632b\u6298\u3057\u305f\u4eba\u7528<br \/>\n    &quot;&quot;&quot;<\/p>\n<p>    def __init__(self, string=&quot;&quot;):<br \/>\n        self.ma_list = self.cmd_mecab(string)<\/p>\n<p>    def wakachi(self):<br \/>\n        &quot;&quot;&quot;\u5206\u304b\u3061\u66f8\u304d\u306e\u7d50\u679c\u3092\u8fd4\u3059(list)<br \/>\n        &quot;&quot;&quot;<br \/>\n        wakachi_list = []<br \/>\n        for word_ma in self.ma_list:<br \/>\n            wakachi_list.append(word_ma.split(&#8216;\\t&#8217;)[0])<br \/>\n        return wakachi_list<\/p>\n<p>    def futsu_hyouki(self):<br \/>\n        &quot;&quot;&quot;\u666e\u901a\u8868\u8a18\u306e\u7d50\u679c\u3092\u8fd4\u3059(list)<br \/>\n        &quot;&quot;&quot;<br \/>\n        futsu_hyouki_list = []<br \/>\n        for word_ma in self.ma_list:<br \/>\n            word, ma_info = word_ma.split(&#8216;\\t&#8217;)<br \/>\n            futsu_hyouki_info = ma_info.split(&#8216;,&#8217;)[6]<br \/>\n            word_futsu = &#8221;<br \/>\n            if futsu_hyouki_info == &#8216;*&#8217;:<br \/>\n                word_futsu = word<br \/>\n            else:<br \/>\n                word_futsu = futsu_hyouki_info<br \/>\n            futsu_hyouki_list.append(word_futsu)<br \/>\n        return futsu_hyouki_list<\/p>\n<p>    def cmd_mecab(self, string=&#8221;, opt=&#8221;):<br \/>\n        &quot;&quot;&quot;<br \/>\n        Shell\u7d4c\u7531\u3067mecab\u3092\u5b9f\u884c\u3057\u3066\u7d50\u679c\u3092\u53d6\u5f97<br \/>\n        (\u5f15\u304d\u6570)<br \/>\n          string str\uff1a \u89e3\u6790\u3057\u305f\u3044\u6587\u5b57\u5217<br \/>\n          opt str\uff1a mecab\u306e\u89e3\u6790\u30aa\u30d7\u30b7\u30e7\u30f3<br \/>\n        (\u8fd4\u308a\u5024)<br \/>\n          stdout str\uff1a \u89e3\u6790\u3057\u305f\u7d50\u679c\u305d\u306e\u307e\u307e<br \/>\n        &quot;&quot;&quot;<br \/>\n        import subprocess<br \/>\n        cmd = &#8216;echo %s | mecab&#8217; % string.replace(&#8216;\\n&#8217;, &#8221;)<br \/>\n        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)<br \/>\n        stdout_data, stderr_data = p.communicate()<br \/>\n        stdout = str(stdout_data, encoding=&#8217;Shift-JIS&#8217;)<br \/>\n        words_with_ma_info = stdout.split(&quot;\\r\\n&quot;)<br \/>\n        words_with_ma_info.remove(&#8216;EOS&#8217;)<br \/>\n        words_with_ma_info.remove(&#8221;)<br \/>\n        return words_with_ma_info<br \/>\n[\/python]<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Python\u3067mecab\u3092\u4f7f\u3044\u305f\u304b\u3063\u305f\u306e\u3067\u3059\u304c\u3001\u9762\u5012\u306a\u3053\u3068\u304c\u591a\u3059\u304e\u3066\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306b\u632b\u6298\u3057\u307e\u3057\u305f\u3002 \u3068\u3044\u3046\u3053\u3068\u3067\u3001\u79c1\u307f\u305f\u3044\u306a\u300cmecab-python\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306b\u632b\u6298\u3057\u305f\u4eba\u5411\u3051\u300d\u306e\u30af\u30e9\u30b9\u3092\u66f8\u304d\u307e\u3057\u305f\u306e\u3067\u3001\u4f7f\u3044\u305f\u3044\u4eba\u306f\u4f7f [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[30,4,16],"tags":[38],"_links":{"self":[{"href":"http:\/\/frsw.net\/blog\/wp-json\/wp\/v2\/posts\/787"}],"collection":[{"href":"http:\/\/frsw.net\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/frsw.net\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/frsw.net\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/frsw.net\/blog\/wp-json\/wp\/v2\/comments?post=787"}],"version-history":[{"count":1,"href":"http:\/\/frsw.net\/blog\/wp-json\/wp\/v2\/posts\/787\/revisions"}],"predecessor-version":[{"id":788,"href":"http:\/\/frsw.net\/blog\/wp-json\/wp\/v2\/posts\/787\/revisions\/788"}],"wp:attachment":[{"href":"http:\/\/frsw.net\/blog\/wp-json\/wp\/v2\/media?parent=787"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/frsw.net\/blog\/wp-json\/wp\/v2\/categories?post=787"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/frsw.net\/blog\/wp-json\/wp\/v2\/tags?post=787"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}