{"id":721,"date":"2018-04-18T06:22:44","date_gmt":"2018-04-17T21:22:44","guid":{"rendered":"https:\/\/t-office.blue\/?p=721"},"modified":"2018-04-18T06:25:54","modified_gmt":"2018-04-17T21:25:54","slug":"post-721","status":"publish","type":"post","link":"https:\/\/wp.t-office.blue\/?p=721","title":{"rendered":"AKB48\u3068\u4e43\u6728\u574246\u306e\u30b5\u30a4\u30c8\u304b\u3089\u753b\u50cf\u3068\u540d\u524d\u3092\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3057\u305f\u3088\u3002"},"content":{"rendered":"<h1>AKB48\u3068\u4e43\u6728\u574246\u306e\u30b5\u30a4\u30c8\u304b\u3089\u753b\u50cf\u3068\u540d\u524d\u3092\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3057\u305f\u3088\u3002<\/h1>\n<p>Python\u3067\u306e\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306e\u7d9a\u304d\u3067\u3059\u3002<\/p>\n<p>AKB48\u306e\u30b5\u30a4\u30c8\u3068\u4e43\u6728\u574246\u306e\u516c\u5f0f\u30b5\u30a4\u30c8\u306b\u30e1\u30f3\u30d0\u30fc\u4e00\u89a7\u304c\u3042\u308a\u307e\u3059\u3002\u3053\u3053\u304b\u3089\u3001\u540d\u524d\u3068\u753b\u50cf\u306eURL\u3092CSV\u306b\u53d6\u5f97\u3057\u307e\u3057\u305f\u3002<\/p>\n<p><script async src=\"\/\/pagead2.googlesyndication.com\/pagead\/js\/adsbygoogle.js\"><\/script><br \/>\n<ins class=\"adsbygoogle\"\n     style=\"display:block\"\n     data-ad-format=\"fluid\"\n     data-ad-layout-key=\"-ei+6a+g-c3+k8\"\n     data-ad-client=\"ca-pub-4003048870046052\"\n     data-ad-slot=\"1589408991\"><\/ins><\/p>\n<p><script>\n     (adsbygoogle = window.adsbygoogle || []).push({});\n<\/script><\/p>\n<p>[\u4f7f\u7528\u30d1\u30c3\u30b1\u30fc\u30b8]<\/p>\n<ul>\n<li>pandas<\/li>\n<li>selenium<\/li>\n<li>time<\/li>\n<li>subprocess<\/li>\n<\/ul>\n<p>\u4f5c\u3063\u305f\u6d41\u308c\u306f\u3053\u3093\u306a\u611f\u3058<\/p>\n<p>1 nogisaka-sc.py\u30d5\u30a1\u30a4\u30eb\u3092\u4f5c\u308a\u3001\u4e43\u6728\u5742\u306e\u30b5\u30a4\u30c8\u304b\u3089\u30c7\u30fc\u30bf\u53d6\u5f97<br \/>\n2 
akb-sc.py\u30d5\u30a1\u30a4\u30eb\u3092\u4f5c\u308a\u3001AKB\u306e\u30b5\u30a4\u30c8\u304b\u3089\u30c7\u30fc\u30bf\u53d6\u5f97<br \/>\n3 1,2\u306e\u30d5\u30a1\u30a4\u30eb\u3092\u9806\u306b\u5b9f\u884c\u3059\u308b\u30d5\u30a1\u30a4\u30ebget-idol.py\u3092\u4f5c\u6210\u3059\u308b<\/p>\n<p>1\u306e\u30b3\u30fc\u30c9\u306f\u4ee5\u4e0b<\/p>\n<pre><code>from selenium import webdriver\nimport pandas\nimport time\n\n\"\"\"***************************************\n\u8a2d\u5b9a\n***************************************\"\"\"\nbrowser = webdriver.Chrome()\ndf = pandas.read_csv('default.csv', index_col=0)\nurl = \"http:\/\/www.nogizaka46.com\/member\/\" #\u53d6\u5f97\u30b5\u30a4\u30c8\n\n\"\"\"******************************\nCSS SELECTOR\u306e\u8a2d\u5b9a\n******************************\"\"\"\nPOSTS = \"div.unit\"\nACTRESS_NAME = \"span.main\" #\u540d\u524d\n\n\"\"\"***************************************\n\u5b9f\u884c\u90e8\u5206\n***************************************\"\"\"\n\nbrowser.get(url)\n\nprint(\"Starting to get posts...\")\nposts = browser.find_elements_by_css_selector(POSTS) #\u30e1\u30f3\u30d0\u30fc\u6570\nprint (len(posts))\n\nfor post in posts:\n   try:\n       name = post.find_element_by_css_selector(\"span.main\").text\n       class_name = post.find_element_by_tag_name(\"img\").get_attribute(\"class\")\n       print(name)\n       thumnailURL = \"http:\/\/img.nogizaka46.com\/www\/member\/img\/\" + class_name + \"_prof.jpg\"\n       print(thumnailURL)\n       se = pandas.Series([name,thumnailURL,\"nogisaka\"],[\"name\", \"image\",\"group\"])\n       df = df.append(se, ignore_index=True)\n   except Exception as e:\n       print(e)\n\n\"\"\"***************************************\n\u66f8\u304d\u51fa\u3057\u90e8\u5206\n***************************************\"\"\"\n\nprint(\"Finished Scraping. 
Writing CSV.......\")\ndf.to_csv(\"idol.csv\", encoding=\"UTF-8\", mode=\"a\", header=\"false\")\nprint(\"CSV Writed.\")\n\n\"\"\"***************************************\n\u5f8c\u51e6\u7406\u90e8\u5206\n***************************************\"\"\"\n\nbrowser.close()\nprint(\"DONE\")\n\n<\/code><\/pre>\n<p>\u6700\u521d\u306b\u8aad\u307f\u8fbc\u3093\u3067\u3044\u308bdefault.csv\u306f ,name,url \u3060\u3051\u304c\u66f8\u3044\u3066\u3042\u308bCSV\u30d5\u30a1\u30a4\u30eb\u3067\u3059\u3002\u6700\u7d42\u66f8\u304d\u51fa\u3057\u6642\u306b\u30d8\u30c3\u30c0\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>2\u306e\u30b3\u30fc\u30c9\u306f\u4ee5\u4e0b<\/p>\n<pre><code>from selenium import webdriver\nimport pandas\nimport time\n\n\"\"\"***************************************\n\u8a2d\u5b9a\n***************************************\"\"\"\nbrowser = webdriver.Chrome()\ndf = pandas.read_csv('default.csv', index_col=0)\nurl = \"https:\/\/www.akb48.co.jp\/about\/members\/\" #\u53d6\u5f97\u30b5\u30a4\u30c8\n\n\"\"\"******************************\nCSS SELECTOR\u306e\u8a2d\u5b9a\n******************************\"\"\"\nPOSTS = \"li\"\nACTRESS_NAME = \"h4.memberListNamej\" #\u540d\u524d\n\n\"\"\"***************************************\n\u5b9f\u884c\u90e8\u5206\n***************************************\"\"\"\n\nbrowser.get(url)\n\nprint(\"Starting to get posts...\")\nposts = browser.find_elements_by_css_selector(POSTS) #\u30e1\u30f3\u30d0\u30fc\u6570\nprint (len(posts))\n\nfor post in posts:\n   try:\n       name = post.find_element_by_css_selector(ACTRESS_NAME).text\n       #class_name = post.find_element_by_tag_name(\"img\").get_attribute(\"class\")\n       print(name)\n       thumnailURL = post.find_element_by_tag_name(\"img\").get_attribute(\"src\")\n#       thumnailURL = \"http:\/\/img.nogizaka46.com\/www\/member\/img\/\" + class_name + \"_prof.jpg\"\u3000#\u753b\u50cf\u306f\u56fa\u5b9a\u3067\n       print(thumnailURL)\n       se = pandas.Series([name,thumnailURL,\"akb\"],[\"name\", \"image\",\"group\"])\n       df = df.append(se, 
ignore_index=True)\n   except Exception as e:\n       print(e)\n\n\"\"\"***************************************\n\u66f8\u304d\u51fa\u3057\u90e8\u5206\n***************************************\"\"\"\n\nprint(\"Finished Scraping. Writing CSV.......\")\ndf.to_csv(\"idol.csv\", encoding=\"UTF-8\", mode=\"a\", header=\"true\")\nprint(\"CSV Writed.\")\n\n\"\"\"***************************************\n\u5f8c\u51e6\u7406\u90e8\u5206\n***************************************\"\"\"\n\nbrowser.close()\nprint(\"DONE\")\n\n<\/code><\/pre>\n<p>\u3060\u3044\u305f\u3044\u4e00\u7dd2\u3067\u3059\u306d\u3002<\/p>\n<p>\u3067\u3001\u3053\u308c\u3089\u3092\u547c\u3073\u51fa\u3059\u30d5\u30a1\u30a4\u30eb<\/p>\n<pre><code>import subprocess\n\npath = ['akb-sc.py', 'nogisaka-sc.py']\n\ndef main():\n  ret1 = subprocess.run([\"python\", path[0]],check=True)\n  print(ret1)\n  ret2 = subprocess.run([\"python\", path[1]],check=True)\n  print(ret2)\n\nif __name__ == \"__main__\":\n  main()\n<\/code><\/pre>\n<p><script async src=\"\/\/pagead2.googlesyndication.com\/pagead\/js\/adsbygoogle.js\"><\/script><br \/>\n<ins class=\"adsbygoogle\"\n     style=\"display:block\"\n     data-ad-format=\"fluid\"\n     data-ad-layout-key=\"-ei+6a+g-c3+k8\"\n     data-ad-client=\"ca-pub-4003048870046052\"\n     data-ad-slot=\"1589408991\"><\/ins><\/p>\n<p><script>\n     (adsbygoogle = window.adsbygoogle || []).push({});\n<\/script><\/p>\n","protected":false},"excerpt":{"rendered":"<p>AKB48\u3068\u4e43\u6728\u574246\u306e\u30b5\u30a4\u30c8\u304b\u3089\u753b\u50cf\u3068\u540d\u524d\u3092\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3057\u305f\u3088\u3002 Python\u3067\u306e\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306e\u7d9a\u304d\u3067\u3059\u3002 AKB48\u306e\u30b5\u30a4\u30c8\u3068\u4e43\u6728\u574246\u306e\u516c\u5f0f\u30b5\u30a4\u30c8\u306b\u30e1\u30f3\u30d0\u30fc\u4e00\u89a7\u304c\u3042\u308a\u307e\u3059\u3002\u3053\u3053\u304b\u3089\u3001\u540d\u524d\u3068\u753b\u50cf\u306eURL\u3092CSV\u306b 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[33,5,73],"tags":[74,77,69,76,70,75],"class_list":["post-721","post","type-post","status-publish","format-standard","hentry","category-python","category-5","category-73","tag-akb48","tag-pandas","tag-python","tag-selenium","tag-70","tag-75"],"_links":{"self":[{"href":"https:\/\/wp.t-office.blue\/index.php?rest_route=\/wp\/v2\/posts\/721","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/wp.t-office.blue\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/wp.t-office.blue\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/wp.t-office.blue\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/wp.t-office.blue\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=721"}],"version-history":[{"count":1,"href":"https:\/\/wp.t-office.blue\/index.php?rest_route=\/wp\/v2\/posts\/721\/revisions"}],"predecessor-version":[{"id":723,"href":"https:\/\/wp.t-office.blue\/index.php?rest_route=\/wp\/v2\/posts\/721\/revisions\/723"}],"wp:attachment":[{"href":"https:\/\/wp.t-office.blue\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=721"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/wp.t-office.blue\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=721"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/wp.t-office.blue\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=721"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}