# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @luaxlou
# Created August 3, 2016 13:50
# Tool for syncing Jianshu (简书) articles into a Hexo blog
# Gemfile
# source 'https://gems.ruby-china.org/'
#
# gem 'test-unit'
# gem "selenium-webdriver"
# gem 'mechanize'
#
require "json"
require "pp"
require "selenium-webdriver"
require "test/unit"
require "mechanize"
# Syncs the articles of a Jianshu account into a local Hexo blog.
#
# The sync is exposed as a single Test::Unit test method (#test_suite).
# Articles are fetched as JSON from the Jianshu writer endpoints with
# Mechanize; when no usable login cookie is available, a Firefox session is
# driven through Selenium so the captcha can be solved by hand, and the
# resulting cookies are cached on disk for later runs.
class TestT < Test::Unit::TestCase
  # Text file containing "<username> <password>" separated by whitespace.
  ACCOUNT_FILE = './cache/jianshu.account'
  # JSON dump of the browser cookies captured after a login.
  COOKIE_FILE = './cache/jianshu.cookie'
  # Directory that receives one Markdown file per synced article.
  HEXO_POST_PATH = './source/_posts'
  # Directory that receives images downloaded from article bodies.
  HEXO_IMAGE_PATH = './source/images'
  BASE_URL = "http://www.jianshu.com"
  # Matches an uploaded-image URL: (1) the URL up to the query string,
  # (2) the query string, (3) the closing `"` or `)` that delimits the URL.
  IMAGE_URL_PATTERN = /(http:\/\/upload-images.*?)(\?.*?)([")])/

  # Creates the Mechanize agent shared by all HTTP requests.
  def setup
    @client = Mechanize.new
  end

  # Test entry point; simply runs the sync.
  def test_suite
    sync
  end

  # Syncs Jianshu articles to Hexo.
  #
  # Writes every article returned by #get_articles to
  # HEXO_POST_PATH/jianshu_<id>.md, prefixed with a Hexo front-matter header
  # (title, date, and the notebook name as the single tag).
  def sync
    get_articles.each do |a|
      head = "---\ntitle: #{a['title']}\ndate: #{a['create_time']}\ntags:\n - #{a['tag']}\n---"
      file = "#{HEXO_POST_PATH}/jianshu_#{a['id']}.md"
      # Interpolation keeps a missing (nil) content from raising TypeError.
      File.write file, "#{head}\n#{a['content']}"
      p "post #{a['title']} synced.file:#{file}"
    end
  end

  # Fetches the account's notebooks.
  #
  # @return [Hash] notebook id => notebook name
  def get_notebooks
    getJSON('/writer/notebooks').each_with_object({}) do |n, map|
      map[n['id']] = n['name']
    end
  end

  # Fetches all notes and enriches each one in place with:
  #   'content'     - the article body with images localised,
  #   'tag'         - the name of the notebook the note belongs to,
  #   'create_time' - a Time built from 'last_compiled_at'.
  #
  # @return [Array<Hash>] the enriched notes
  def get_articles
    articles = getJSON '/writer/notes'
    # Notebooks are only needed to tag each article with its notebook name.
    notebooks = get_notebooks
    articles.each do |a|
      a['content'] = get_article_content a['id']
      a['tag'] = notebooks[a['notebook_id']]
      # Time.at already yields local time; no string round-trip is needed.
      a['create_time'] = Time.at(a['last_compiled_at'])
    end
    articles
  end

  # Fetches the body of one note and localises its images.
  #
  # Every image URL matching IMAGE_URL_PATTERN is downloaded into
  # HEXO_IMAGE_PATH (skipped when a file of that name is already there) and
  # replaced in the text by "/images/<filename>".
  #
  # @param id [Integer, String] the note id
  # @return [String] the rewritten body ("" when the note has no content)
  def get_article_content id
    data = getJSON "/writer/notes/#{id}/content"
    # gsub with a block visits every image, not only the first match.
    data['content'].to_s.gsub(IMAGE_URL_PATTERN) do
      # Capture the match first: the download below may run other regexes.
      m = Regexp.last_match
      filename = File.basename m[1]
      pathname = "#{HEXO_IMAGE_PATH}/#{filename}"
      unless File.exist? pathname
        image_url = m[1] + m[2]
        p 'downloading '+image_url+'...'
        @client.download image_url, pathname
      end
      "/images/#{filename}#{m[3]}"
    end
  end

  # GETs BASE_URL + uri and parses the response body as JSON.
  #
  # Logs in first when no cached cookie could be loaded, and logs in again
  # (then retries once) when the response contains the password-reset link,
  # which is taken as the sign that the cookie has expired.
  #
  # @param uri [String] path relative to BASE_URL
  # @return [Object] the parsed JSON document
  def getJSON uri
    login unless read_cookie
    res = @client.get BASE_URL + uri
    if res.body.include? '/users/password/new' # cookie has expired
      login
      res = @client.get BASE_URL + uri
    end
    JSON.parse res.body
  end

  # Logs in through a real Firefox window and stores the session cookies.
  #
  # The credentials are read from ACCOUNT_FILE (kept in the local cache
  # directory, which is meant to be git-ignored). The method waits up to
  # 1000 seconds for the slider captcha to report success before clicking the
  # login button.
  def login
    @driver = Selenium::WebDriver.for :firefox
    begin
      @driver.manage.timeouts.implicit_wait = 30
      # Splitting on any whitespace drops the file's trailing newline, which
      # would otherwise be typed into the password field.
      username, password = File.read(ACCOUNT_FILE).split
      @driver.get(BASE_URL + "/sign_in")
      @driver.find_element(:id, "sign_in_name").clear
      @driver.find_element(:id, "sign_in_name").send_keys username
      @driver.find_element(:id, "sign_in_password").clear
      @driver.find_element(:id, "sign_in_password").send_keys password
      # Wait until the slider captcha has been solved, then submit.
      wait = Selenium::WebDriver::Wait.new(:timeout => 1000)
      wait.until { @driver.find_element(:class_name, "gt_ajax_tip").attribute('class').include? 'success' }
      @driver.find_element(:class_name, 'ladda-button').click
      save_cookie
    ensure
      # Always close the browser, even when a step above raised.
      @driver.quit
    end
  end

  # Copies the browser's cookies into the Mechanize cookie jar and persists
  # them to COOKIE_FILE so later runs can skip the login.
  def save_cookie
    cookies = @driver.manage.all_cookies
    cookies.each do |c|
      # Expiry is stored as a string; cookies without one get an arbitrary
      # date 30 days in the future.
      c[:expires] = c[:expires].nil? ? (DateTime.now + 30).to_s : c[:expires].to_s
      @client.cookie_jar << Mechanize::Cookie.new(c)
    end
    File.write COOKIE_FILE, JSON.generate(cookies)
  end

  # Loads the cached cookies into the Mechanize cookie jar (once per instance).
  #
  # @return [Boolean] true when cookies are loaded, false when COOKIE_FILE
  #   does not exist
  def read_cookie
    return true if @cookies
    return false unless File.exist? COOKIE_FILE

    @cookies = JSON.parse File.read(COOKIE_FILE)
    @cookies.each do |c|
      @client.cookie_jar << Mechanize::Cookie.new(c)
    end
    true
  end
end
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment