#coding:utf-8

# Convert Markdown inline links into reference-style links listed at the end of the document.

import sys
import re
import urllib
import shutil
from urllib import request
from bs4 import BeautifulSoup


# Matches an inline link ``[text](url)`` that is not an image (``![alt](url)``).
# Group 1 is the bracketed text, group 2 is the URL between the parentheses.
# A negative lookbehind (instead of consuming one non-"!" character) lets a
# link at the very start of the document match as well.
LINK_PATTERN = re.compile(r'(?<!!)(\[.*?\])\((.*?)\)')

# Browser-like User-Agent sent with every page-title request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}


def fetch_page_title(url):
    """Download *url* and return the text of its ``<title>`` element.

    Returns an empty string when the page has no ``<title>`` or the title has
    no single string content. Errors raised while opening or reading the URL
    are not caught here; the caller decides how to report them.
    """
    req = urllib.request.Request(url=url, headers=headers)
    with urllib.request.urlopen(req) as response:
        content = response.read()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(content, 'html.parser')
    title_tag = soup.title
    if title_tag is None or title_tag.string is None:
        return ''
    return title_tag.string.replace("\n", "")


def convert_inline_links(post, fetch_title=None):
    """Return *post* with inline links rewritten as numbered reference links.

    Every ``[text](url)`` becomes ``[text][n]``, where ``n`` is assigned per
    distinct URL in order of first appearance, and a ``[n]: url`` definition
    is appended for each URL. Image links and links with an empty URL are
    left unchanged. For URLs starting with ``http`` the page title is looked
    up and, when available, added on an extra line below the definition.

    Args:
        post: Markdown text to convert.
        fetch_title: Callable taking a URL and returning its page title
            (empty string if none). Defaults to ``fetch_page_title``.

    Returns:
        The converted Markdown text. Text without links is returned as is.
    """
    if fetch_title is None:
        fetch_title = fetch_page_title

    numbers = {}  # URL -> reference number, in order of first appearance

    def to_reference(match):
        url = match.group(2)
        if not url.strip():
            return match.group(0)  # nothing to reference; keep the link as is
        number = numbers.setdefault(url, len(numbers) + 1)
        return '{0}[{1}]'.format(match.group(1), number)

    # Substituting on the link pattern itself (rather than replacing the bare
    # "(url)" text) keeps images and ordinary parenthesised text untouched.
    pieces = [LINK_PATTERN.sub(to_reference, post)]
    print('total url : {0}'.format(list(numbers)))

    for url, number in numbers.items():
        print(number)
        print(url)
        # "[n]: url" is the reference-definition syntax; the blank line in
        # front keeps the definition from being absorbed into the preceding
        # paragraph (or the preceding title line).
        entry = '\n\n[{0}]: {1}'.format(number, url)
        if url.startswith('http'):
            try:
                url_title = fetch_title(url)
            except Exception:
                # Best effort: a page that cannot be fetched only loses its
                # title line, the link itself is still converted.
                print('something is wrong with url {0}'.format(number))
                url_title = ''
            if url_title:
                print(url_title)
                entry = entry + '\n  网页标题: 《' + url_title + '》'
        pieces.append(entry)
    return ''.join(pieces)


def main(argv=None):
    """Convert the Markdown file named in ``argv[1]``.

    The result is written, UTF-8 encoded, next to the input as
    ``<name>_reference.md``; the input file itself is not modified.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit('usage: {0} <markdown-file>'.format(argv[0] if argv else 'script'))

    md_file = argv[1]  # command-line argument: path of the markdown file
    # Only strip the extension when it really is ".md".
    stem = md_file[:-3] if md_file.lower().endswith('.md') else md_file
    new_md = stem + '_reference.md'

    with open(md_file, 'r', encoding='utf-8') as f:
        post = f.read()
    converted = convert_inline_links(post)
    with open(new_md, 'w', encoding='utf-8') as f:
        f.write(converted)


if __name__ == '__main__':
    main()