// Copyright 2016 The Hugo Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package main import ( "os" "fmt" "unicode" "strings" "bufio" "regexp" "path/filepath" ) // Global variables type attachm_t struct { // almost always images, but can be doc, pdf, etc. cnt int // counter, starts with 0 fname string // file name, i.e., basename hugoFname string // fname prefixed with /img/ } var attachm = make(map[string]*attachm_t) // counts number of occurences of attachments var config = make(map[string]string) // goes to config.toml // WordPress unfortunately inserts HTML codes into bash/C/R code. // Have to get rid of these again. var delimList = []struct { start string stop string }{ { "[googlemaps ", "]" }, { "[code", "[/code]" }, { "
", "" }, { "$latex ", "$" }, } func deHTMLify(s string) string { // glitch for WordPress foolishly changing "&" to "&", ">" to ">", etc. for _,v := range(delimList) { tx0,tx1,lenvstart,lenvstop := 0,0,len(v.start),len(v.stop) for { if tx1 = strings.Index(s[tx0:],v.start); tx1 < 0 { break } if tx2 := strings.Index(s[tx0+tx1+lenvstart:],v.stop); tx2 > 0 { //fmt.Println("\t\tv =",v,", tx0 =",tx0,", tx1 =",tx1,", tx2 =",tx2,"\n\t\ts =",s[tx1:tx1+70]) t := strings.Replace(s[tx0:tx0+tx1+tx2],"&","&",-1) t = strings.Replace(t,"<","<",-1) t = strings.Replace(t,">",">",-1) t = strings.Replace(t,""","\"",-1) s = s[0:tx0] + t + s[tx0+tx1+tx2:] } else { u := len(s[tx0+tx1:]) if u > 40 { u = 40 } // Show up to 40 chars of offending string fmt.Println("\tClosing tag",v.stop," in ",s[tx0+tx1:tx0+tx1+u]," not found") } tx0 += tx1 + lenvstart + lenvstop // + len(t) } } return s } // Use regexp to change various WordPress specific codes and // map them to the equivalent Hugo codes. // This list should be put into a configuration file. var replaceList = []struct { regx *regexp.Regexp replace string }{ // convert [code lang=bash] to ```bash { regexp.MustCompile("(\n{0,1})\\[code\\s*lang(uage|)=(\"|)([A-Za-z\\+]+)(\"|)(.*)\\]\\w*\n"), "\n```$4$6\n" }, // convert [/code] to ``` { regexp.MustCompile("\n\\[/code\\]\\s*\n"), "\n```\n" }, // handle https://www.youtube.com/watch?v=wtqfC9v0xB0 { regexp.MustCompile("\nhttp(.|)://www\\.youtube\\.com/watch\\?v=(\\w+)(&.+|)\n"), "\n{{< youtube $2 >}}\n" }, // handle [youtube=http://www.youtube.com/watch?v=IA8X1cXFo9oautoplay=0&start=0&end=0] { regexp.MustCompile(`\[youtube=http(.|)://www\.youtube\.com/watch\?v=(\w+)(&.+|)\]`), "{{< youtube $2 >}}" }, // handle [vimeo 199882338] { regexp.MustCompile(`\[vimeo (\d\d\d+)\]`), "{{< vimeo $1 >}}" }, // handle [vimeo https://vimeo.com/167845464] { regexp.MustCompile(`\[vimeo http(.|)://vimeo\.com/(\d\d\d+)\]`), "{{< vimeo $2 >}}" }, // convert
, which is used as , to ```
{ regexp.MustCompile("\n\\s*\n"), "\n```\n" },
// handle
which is used as
{ regexp.MustCompile("\n
\\s*\n"), "\n```\n" },
// convert andto ``` { regexp.MustCompile("(\n{0,1})<(/|)pre>\\s*(\n{0,1})"), "\n```\n" }, // convert $latex ...$ to `$...$`, handle multiline matches with (?s) { regexp.MustCompile(`\$latex\s+(?s)(.+?)\$`), "${}$1$" }, // convert [googlemaps ...] to