# Copyright 2021 PingCAP, Inc. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # MIT License # Copyright (c) 2021 Charlotte Liu # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-manual-line-breaks.py. import re, sys, os # Check manual line break within a paragraph. def check_manual_break(filename): two_lines = [] metadata = 0 toggle = 0 ctoggle = 0 stoggle = 0 mathtoggle = 0 lineNum = 0 mark = 0 with open(filename,'r', encoding='utf-8') as file: for line in file: lineNum += 1 # Count the number of '---' to skip metadata. if metadata < 2 : if re.match(r'(\s|\t)*(-){3}', line): metadata += 1 continue else: # Skip tables and notes. if re.match(r'(\s|\t)*(\||>)\s*\w*',line): continue # Skip html tags and markdownlint tags. if re.match(r'(\s|\t)*((<\/*(.*)>)|)\s*\w*',line): if re.match(r'(\s|\t)*(
|)',line):
ctoggle = 1
elif re.match(r'(\s|\t)*(<\/code><\/pre>|<\/table>)',line):
ctoggle = 0
else:
continue
# Skip multi-line '