# Copyright 2021 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# MIT License

# Copyright (c) 2021 Charlotte Liu

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-file-encoding.py.

import codecs
import os
import sys


def check_BOM(filename):
    """Strip a leading UTF-8 BOM from *filename*, rewriting the file in place.

    The file is first probed read-only, so a file that has no BOM is never
    opened for writing (and therefore does not need to be writable). When a
    BOM is present, the remaining content is shifted towards the start of the
    file in fixed-size chunks and the file is shortened by the BOM length, so
    the whole file is never held in memory.

    Args:
        filename: Path of the file to check and, if needed, convert.

    Returns:
        True if a BOM was found and removed, False if the file was left
        untouched.

    Raises:
        OSError: If the file cannot be read, or has a BOM but cannot be
            opened for writing.
    """
    BUFSIZE = 4096
    BOMLEN = len(codecs.BOM_UTF8)

    # Cheap read-only probe: most files have no BOM and need no write access.
    with open(filename, "rb") as fp:
        if fp.read(BOMLEN) != codecs.BOM_UTF8:
            return False

    with open(filename, "r+b") as fp:
        # Re-check under the writable handle in case the file changed between
        # the probe and this open.
        if fp.read(BOMLEN) != codecs.BOM_UTF8:
            return False

        # Slide the content left by BOMLEN bytes. Each write lands strictly
        # before the next read offset, so unread data is never overwritten.
        read_pos = BOMLEN
        write_pos = 0
        while True:
            fp.seek(read_pos)
            chunk = fp.read(BUFSIZE)
            if not chunk:
                break
            fp.seek(write_pos)
            fp.write(chunk)
            read_pos += len(chunk)
            write_pos += len(chunk)

        # Drop the now-duplicated trailing BOMLEN bytes.
        fp.truncate(write_pos)

    print("\n" + filename + ": this file's encoding has been converted to UTF-8 without BOM to avoid broken metadata display.")
    return True


if __name__ == "__main__":

    # Every command-line argument that names an existing regular file is
    # checked; anything else (directories, missing paths) is skipped.
    for filename in sys.argv[1:]:
        if os.path.isfile(filename):
            check_BOM(filename)