8 Useful Python scripts to save!

Good scripts let you leave work early! Besides writing program code, a programmer’s daily work inevitably involves testing and validation.

For example, when a website cannot be reached, you need to check whether the address is accessible and what the server returns in order to pinpoint the problem. Writing that kind of check in a compiled language costs more time and energy than is practical; this is where scripts work their magic!

It is no exaggeration to say that the ability to write efficient and useful script code directly affects the happy life of a programmer. Here are 8 useful Python scripts to use whenever you need them.

1. Fix garbled file names when unzipping archives on Linux.

import os
import sys
import zipfile
import argparse

# ANSI escape template for coloured terminal output.  It is filled with
# (attribute, colour code, text), e.g. s % (1, 97, ' ++ unzip:').
s = '\x1b[%d;%dm%s\x1b[0m'       

def _decode_member_name(info):
    """Return the member name of *info* with GBK mojibake repaired.

    When the UTF-8 flag (bit 11) is not set, ``zipfile`` hands back the raw
    name bytes (Python 2) or those bytes decoded as cp437 (Python 3).  The
    raw bytes are recovered and decoded as GBK; if that is impossible the
    name is returned unchanged.
    """
    name = info.filename
    if info.flag_bits & 0x800:
        # The archive declares the name as UTF-8; it is already correct.
        return name
    try:
        raw = name if isinstance(name, bytes) else name.encode('cp437')
        return raw.decode('gbk')
    except (UnicodeEncodeError, UnicodeDecodeError):
        return name


def unzip(path, secret=None):
    """Extract the zip archive at *path* into the current working directory.

    Archives created on Chinese-locale Windows store member names in GBK
    without flagging them, so they come out garbled on Linux.  Such names
    are re-decoded as GBK; names flagged as UTF-8, or that are not valid
    GBK, are used unchanged.  Files that already exist are not overwritten,
    and a member that cannot be written is reported on stderr and skipped.

    Args:
        path: Path of the zip archive to extract.
        secret: Optional archive password (``str`` or ``bytes``).  When
            omitted, the module-level ``args.secret`` is used if a global
            ``args`` namespace exists.
    """
    if secret is None:
        # Backward compatible: fall back to the global argparse namespace.
        secret = getattr(globals().get('args'), 'secret', None)
    if secret is not None and not isinstance(secret, bytes):
        # ZipFile.setpassword() requires bytes on Python 3.
        secret = secret.encode('utf-8')

    with zipfile.ZipFile(path, 'r') as archive:
        if secret:
            archive.setpassword(secret)

        for info in archive.infolist():
            target = _decode_member_name(info)

            # Refuse absolute paths and leading ".." components so a crafted
            # archive cannot write outside the extraction directory.
            normalized = os.path.normpath(target)
            if os.path.isabs(normalized) or \
                    normalized.split(os.sep)[0] == os.pardir:
                sys.stderr.write('skipping unsafe member name: %r\n' % (target,))
                continue

            pathname = os.path.dirname(target)
            if pathname != "" and not os.path.exists(pathname):
                os.makedirs(pathname)

            # Directory entries end with "/" and carry no data of their own.
            if target.endswith('/') or os.path.exists(target):
                continue

            data = archive.read(info)
            try:
                # Member data is bytes, so the file must be opened in binary
                # mode; "with" guarantees that it is closed.
                with open(target, 'wb') as fo:
                    fo.write(data)
            except (IOError, OSError) as err:
                # Extraction is best effort: report the failure and go on.
                sys.stderr.write('cannot write %r: %s\n' % (target, err))

def main(argv):
    """Command-line entry point: unzip every ``.zip`` archive named in *argv*.

    Paths that do not end in ``.zip`` or that do not exist are reported and
    skipped.

    Args:
        argv: Full argument vector including the program name
            (normally ``sys.argv``).
    """
    # unzip() reads the password from this module-level namespace.
    global args

    p = argparse.ArgumentParser(description='Resolve unzip garble')
    p.add_argument('xxx', type=str, nargs='*',
                   help='Command object.')
    p.add_argument('-s', '--secret', action='store',
                   default=None, help='password')
    args = p.parse_args(argv[1:])

    for path in args.xxx:
        if not path.endswith('.zip'):
            print('%s %s' % (s % (1, 91, '!!!!! file isn\'t a zip file.'), path))
            continue
        if not os.path.exists(path):
            print('%s %s' % (s % (1, 91, '!!!!! file doesn\'t exist.'), path))
            continue
        print('%s %s' % (s % (1, 97, ' ++ unzip:'), path))
        unzip(path)

if __name__ == '__main__':
    # Run as a script: hand the raw argument vector to main().
    main(sys.argv)
Copy the code

2. Count the number of code lines in the current root directory.

# coding=utf-8
import os
import time
# Root directory to scan
basedir = '/'
# Paths of the files to count (filled in by getFile)
filelists = []
# File extensions, without the leading dot, to include in the count
whitelist = ['cpp', 'h']
# Walk every folder below basedir recursively and collect the matching files
def getFile(basedir):
    """Collect every file under *basedir* whose extension is whitelisted.

    Matching paths are appended to the module-level ``filelists``; the
    accepted extensions are read from the module-level ``whitelist``.

    Args:
        basedir: Directory at which the recursive walk starts.
    """
    for parent, _dirnames, filenames in os.walk(basedir):
        for filename in filenames:
            # splitext() yields '' for names without a dot, so a file that is
            # merely *named* "h" or "cpp" is not mistaken for source code.
            ext = os.path.splitext(filename)[1][1:]
            # Count only the specified file types, skipping logs and caches.
            if ext in whitelist:
                filelists.append(os.path.join(parent, filename))
# Count the number of non-blank lines in one file
def countLine(fname):
    """Return the number of non-blank lines in *fname* and print the result.

    The file is read in binary mode so that any encoding can be counted.
    A line is blank when it contains nothing but whitespace, which covers
    both ``\\n`` and ``\\r\\n`` line endings.

    Args:
        fname: Path of the file to count.

    Returns:
        The number of non-blank lines.
    """
    # "with" closes the handle; the original leaked one handle per file.
    with open(fname, 'rb') as source:
        count = sum(1 for file_line in source if file_line.strip())
    print(fname + '-', count)
    return count
if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    startTime = time.perf_counter()
    getFile(basedir)
    totalline = sum(countLine(filelist) for filelist in filelists)
    print('total lines:', totalline)
    print('Done! Cost Time: %0.2f second' % (time.perf_counter() - startTime))
Copy the code

3. Scan the current directory and all subdirectories and display the size.

import os
import sys      
# Factor that converts a size in bytes into each reporting unit.
fsizedicr = {'Bytes': 1,
             'Kilobytes': float(1) / 1024,
             'Megabytes': float(1) / (1024 * 1024),
             'Gigabytes': float(1) / (1024 * 1024 * 1024)}


def get_dir_size(directory):
    """Return the total size in bytes of all files below *directory*."""
    total = 0
    for path, _dirs, files in os.walk(directory):
        for name in files:
            filename = os.path.join(path, name)
            try:
                total += os.path.getsize(filename)
            except OSError:
                # Broken symlinks and files removed mid-scan have no size.
                continue
    return total


def format_sizes(dir_size):
    """Return *dir_size* rendered once per unit, from Bytes to Gigabytes.

    The units are ordered by their conversion factor.  Sorting the rendered
    strings, as the original did, gives an order that depends on the digits
    of the size (e.g. "976.56 Kilobytes" sorted before "1000000 Bytes").
    """
    units = sorted(fsizedicr, key=fsizedicr.get, reverse=True)
    return [str(round(fsizedicr[unit] * dir_size, 2)) + " " + unit
            for unit in units]


def main(argv):
    """Print the size of the directory named by ``argv[1]`` in every unit."""
    try:
        directory = argv[1]
    except IndexError:
        sys.exit("Must provide an argument.")

    dir_size = get_dir_size(directory)
    if dir_size == 0:
        print("File Empty")
    else:
        for units in format_sizes(dir_size):
            print("Folder Size: " + units)


if __name__ == '__main__':
    main(sys.argv)
Copy the code

4. Move all files in the source directory that were last modified more than 240 days ago to the destination directory.

import shutil
import sys
import time
import os
import argparse

usage = 'python move_files_over_x_days.py -src [SRC] -dst [DST] -days [DAYS]'
description = 'Move files from src to dst if they are older than a certain number of days. Default is 240 days'


def parse_args(argv=None):
    """Parse the command line and return the argparse namespace.

    Args:
        argv: Argument list without the program name; ``None`` means
            ``sys.argv[1:]``.

    Returns:
        Namespace with ``src``, ``dst`` and ``days``; a negative ``days``
        is clamped to 0.
    """
    args_parser = argparse.ArgumentParser(usage=usage, description=description)
    # Each option requires a value.  The original nargs='?' let a bare
    # "-dst" or "-days" become None, which crashed later in the script.
    args_parser.add_argument('-src', '--src', type=str, default='.', help='(OPTIONAL) Directory where files will be moved from. Defaults to current directory')
    args_parser.add_argument('-dst', '--dst', type=str, required=True, help='(REQUIRED) Directory where files will be moved to.')
    args_parser.add_argument('-days', '--days', type=int, default=240, help='(OPTIONAL) Days value specifies the minimum age of files to be moved. Default is 240.')
    args = args_parser.parse_args(argv)

    if args.days < 0:
        args.days = 0
    return args


def move_old_files(src, dst, days, now=None):
    """Move regular files in *src* not modified for *days* days into *dst*.

    Args:
        src: Source directory; only its direct entries are examined.
        dst: Destination directory; created when it does not exist.
        days: Minimum age in days, measured from the modification time.
        now: Reference time as a Unix timestamp; defaults to the current time.

    Returns:
        The names of the files that were moved.
    """
    if now is None:
        now = time.time()
    cutoff = now - days * 86400  # 86400 seconds per day

    if not os.path.exists(dst):
        os.makedirs(dst)

    moved = []
    for f in os.listdir(src):
        # listdir() returns bare names; they must be joined with src or the
        # script only works when src is the current directory.
        path = os.path.join(src, f)
        if os.path.isfile(path) and os.stat(path).st_mtime < cutoff:
            shutil.move(path, dst)
            moved.append(f)
    return moved


def main():
    """Run the script with the options given on the command line."""
    args = parse_args()
    move_old_files(args.src, args.dst, args.days)


if __name__ == '__main__':
    main()
Copy the code

5. Zip the PuTTY log files in the log directory and move the archives to a separate folder.

import os																	
import shutil																
from time import strftime												

# Raw strings keep the backslashes literal; "\l", "\p" and "\z" are invalid
# escape sequences in ordinary string literals.
logsdir = r"c:\logs\puttylogs"
zipdir = r"c:\logs\puttylogs\zipped_logs"
zip_program = "zip.exe"


def zip_logs(logsdir, zipdir, zip_program):
    """Zip every ``.log`` file in *logsdir* and move the archive to *zipdir*.

    Each log is compressed into ``<name>.<YYYY-MM-DD>.zip`` by running
    ``zip_program <archive> <log>`` inside *logsdir*.  The log is deleted
    only after the zip program succeeded and the archive was moved.

    Args:
        logsdir: Directory containing the log files.
        zipdir: Directory that receives the archives; created if missing.
        zip_program: Name or path of the external zip executable.

    Returns:
        The names of the archives that were created.
    """
    import subprocess

    if not os.path.isdir(zipdir):
        # Without this, shutil.move() would rename the first archive to the
        # path of the missing directory.
        os.makedirs(zipdir)

    archived = []
    for files in os.listdir(logsdir):
        if not files.endswith(".log"):
            continue
        files1 = files + "." + strftime("%Y-%m-%d") + ".zip"
        # An argument list avoids shell quoting problems.  The original
        # concatenated the command without separating spaces.
        status = subprocess.call([zip_program, files1, files], cwd=logsdir)
        if status != 0:
            # Keep the log when it could not be archived.
            continue
        shutil.move(os.path.join(logsdir, files1), zipdir)
        os.remove(os.path.join(logsdir, files))
        archived.append(files1)
    return archived


if __name__ == '__main__':
    zip_logs(logsdir, zipdir, zip_program)
Copy the code

6. Download Leetcode’s algorithm problems.

import sys
import re
import os
import argparse
import requests
from lxml import html as lxml_html

try:
    import html
except ImportError:
    import HTMLParser
    html = HTMLParser.HTMLParser()

try:
    import cPickle as pk
except ImportError:
    import pickle as pk

class LeetcodeProblems(object):
    """Download the LeetCode algorithm problems and export them as text.

    The caller must assign an argparse namespace to ``self.args`` before
    calling :meth:`run` or :meth:`to_text`.  The attributes read are
    ``index``, ``title``, ``tag``, ``level``, ``rm_blank``, ``line`` and
    ``redownload``.
    """

    # NOTE(review): these two patterns were lost from the published listing
    # (their HTML markup was stripped by the page renderer).  They are
    # reconstructions and must be checked against the live page markup.
    PROBLEM_URL_RE = r'<a href="(/problems/.+?)"'
    LEVEL_RE = r"<td value='\d*'>(.+?)</td>"

    def _fetch(self, url):
        """Return the body of *url*; print an error and exit on failure."""
        res = requests.get(url)
        if not res.ok:
            print('request error')
            sys.exit(1)
        return res.text

    def get_problems_info(self):
        """Scrape all problems, cache them on disk and return them.

        Returns:
            A list of dicts with the keys ``title``, ``level``, ``index``,
            ``description`` and ``tags``.  The same list is pickled to
            ``leecode_problems.pk``.
        """
        cm = self._fetch('https://leetcode.com/problemset/algorithms')
        parts = cm.split('tbody>')
        if len(parts) < 2:
            # Without a table body there is nothing to parse.
            print('unexpected page layout: problem table not found')
            sys.exit(1)
        cmt = parts[-2]

        indexs = re.findall(r'<td>(\d+)</td>', cmt)
        problem_urls = ['https://leetcode.com' + url
                        for url in re.findall(self.PROBLEM_URL_RE, cmt)]
        levels = re.findall(self.LEVEL_RE, cmt)
        # zip() would silently truncate mismatched columns, so check first.
        if not (len(indexs) == len(problem_urls) == len(levels)):
            print('unexpected page layout: table columns do not line up')
            sys.exit(1)

        infos = []
        for index, level, url in zip(indexs, levels, problem_urls):
            tree = lxml_html.fromstring(self._fetch(url))
            title = tree.xpath('//meta[@property="og:title"]/@content')[0]
            description = tree.xpath('//meta[@property="description"]/@content')
            if not description:
                description = tree.xpath('//meta[@property="og:description"]/@content')[0]
            else:
                description = description[0]
            description = html.unescape(description.strip())
            tags = tree.xpath('//div[@id="tags"]/following::a[@class="btn btn-xs btn-primary"]/text()')
            infos.append(
                {
                    'title': title,
                    'level': level,
                    'index': int(index),
                    'description': description,
                    'tags': tags
                }
            )

        with open('leecode_problems.pk', 'wb') as g:
            pk.dump(infos, g)
        return infos

    def to_text(self, pm_infos):
        """Write *pm_infos* to ``leecode problems.txt``, sorted as requested.

        The sort key is the first option set among ``index``, ``title``,
        ``tag`` and ``level``; the default is ``index``.  With ``rm_blank``
        set, runs of line breaks in each description are collapsed, which
        modifies the dicts in *pm_infos* in place.

        Args:
            pm_infos: List of problem dicts as returned by
                :meth:`get_problems_info`.
        """
        key = 'index'
        for option, field in (('index', 'index'), ('title', 'title'),
                              ('tag', 'tags'), ('level', 'level')):
            if getattr(self.args, option):
                key = field
                break

        infos = sorted(pm_infos, key=lambda i: i[key])

        text_template = '## {index} - {title}\n' \
            '~{level}~ {tags}\n' \
            '{description}\n' + '\n' * self.args.line
        chunks = []
        for info in infos:
            if self.args.rm_blank:
                info['description'] = re.sub(r'[\n\r]+', r'\n', info['description'])
            chunks.append(text_template.format(**info))
        text = ' ' + ''.join(chunks)

        with open('leecode problems.txt', 'w') as g:
            g.write(text)

    def run(self):
        """Load the cached problems, or download them, then export as text."""
        if os.path.exists('leecode_problems.pk') and not self.args.redownload:
            # NOTE: pickle can execute arbitrary code; only load a cache file
            # that this script wrote itself.
            with open('leecode_problems.pk', 'rb') as f:
                pm_infos = pk.load(f)
        else:
            pm_infos = self.get_problems_info()

        print('find %s problems.' % len(pm_infos))
        self.to_text(pm_infos)

def handle_args(argv):
    """Parse the command-line options of the LeetCode exporter.

    Args:
        argv: Full argument vector including the program name
            (normally ``sys.argv``).

    Returns:
        The argparse namespace with ``index``, ``level``, ``tag``,
        ``title``, ``rm_blank``, ``line`` and ``redownload``.
    """
    p = argparse.ArgumentParser(description='extract all leecode problems to location')
    p.add_argument('--index', action='store_true', help='sort by index')
    p.add_argument('--level', action='store_true', help='sort by level')
    p.add_argument('--tag', action='store_true', help='sort by tag')
    p.add_argument('--title', action='store_true', help='sort by title')
    p.add_argument('--rm_blank', action='store_true', help='remove blank')
    p.add_argument('--line', action='store', type=int, default=10, help='blank of two problems')
    p.add_argument('-r', '--redownload', action='store_true', help='redownload data')
    return p.parse_args(argv[1:])

def main(argv):
    """Parse *argv* and run the LeetCode export with the resulting options."""
    options = handle_args(argv)
    exporter = LeetcodeProblems()
    exporter.args = options
    exporter.run()

if __name__ == '__main__':
    # Run as a script: hand the raw argument vector to main().
    main(sys.argv)
Copy the code

7. Convert Markdown to HTML.

import sys
import os

from bs4 import BeautifulSoup
import markdown

class MarkdownToHtml:
    """Convert Markdown files into standalone, pretty-printed HTML pages."""

    # <head> section prepended to every generated page.  genStyle() splices
    # a <style> element in just before the closing </head>.
    headTag = '<head><meta charset="utf-8" /></head>'

    def __init__(self, cssFilePath=None):
        """Create a converter, optionally embedding the stylesheet at *cssFilePath*."""
        if cssFilePath is not None:
            self.genStyle(cssFilePath)

    def genStyle(self, cssFilePath):
        """Embed the contents of the CSS file *cssFilePath* in the page head.

        Only this instance's ``headTag`` is changed; the class-level
        default stays untouched.
        """
        # Read as UTF-8, like the Markdown source and the HTML output.
        with open(cssFilePath, 'r', encoding='utf8') as f:
            cssString = f.read()
        closing = len('</head>')
        self.headTag = (self.headTag[:-closing]
                        + '<style type="text/css">{}</style>'.format(cssString)
                        + self.headTag[-closing:])

    def markdownToHtml(self, sourceFilePath, destinationDirectory=None, outputFileName=None):
        """Convert one Markdown file to HTML.

        Args:
            sourceFilePath: Path of the Markdown file to convert.
            destinationDirectory: Output directory; defaults to the
                directory of the source file.
            outputFileName: Output file name; defaults to the source file
                name with the extension replaced by ``.html``.
        """
        if not destinationDirectory:
            # Fall back to the source file's directory.  The path is made
            # absolute so dirname() cannot return an empty string.
            destinationDirectory = os.path.dirname(os.path.abspath(sourceFilePath))
        if not outputFileName:
            # Fall back to the input file name.
            outputFileName = os.path.splitext(os.path.basename(sourceFilePath))[0] + '.html'
        with open(sourceFilePath, 'r', encoding='utf8') as f:
            markdownText = f.read()
        # Compile the raw HTML text
        rawHtml = self.headTag + markdown.markdown(markdownText, output_format='html5')
        # Re-format the HTML so that it is easier to read
        beautifyHtml = BeautifulSoup(rawHtml, 'html5lib').prettify()
        outputPath = os.path.join(destinationDirectory, outputFileName)
        with open(outputPath, 'w', encoding='utf8') as f:
            f.write(beautifyHtml)

def popOption(argv, flag):
    """Remove *flag* and the value following it from *argv*.

    Args:
        argv: Mutable argument list; modified in place.
        flag: Option name to look for, e.g. ``'-s'``.

    Returns:
        The value that followed *flag*, or ``None`` when *flag* is absent.
        If *flag* is the last argument, an error is printed and the
        program exits.
    """
    if flag not in argv:
        return None
    flagIndex = argv.index(flag)
    if flagIndex + 1 >= len(argv):
        print('Missing value for option: ' + flag)
        sys.exit(1)
    value = argv[flagIndex + 1]
    # Drop the flag and its value together so that only paths remain.
    del argv[flagIndex:flagIndex + 2]
    return value


if __name__ == "__main__":
    mth = MarkdownToHtml()
    # Work on a shallow copy of the arguments without the script file name
    argv = sys.argv[1:]
    # argv may still contain options.  They are removed below because every
    # remaining element is treated as a source file path.
    cssFilePath = popOption(argv, '-s')
    if cssFilePath is not None:
        # Check whether the stylesheet file path is valid
        if not os.path.isfile(cssFilePath):
            print('Invalid Path: ' + cssFilePath)
            sys.exit(1)
        mth.genStyle(cssFilePath)
    outputDirectory = popOption(argv, '-o')
    if outputDirectory is not None:
        # Check whether the output directory is valid
        if not os.path.isdir(outputDirectory):
            print('Invalid Directory: ' + outputDirectory)
            sys.exit(1)
    # At this point every element of argv is a source file path
    for filePath in argv:
        # Convert valid paths and report the others
        if os.path.isfile(filePath):
            mth.markdownToHtml(filePath, outputDirectory)
        else:
            print('Invalid Path: ' + filePath)
Copy the code

8. Text file encoding detection and conversion.

import sys
import os
import argparse
from chardet.universaldetector import UniversalDetector

def parseArgs(argv=None):
    """Parse the command line and return the argparse namespace.

    Args:
        argv: Argument list without the program name; ``None`` means
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Text file encoding detection and Conversion')
    parser.add_argument('filePaths', nargs='+', help='Detected or converted file path')
    parser.add_argument('-e', '--encoding', nargs='?', const='UTF-8', help=(
        'Target encoding. The supported encodings are:  ASCII, (Default) UTF-8 '
        '(with or without a BOM), UTF-16 (with a BOM), UTF-32 (with a BOM), Big5, '
        'GB2312/GB18030, EUC-TW, HZ-GB-2312, ISO-2022-CN, EUC-JP, SHIFT_JIS, '
        'ISO-2022-JP, ISO-2022-KR, KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, '
        'windows-1251, ISO-8859-2, windows-1250, EUC-KR, ISO-8859-5, windows-1251, '
        'ISO-8859-1, windows-1252, ISO-8859-7, windows-1253, ISO-8859-8, '
        'windows-1255, TIS-620'))
    parser.add_argument('-o', '--output', help='Output directory')
    return parser.parse_args(argv)


def detectEncoding(filePath, detector):
    """Feed *filePath* to *detector* and return ``(encoding, confidence)``.

    Args:
        filePath: File to inspect; it is read in binary mode.
        detector: A chardet ``UniversalDetector``; it is reset first.

    Returns:
        The detected encoding name and its confidence.  When nothing was
        detected the result is ``('Unknown', 0.99)``.
    """
    detector.reset()
    # "with" closes the file; the original left the handle open.
    with open(filePath, 'rb') as source:
        for each in source:
            detector.feed(each)
            # Stop reading as soon as the detector is confident
            if detector.done:
                break
    detector.close()
    charEncoding = detector.result['encoding']
    confidence = detector.result['confidence']
    if charEncoding is None:
        charEncoding = 'Unknown'
        confidence = 0.99
    return charEncoding, confidence


def convertFile(filePath, outputPath, charEncoding, targetEncoding):
    """Re-encode *filePath* from *charEncoding* to *targetEncoding*.

    The whole file is read before anything is written, so *outputPath* may
    be the same as *filePath*.  Undecodable or unencodable characters are
    replaced rather than raising.
    """
    with open(filePath, 'r', encoding=charEncoding, errors='replace') as f:
        temp = f.read()
    with open(outputPath, 'w', encoding=targetEncoding, errors='replace') as f:
        f.write(temp)


def main(argv=None):
    """Detect, and optionally convert, the encoding of each file given."""
    args = parseArgs(argv)
    # An output directory enables conversion.  Without an explicit target
    # encoding the default is UTF-8.
    if args.output is not None:
        if not args.encoding:
            args.encoding = 'UTF-8'
        # Check whether the output directory given by the user is valid
        if not os.path.isdir(args.output):
            print('Invalid Directory: ' + args.output)
            sys.exit(1)

    # Instantiate a universal detector
    detector = UniversalDetector()
    print()
    print('Encoding (Confidence)', ':', 'File path')
    for filePath in args.filePaths:
        # Skip paths that are not files
        if not os.path.isfile(filePath):
            print('Invalid Path: ' + filePath)
            continue
        charEncoding, confidence = detectEncoding(filePath, detector)
        print('{} {:>12} : {}'.format(charEncoding.rjust(8),
              '(' + str(confidence * 100) + '%)', filePath))
        if args.encoding and charEncoding != 'Unknown' and confidence > 0.6:
            # Overwrite the source file when no output directory is set
            if args.output:
                outputPath = os.path.join(args.output, os.path.basename(filePath))
            else:
                outputPath = filePath
            convertFile(filePath, outputPath, charEncoding, args.encoding)


if __name__ == '__main__':
    main()
Copy the code

The last two scripts are taken from the course “Use Python 3 to write a series of practical scripts” in the lab building. The course walks through the implementation of both scripts in detail, so interested readers can go straight to the lab building to study them!

Synchronous zhihu column: zhuanlan.zhihu.com/p/85728888

1. Resolve the problem of unzip garbled characters in Linux.

2. Count the number of code lines in the current root directory.

3. Scan the current directory and all subdirectories and display the size.

4. Move all files in the source directory that were last modified more than 240 days ago to the destination directory.

5. Zip the PuTTY log files in the log directory and move the archives to a separate folder.

6. Download Leetcode’s algorithm problems.

7. Convert Markdown to HTML.

8. Text file coding detection and conversion.

Related Posts

MySql Database installation and Use

Some database optimization methods and indicators are discussed

DelayQueue series (3) : Persistence scheme