Useful usage of python.

Advanced Trick

Implement Trie (Prefix Tree)

defaultdict

from collections import defaultdict
from functools import reduce
TrieNode = lambda: defaultdict(TrieNode)

class Trie:
    def __init__(self):
        self.trie = TrieNode()
        
    def insert(self, word):
        reduce(dict.__getitem__, word, self.trie)['end'] = True
        
    def search(self, word):
        return reduce(lambda d,k: d[k] if k in d else TrieNode(), word, self.trie).get('end', False)
        
    def startsWith(self, word):
        return bool(reduce(lambda d,k: d[k] if k in d else TrieNode(), word, self.trie).keys())

dict

class TrieNode(object):
    def __init__(self):
        """
        Initialize your data structure here.
        """
        self.data = {}
        self.is_word = False
  
class Trie(object):
    def __init__(self):
        self.root = TrieNode()
  
    def insert(self, word):
        """
        Inserts a word into the trie.
        :type word: str
        :rtype: void
        """
        node = self.root
        for letter in word:
            child = node.data.get(letter)
            if not child:
                node.data[letter] = TrieNode()
            node = node.data[letter]
        node.is_word = True
  
    def search(self, word):
        """
        Returns if the word is in the trie.
        :type word: str
        :rtype: bool
        """
        node = self.root
        for letter in word:
            node = node.data.get(letter)
            if not node:
                return False
        return node.is_word 
  
    def starts_with(self, prefix):
        """
        Returns if there is any word in the trie
        that starts with the given prefix.
        :type prefix: str
        :rtype: bool
        """
        node = self.root
        for letter in prefix:
            node = node.data.get(letter)
            if not node:
                return False
        return True
  
    def get_start(self, prefix):
        """
        Returns words started with prefix
        :param prefix:
        :return: words (list)
        """
        def _get_key(pre, pre_node):
            words_list = []
            if pre_node.is_word:
                words_list.append(pre)
            for x in pre_node.data.keys():
                words_list.extend(_get_key(pre + str(x), pre_node.data.get(x)))
            return words_list
  
        words = []
        if not self.starts_with(prefix):
            return words
        if self.search(prefix):
            words.append(prefix)
            return words
        node = self.root
        for letter in prefix:
            node = node.data.get(letter)
        return _get_key(prefix, node)

2D list to 1D

1
2
3

matrix = [[1,5,9],[10,11,13],[12,13,15]]
sum(matrix, [])
# [1, 5, 9, 10, 11, 13, 12, 13, 15]

Variable

Swap value

1 2	# python way to swap values a, b = b, a

chain comparison with ops

1 2	b=6 1==b<38 # False

Statement

For else

l = list(range(5))
for e in l:
	if e == 0:
        break
else: # called if for loop does not reach break statement
    print('did not break out of for loop')

List

list to str

l = ["Python", "is", "awesome"]
" ".join(l)

l = list(range(5))
",".join(map(str, l))

2d array transpose

1 2	arr = [['a','b'], ['c','d'],['e','f']] transposed = list(zip(*arr))

most frequent element in a list

a = [1,2,3,1,2,3,2,2,4,5,1]
max(set(a), key=a.count)

""" use Counter from collections"""
from collections import Counter
cnt = Counter(a)
cnt.most_common(3)

copy array

# a fast way of shallow copy of a list
b = a[:]

b = a.copy() # python3 only

# copy nested lists using copy.deepcopy
import copy
l = [[1,2], [3,4]]
l2 = copy.deepcopy(l)

Remove duplicates in list

items = [2,2,2,3,3,4,1]
list(set(items))

""" remove dups and keep order """
items = ['foo', 'bar', 'bar', 'foo']
from collections import OrderedDict
list(OrderedDict.fromkeys(items).keys())

Find index of Min/Max

l = list(range(5))

def minIndex(lst):
	return min(range(len(lst)), key=lst.__getitem__)


def maxIndex(lst):
	return max(range(len(lst)), key=lst.__getitem__)

minIndex(l)
maxIndex(l)

Str

check if strs are permutation

1 2	from collections import Counter Counter(str1) == Counter(str2)

Reverse string

a ='abcdefghijklmnopqrstuvwxyz'
a[::-1]

"".join([ch for ch in reversed(a)])

find index

1
2
3

# check the index of a char, else -1
a ='abcdefghijklmnopqrstuvwxyz'
a.find(a) # 0

Dict

Dict get

1 2	d = {'a': 1, 'b': 2} d.get('c', 3)

sort dict

sorted(d.items(), key=lambda x:x[1])

from operator import itemgetter
sorted(d.items(), key=itemgetter(1))

sorted(d, key=d.get)

merge dict

d1 = {'a': 1}
d2 = {'b': 2}

{**d1, **d2}

dict(d1.items() | d2.items())

d1.update(d2)

**kwargs

dictionary.pop(keyname, defaultvalue)

def fn(*args, **kwargs):
    v1 = kwargs.pop("k1", 'default1')
    v2 = kwargs.pop("k2", 'default2')
    v3 = kwargs.pop("k3", 'default3')

Builtin functions

Binary, Octal, Hexadecimal integers

# base 10 (decimal)
dec = 5

"""
base 2 (binary)
"""
# decimal to binary (base 2)
bin_dec = bin(dec) # str: '0b101'
# binary to decimal
dec = int(bin_dec, 2) # int: 5

"""
base 8 (octal)
"""
# decimal to octal (base 8)
oct_dec = oct(dec) # str: '05'
# octal to decimal
dec = int(oct_dec, 8) # int: 5

"""
base 16 (hexadecimal)
"""
# decimal to hexadecimal (base 16)
hex_dec = hex(dec) # str: '0x5'
# hexadecimal to decimal
dec = int(hex_dec, 16) # int: 5

Bit-wise Op

&: binary AND. Op copies a bit to the result if it exists in both operands.
|: binary OR. It copies a bit to the result if it exists in either operand.
^: binary XOR. It copies a bit if it is set in one operand but not both.
~: binary Ones Complement. It is unary and has the effect of ‘flipping’ bits.
<<: binary left shift.
>>: binary right shift.

Array index

1 2	~i # -i-1 arr[~i] # is equal to arr[-i-1]

`\r` application

\r puts the location of the cursor at the beginning of the current row.
\b backtracks one bit of the cursor.

Count down

import time

count_down = 10  # count down total time ( seconds )
for i in range(count_down, 0, -1):
    msg = u"\r System will exit in " + str(i) + "secs"
    print(msg, end="")
    time.sleep(1)
end_msg = "end" + "  "*(len(msg)-len("end"))  # cover all the console output with whitespace 
print(u"\r"+end_msg)

Progress bar

import time

count_down = 10  
interval = 1  
for i in range(0, int(count_down/interval)+1):
    print("\r"+"▇"*i+" "+str(i*10)+"%", end="")
    time.sleep(interval)
print("\n Finished")

Loading

import time

count_down = 10 
interval = 0.25  
for i in range(0, int(count_down/interval)):
    ch_list = ["\\", "|", "/", "-"]
    index = i % 4
    msg = "\r Program is running " + ch_list[index]
    print(msg, end="")
    time.sleep(interval)
print(u"\r Finished" + "  "*len(msg))

Functional programming

Advanced function

`map`

def f(x):
	return x*x
    
r = map(f, [1,2,3,4]) # => Iterator
list(r) # [1,4,9,16]

`reduce`

1	reduce(f, [x1,x2,x3]) <=> f(f(x1, x2), x3)

Application: str2int

from functools import reduce

DIGITS = {'0':0, '1':1, '2':2, '3':3, '4':4, '5':5, '6':6, '7':7, '8':8, '9':9, '0':0}

def char2num(s):
	return DIGIT[s]
    
def str2int(s):
	return reduce(lambda x,y: x*10+y, map(char2sum, s))

`filter`

filter filters out sequence by applying on each element and selecting by return value (True means keep that element)

Filter out even numbers:

def is_odd(n):
	return n%2 == 1

list(filter(is_odd, [1,2,3,4,5]))
# [1,3,5]

Get the prime number

def _odd_iter():
	n=1
    while True:
    	n += 2
        yield n

def _not_divisible(n):
	return lambda x: x%n > 0

def primes():
	yield 2
    it = _odd_iter()
    while True:
    	n = next(it)
        yield n
        it = filter(_not_divisible(n), it)

for n in primes():
	if n<1000:
    	print(n)
    else:
    	break

`sorted`

1
2
3

sorted(['bob', 'about', 'Zoo', 'Credit'], key=str.lower, reverse=True)
# ['Zoo', 'Credit', 'bob', 'about']

Decorator

Print the call time of a function

import time

def time_metric(func):
	# print the call time and call counts
    count = 0
    def wrapper(*args, **kwargs):
    	nonlocal count
        start_time = time.time()
        data = func(*args, **kwargs)
        time_delta = time.time() - start_time
        count += 1
        print("Call {} times. It costs {} secs".format(count, time_delta))
        return data
    return wrapper

@time_metric
def f():
    time.sleep(0.5)

Deep copy and shallow copy

= operator only creates a new variable that shares the reference of the original object.
Deep copy is a process in which the copy process occurs recursively. It first construct a new collection object and then recursively populating it with copies of the child objects found in the original. Any changes made to a copy of object do not reflect in the original object.
1
2
3
4
5
6
7
import copy

l = [1,2,[3,4],5]
l_deep = copy.deepcopy(l) # deep copy
l[2][0] = 6
print(l) # [1,2,[6,4],5]
print(l_deep) # [1,2,[3,4],5]
Shallow copy constructs a new collection object and then populating it with references to the children objects found in the original. The copying process does not recurse and therefore won’t create copies of the child objects themselves.

import copy

l = [1,2,[3,4],5]
l_deep = copy.copy(l) # shallow copy
l[2][0] = 6 
print(l)      # [1,2,[6,4],5]
print(l_deep) # [1,2,[6,4],5]

multiprocess v.s. threading

multiprocessing

Pros:

Separate memory space
Takes advantages of multiple CPUs and cores
Avoids GIL(Global Interpreter Lock) limitations for cPython
Children processes are interruptible
A must with cPython for CPU-bound processing

Cons:

IPC a little more complicated with more overhead
Larger memory footprint

threading

Pros:

Lightweight -> low memory footprint
Shared memory -> makes access to state from another context easier
Allows you to easily make responsive UIs
cPython C extension modules that properly release the GIL will run in parallel
Great option for I/O-bound applications

Cons:

CPython -> subject to the GIL
Not interruptible/killable

Random

# approach 1
import numpy as np
np.random.choice(items, p)

# approach 2
def random_select(items, p):
    x = random.uniform(0,1)
    cum_p = .0
    for item, prob in zip(items, p):
        cum_p += prob
        if x < cum_p:
            break
    return item

Pathlib

The python std library Pathlib module can be used to manipulate windows paths on a Unix machine(or vice versa). It can also be used to manipulate paths without accessing the os module.

>>> from pathlib import Path
# listing subdirectories
>>> p = Path('.')
>>> [x for x in p.iterdir() if x.is_dir()]

# listing py src files in the directory tree
>>> list(p.glob('**/*.py'))

# navigate inside a directory tree
>>> p = Path('/etc')
>>> q = p / 'init.d' / 'reboot'
>>> q
PosixPath('/etc/init.d/reboot')

# query path properties
>>> q.exists()
True
>>> q.is_dir()
False

# open a file
>>> with q.open() as f: f.readline()

>>> p.name # filename
>>> p.stem # filename without suffix
>>> p.suffix # file suffix
>>> p.parent # the dirname

# create directory
>>> p=Path(r'./a/b/c')
>>> p.mkdir(exist_ok=True) # create if the parent directory exists
## iteratively create the directory
>>> p.mkdir(exist_ok=True, parents=True)

# file details
>>> p.stat() # detail information
>>> p.stat().st_size # file size
>>> p.stat().st_ctime # create time
>>> p.stat().st_mtime # modified time

itertools

itertools.count()

Iteration over infinite numbers

$ import itertools
$ results = []
$ for x in itertools.count(start=0):
$     if <condition>:
$        break
$     results.append(<element>)

itertools.permutations()

itertools.permutations(iterable, r=None)

Return successive $r$ length permutations of elements in the iterable. If None, then $r$ defaults to the length of the iterable and alll possible full-length permutations.
1
2
# permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC
# permutations(range(3)) --> 012 021 102 120 201 210

itertools.zip_longest()

1 2	# Similar to zip but return the longest length of inputs with filling default values, whereas zip returns the shortest length itertools.zip_longest(*iterables, fillvalue=None)

File

os

filepath='../test/test/test.txt'
# split filename
os.path.splittext(filepath) # ('../test/test', 'test.txt')
# split extention name
os.path.splitext(filepath) # ('../test/test/test', '.txt')

glob

1 2	# search for files glob.glob('dir/subdir/.txt') # return .txt in the directory 'dir/subdir/'

My utils

echo with colors


def print_format_table():
    """
    prints table of formatted text format options
    """
    for style in range(8):
        for fg in range(30, 38):
            s1 = ''
            for bg in range(40, 48):
                format = ';'.join([str(style), str(fg), str(bg)])
                s1 += '\x1b[%sm %s \x1b[0m' % (format, format)
            print(s1)
    print('\n')


def cyk_print(content, font_color='green', bg_color=None, display='highlight'):
    assert font_color or bg_color or display is not None
    display_map = {'default': 0, "highlight": 1, "underline": 4, "flicker": 5, "reverse": 7, "invisible": 8}
    font_map = {'black': 30, 'red': 31, 'green': 32, 'yellow': 33, 'blue': 34, 'purple': 35, 'yank': 36,
                'white': 37}
    bg_map = {'black': 40, 'red': 41, 'green': 42, 'yellow': 43, 'blue': 44, 'purple': 45, 'yank': 46,
              'white': 47}
    pp = "\033[{}{}{}m{}\033[0m".format(display_map.get(display, 'highlight'),
                                        ";{}".format(font_map.get(font_color, 30)),
                                        ";{}".format(bg_map.get(bg_color, '')), content)
    print(pp)

print name and cost time of a function

def printer(f):
    @wraps(f)
    def wrapped(*args, **kwargs):
        print('-' * 60)
        print("Start to run \033[1;36m{}\033[0m ...".format(f.__name__))
        t1 = time.time()
        r = f(*args, **kwargs)
        print('the result is \033[1;31m{:.4f}\033[0m'.format(r), end=', ')
        print("time cost: {:.5g} \033[1msecs\033[0m".format(time.time() - t1))
        # return r
    return wrapped

@printer
def func(...):
    pass

if __name__ == '__main__':
    func()

Errors

Default argument is mutable

When we assign the default argument values, avoid using mutable object, like list(), dict().

def error_f(item, my_list=[]):
	my_list.append(item)
    print(my_list)
    
error_f('a') ['a']

error_f('b') # ['a', 'b']

error_f('c') # ['a', 'b', 'c']

Python’s default arguments are evaluated once when the function is defined, not each time the function is called (like it is in say, Ruby). This means that if you use a mutable default argument and mutate it, you will and have mutated that object for all future calls to the function as well.
Important warning: The default value is evaluated only once. This makes a difference when the default is a mutable object such as a list, dictionary, or instances of most classes.

WTF

Deleting a list item while iterating

Never change the object you’re iterating over!

l1 = [1,2,3,4]
l2 = [1,2,3,4]
l3 = [1,2,3,4]
l4 = [1,2,3,4]

for idx, item in enumerate(l1):
	del item
   
for idx, item in enumerate(l2):
	l2.remove(item)

# the correct way to change the object you iterate over
for idx, item in enumerate(l3[:]):
	l3.remove(item)

for idx, item in enumerate(l4):
	l4.pop(idx)

>>> l1
[1, 2, 3, 4]
>>> l2
[2, 4]
>>> l3
[]
>>> l4
[2, 4]

Difference between del, remove and pop:

del var_name just removes the binding of the var_name form the local/global namespace
remove removes the fist matching value (raise ValueError if the value is not found)
pop removes the element at a specific index and returns it (raise IndexError if an invalid index)

Why the output is [2,4] ? ^[2]

The list iteration is done index by index. When removing 1 from l1 or l4, the contents of lists are [2,3,4]. The remaining elements are shifted down, i.e. 2 is at index 0, 3 at index 1. Since the next iteration is going to loo at index 1 (element 3), the 2 is skipped entirely.

Run

stderr directly outputs to the screen, whilst the stdout is usually saved in the buffer util full.
python -u can directly print out the stdout without buffers.

1 2	# unbuffered python -u <xxx>.py