Introduction to Biostatistical Computing PHC 6937

Python data structure

Zhiguang Huo (Caleb)

Wednesday Oct 26th, 2022

Outline

list: creation and assignment

# list() builds a list from any iterable; a string is split into its characters.
list("Hello")
## ['H', 'e', 'l', 'l', 'o']
x = [1, 1, 1]
x
## [1, 1, 1]
x = [1, 2, 3]
# Lists are mutable: assigning to an index replaces that element in place.
x[1] = 0
x
## [1, 0, 3]

list: deletion and slice assignment

names = ["Alice", "Beth", "Carl", "Dan", "Emily"]
names
## ['Alice', 'Beth', 'Carl', 'Dan', 'Emily']
# del removes the element at index 2 ("Carl"); later elements shift left.
del names[2]
names
## ['Alice', 'Beth', 'Dan', 'Emily']
names = list("Lucas")
# Slice assignment: replace everything from index 3 onward with new elements.
names[3:] = list("ky")
names
## ['L', 'u', 'c', 'k', 'y']
"".join(names)
## 'Lucky'
names = list("Lucas")
# The replacement may be longer (or shorter) than the slice it replaces.
names[1:] = list("emonade")
"".join(names)
## 'Lemonade'
numbers = [1, 5]
# Assigning to the empty slice [1:1] inserts at index 1 without removing anything.
numbers[1:1] = [2, 3, 4]
numbers
## [1, 2, 3, 4, 5]
numbers = list(range(1,6))
numbers
## [1, 2, 3, 4, 5]
# Assigning an empty list to a slice deletes those elements (indices 1, 2, 3).
numbers[1:4] = []
numbers
## [1, 5]

list: append and count

alist = [0,1,2]
# append() adds ONE element to the end of the list, in place.
alist.append(3)
alist
## [0, 1, 2, 3]
asentence = "to be or not to be"
# split() with no argument splits the string on whitespace.
alist = asentence.split()
# count() returns how many elements are equal to the argument.
alist.count("to")
## 2
x = [[1,2], 1, 2, 1, [2, 1, [1,2]]]
# Only top-level elements are compared; the 1 inside the nested lists is not counted.
x.count(1)
## 2
# Likewise, the [1,2] nested inside the last element is not counted.
x.count([1,2])
## 1

list: extend

a = [0,1,2]; b = [3,4,5]
# extend() appends every element of b to a, modifying a in place.
a.extend(b)
a
## [0, 1, 2, 3, 4, 5]
a = [0,1,2]; b = [3,4,5]
# Concatenation with + builds a NEW list; a itself is left unchanged.
a + b
## [0, 1, 2, 3, 4, 5]
a
## [0, 1, 2]
a = [0,1,2]; b = [3,4,5]
# Slice assignment at the end of the list has the same effect as extend().
a[len(a):] = b
a
## [0, 1, 2, 3, 4, 5]

list: index

asentence = "to be or not to be"
alist = asentence.split()
alist
## ['to', 'be', 'or', 'not', 'to', 'be']
# index() returns the position of the FIRST element equal to its argument.
alist.index("to")
## 0
alist.index("not")
## 3
alist[3]
## 'not'
# A value that is not in the list raises ValueError. Catch it so the
# demonstration shows the message without aborting everything after it.
try:
    alist.index("XX")
except ValueError as err:
    print("ValueError:", err)
## ValueError: 'XX' is not in list

list: insert

alist = [1,2,3,5,6]
# insert(i, obj) places obj before index i; later elements shift right.
alist.insert(3, "four")
alist
## [1, 2, 3, 'four', 5, 6]
alist = [1,2,3,5,6]
# Same effect via assignment to the empty slice [3:3].
alist[3:3] = ["four"]
alist
## [1, 2, 3, 'four', 5, 6]

list: pop

x = list(range(10))
# pop() with no argument removes the LAST element and returns it.
x.pop()
## 9
x
## [0, 1, 2, 3, 4, 5, 6, 7, 8]
x.pop()
## 8
x
## [0, 1, 2, 3, 4, 5, 6, 7]

list: remove

asentence = "to be or not to be"
alist = asentence.split()
alist
## ['to', 'be', 'or', 'not', 'to', 'be']
# remove() deletes only the FIRST element equal to its argument, in place.
alist.remove("to")
alist
## ['be', 'or', 'not', 'to', 'be']
# Removing a value that is not in the list raises ValueError. Catch it so the
# demonstration shows the message without aborting everything after it.
try:
    alist.remove("XX")
except ValueError as err:
    print("ValueError:", err)
## ValueError: list.remove(x): x not in list

list: reverse and sort

x = ["a", "b", "c"]
# reverse() reverses the list in place.
x.reverse()
x
## ['c', 'b', 'a']
x = [5, 3, 4]
# sort() sorts the list in place (ascending by default).
x.sort() 
x
## [3, 4, 5]
y = ["b", "c", "a"]
y.sort()
y
## ['a', 'b', 'c']
x = [5, 3, 4]
# Pitfall: sort() works in place and returns None, so y is None here.
y = x.sort() 
print(y)
## None
x = [5, 3, 4]
# sorted() returns a NEW sorted list and leaves x untouched.
y = sorted(x) 
print(y)
## [3, 4, 5]

list: sort (aliasing vs. copying)

x = [5, 3, 4]
y = x ## x and y are pointing to the same list
# Sorting through y therefore also changes what x shows.
y.sort() 

print(x)
## [3, 4, 5]
print(y)
## [3, 4, 5]
x = [5, 3, 4]
y = x[:] ## x[:] is a full slice, which makes a (shallow) copy: y is a new list
# Sorting the copy leaves the original list x unchanged.
y.sort() 

print(x)
## [5, 3, 4]
print(y)
## [3, 4, 5]

list: sort (key and reverse arguments)

x = ["aaa", "bb", "cccc"]
# key= is a function applied to each element; elements are ordered by its result
# (here: by string length instead of alphabetically).
x.sort(key = len) 
x
## ['bb', 'aaa', 'cccc']
x = [5, 3, 4]
# reverse=True sorts in descending order.
x.sort(reverse = True) 
print(x)
## [5, 4, 3]

dictionary: basic operations

# A dictionary maps keys to values; here names (keys) map to phone numbers (values).
phonebook = {"Alice": 2341, 
            "Beth": 4971,
            "Carl": 9401
}
phonebook
## {'Alice': 2341, 'Beth': 4971, 'Carl': 9401}
# len() gives the number of key-value pairs.
len(phonebook)
## 3
# Square brackets look up the value stored under a key.
phonebook["Beth"]
## 4971

dictionary: update and delete

# Continues with the phonebook dictionary defined on the previous slide.
# Assigning to an existing key overwrites its value.
phonebook["Alice"] = 1358
phonebook
## {'Alice': 1358, 'Beth': 4971, 'Carl': 9401}
adict = {"Alice": 9572}
# update() copies every pair from adict into phonebook, overwriting matching keys.
phonebook.update(adict)
phonebook
## {'Alice': 9572, 'Beth': 4971, 'Carl': 9401}
# del removes the key "Carl" together with its value.
del phonebook["Carl"]
# The in operator tests for membership among the KEYS.
"Beth" in phonebook
## True

dictionary: clear

d = {}
# Assigning to a key that does not exist yet adds a new key-value pair.
d['name'] = "Amy"
d['age'] = 24
d
## {'name': 'Amy', 'age': 24}
# clear() removes every item in place, leaving the same (now empty) dictionary.
d.clear()
d
## {}

why clear is useful

# Case 1: rebinding x does NOT empty the dictionary that y refers to.
x = {}
y = x
x['key'] = 'value'
y
## {'key': 'value'}
x = {} ## associate x with an empty dictionary
y ## y points to the original dictionary
## {'key': 'value'}
# Case 2: clear() empties the shared dictionary itself, so y sees the change.
x = {}
y = x
x['key'] = 'value'
y
## {'key': 'value'}
x.clear() ## clear the original dictionary
y ## y still points to the original dictionary
## {}

dictionary: copy (shallow copy)

d = {}
d['username'] = "admin"
d['machines'] = ["foo", "bar"]
d
## {'username': 'admin', 'machines': ['foo', 'bar']}
# copy() makes a SHALLOW copy: a new dictionary whose values are the same
# objects as in d (the 'machines' list is shared, not duplicated).
c = d.copy()
c['username'] = "Alex" ## replacement will not change the original dictionary
print(c)
## {'username': 'Alex', 'machines': ['foo', 'bar']}
print(d)
## {'username': 'admin', 'machines': ['foo', 'bar']}
c['machines'].remove("bar") ## modification will change the original dictionary
print(c)
## {'username': 'Alex', 'machines': ['foo']}
print(d)
## {'username': 'admin', 'machines': ['foo']}

dictionary: deepcopy (deep copy vs. shallow copy)

from copy import deepcopy

d = {}
d['username'] = "admin"
d['machines'] = ["foo", "bar"]
d
## {'username': 'admin', 'machines': ['foo', 'bar']}
# c is a shallow copy: it shares the 'machines' list with d.
c = d.copy()
# dc is a deep copy: the 'machines' list is duplicated as well.
dc = deepcopy(d)
# Mutating the list through d is visible in c but not in dc.
d['machines'].remove("bar") 
print(c)
## {'username': 'admin', 'machines': ['foo']}
print(dc)
## {'username': 'admin', 'machines': ['foo', 'bar']}

dictionary initialization: fromkeys

# fromkeys() builds a new dictionary from an iterable of keys; every value is None by default.
{}.fromkeys(["name", "age"])
## {'name': None, 'age': None}
# It can be called on the dict type directly, without creating an empty dictionary first.
dict.fromkeys(["name", "age"])
## {'name': None, 'age': None}
# The optional second argument is the value assigned to every key.
dict.fromkeys(["name", "age"], "unknown")
## {'name': 'unknown', 'age': 'unknown'}

dictionary: get

d = {"name": "Amy", "age": 24}
# For a key that exists, d[key] and d.get(key) return the same value.
d["name"]
## 'Amy'
d.get("name")
## 'Amy'
# For a missing key, d[key] raises KeyError. Catch it so the demonstration
# shows the error and the get() calls below still run.
try:
    d["XX"]
except KeyError as err:
    print(repr(err))
## KeyError('XX')
# get() never raises for a missing key: it returns None by default
# (an interactive session prints nothing for None).
d.get("XX")
d.get("XX", "No exist") ## set your own return value for get
## 'No exist'

dictionary: items

phonebook = {"Alice": 2341, 
            "Beth": 4971,
            "Carl": 9401
}
phonebook
## {'Alice': 2341, 'Beth': 4971, 'Carl': 9401}
# items() returns a view of the (key, value) pairs.
phonebook.items()
## dict_items([('Alice', 2341), ('Beth', 4971), ('Carl', 9401)])
# Wrap the view in list() to get an ordinary list of tuples.
list(phonebook.items())
## [('Alice', 2341), ('Beth', 4971), ('Carl', 9401)]

dictionary: loops

# Uses the phonebook dictionary defined on the previous slide.
# Looping over items() unpacks each (key, value) pair into two variables.
it = phonebook.items()
for key, value in it:
    print(key +  "--> " + str(value))
## Alice--> 2341
## Beth--> 4971
## Carl--> 9401
# By convention, _ names a variable whose value is not used (here: the key).
it = phonebook.items()
for _, value in it:
    print(str(value))
## 2341
## 4971
## 9401
# Looping over the dictionary itself yields its keys.
for key in phonebook:
    print(key +  "--> " + str(phonebook[key]))
## Alice--> 2341
## Beth--> 4971
## Carl--> 9401
# values() returns a view of the values only.
phonebook.values()
## dict_values([2341, 4971, 9401])
list(phonebook.values())
## [2341, 4971, 9401]
for i in phonebook.values():
    print(i)
## 2341
## 4971
## 9401

dictionary: dictionary comprehension

names = ["Amy", "Beth", "Carl", "Dan", "Emily", "Frank"]

import random

# Dictionary comprehension: map each name to a random integer score
# (randint includes both end points, 0 and 100).
students_scores = {name: random.randint(0, 100) for name in names}

# Keep only the pairs whose score is strictly greater than 60.
passed_students = {key: value for (key, value) in students_scores.items() if value > 60}

passed_students
# No random seed is set, so the scores (and the output below) differ from run to run.
## {'Frank': 93}

dictionary: pop and popitem

phonebook = {"Alice": 2341, 
            "Beth": 4971,
            "Carl": 9401
}
# pop(key) removes the key and returns the value that was stored under it.
phonebook.pop("Alice")
## 2341
phonebook
## {'Beth': 4971, 'Carl': 9401}
phonebook = {"Alice": 2341, 
            "Beth": 4971,
            "Carl": 9401
}
# popitem() removes and returns a (key, value) pair; since Python 3.7 it is
# the most recently inserted one.
phonebook.popitem()
## ('Carl', 9401)
phonebook
## {'Alice': 2341, 'Beth': 4971}

tuple: review basics

atuple = (0,1,2)
# Tuples are immutable: += builds a NEW tuple and rebinds the name atuple to it.
atuple += (3,4,5)
atuple
## (0, 1, 2, 3, 4, 5)
btuple = (0, 1, 1, ['I', 'like',  'python'])
# The tuple cannot be changed, but a mutable object stored inside it (the list) can.
btuple[3][0] = 'You'
print(btuple)
## (0, 1, 1, ['You', 'like', 'python'])
# count() and index() work as they do for lists, comparing elements by equality.
print(btuple.count(1))
## 2
print(btuple.index(['You', "like", 'python']))
## 3

set: create and add

# A set keeps only unique elements, so the duplicates collapse.
this_set = {1, 1, 2, 3, 3, 3, 4} #create set
print(this_set)
## {1, 2, 3, 4}
this_set = {0, 1, 2} #create set
# add() inserts one element in place (no effect if it is already present).
this_set.add(3)
this_set
## {0, 1, 2, 3}

set: operators

# Each set operation is available both as a method and as an operator.
# Sets are unordered, so the element order in the printed output may differ.
set_a = {"a", "b", "c", "d"}
set_b = {"c", "d", "e", "f"}
# Union: elements in either set.
print(set_a.union(set_b), '----------', set_a | set_b)
## {'e', 'd', 'b', 'c', 'a', 'f'} ---------- {'e', 'd', 'b', 'c', 'a', 'f'}
# Intersection: elements in both sets.
print(set_a.intersection(set_b), '----------', set_a & set_b)
## {'d', 'c'} ---------- {'d', 'c'}
# Difference: elements in set_a that are not in set_b.
print(set_a.difference(set_b), '----------', set_a - set_b)
## {'a', 'b'} ---------- {'a', 'b'}
# Symmetric difference: elements in exactly one of the two sets.
print(set_a.symmetric_difference(set_b), '----------', set_a ^ set_b)
## {'e', 'b', 'a', 'f'} ---------- {'e', 'b', 'a', 'f'}
# clear() empties the set in place; an empty set is displayed as set(), since {} is an empty dict.
set_a.clear()
set_a
## set()

Reference