Introduction to Biostatistical Computing PHC 6937

Basics of Python programming

Zhiguang Huo (Caleb)

Monday Oct 17th, 2022

Outline

Jupyter Notebook

How to start Jupyter Notebook

Recommended approach in this class

How to Run Jupyter Notebook

Jupyter Notebook markdown (1)

# Header 1
## Header 2
### Header 3
#### Header 4
##### Header 5
###### Header 6
Plain text  
End a line with two spaces to start a new paragraph.  
*italics* and _italics_  
**bold** and __bold__  
<span style="color:red">color</span>  
superscript^2^  
~~strikethrough~~  
[link](www.rstudio.com)   

Jupyter Notebook markdown (2)

![](https://caleb-huo.github.io/teaching/2022FALL/logo.png){width=50%}
$A = \pi \times r^{2}$
***

Jupyter Notebook markdown (3)

* unordered list
* item 2
    + sub-item 1
    + sub-item 2
1. ordered list
2. item 2
    + sub-item 1
    + sub-item 2

Three ways to run python code

print("hello")
print("hello")
python hello.py

4 basic data types

type(1)
## <class 'int'>
type(3.14)
## <class 'float'>
type("Hello")
## <class 'str'>
type(True)
## <class 'bool'>
type(False)

Conversion among 4 basic data types

int(1.1)
## 1
str(1.1)
## '1.1'
float("3.15")
## 3.15
int(True)
## 1
bool(0)
## False
str(False)
## 'False'

3 basic Python data structures

list

names = ["Alice", "Beth", "Carl"]
len(names)
## 3
names[0]
## 'Alice'
names[-1]
## 'Carl'
names[0] = "Alex"
names
## ['Alex', 'Beth', 'Carl']

Tuple

names = ("Alice", "Beth", "Carl")
names2 = "Alice", "Beth", "Carl"
len(names)
## 3
names[0]
## 'Alice'
names[-1]
## 'Carl'
names[0] = "Alex"
names

list and tuple

a = [3,1]
tuple(a)
## (3, 1)
b = (3,1)
list(b)
## [3, 1]
sorted(a)
## [1, 3]
max(a)
## 3
min(b)
## 1

subsetting of list

data = [[1,2,3], [2,3,4]]
len(data)
## 2
data[1]
## [2, 3, 4]
data[1][1]
## 3

dictionary

phonebook = {"Alice": 2341, 
            "Beth": 4971,
            "Carl": 9401
}
phonebook
## {'Alice': 2341, 'Beth': 4971, 'Carl': 9401}
phonebook["Alice"]
## 2341
items = [("Name","Smith"), ("Age", 44)]
d = dict(items)
d
## {'Name': 'Smith', 'Age': 44}
d = dict(Name="Smith", Age=44)
d
## {'Name': 'Smith', 'Age': 44}

Basic python operators

2*4
## 8
2**4 ## raise to the power
## 16
7//4
## 1
"Hello" + "World"
## 'HelloWorld'
False and True
## False
False or True
## True
a = 10
a = a * 5
print(a)
## 50
a = 10
a *= 5
print(a)
## 50

Basic string operators

a = "greetings"
len(a)
## 9
a[0]
## 'g'
a[len(a) - 1]
## 's'
a[0:4]
## 'gree'
a[4]
## 't'

Basic string operators

a = "Hello"
a = a + "World"
print(a)
## HelloWorld
a = "Hello"
a += "World"
print(a)
## HelloWorld

Basic string operators

a = "greetings"
a[1:]
## 'reetings'
a[:3]
## 'gre'
a[:-1]
## 'greeting'
a[0:7:2] ## step size 2, default is 1 
## 'getn'
a[-1:0:-1]
## 'sgniteer'
a[::-1]
## 'sgniteerg'
b = "Hi"
b * 3
## 'HiHiHi'
name = "Lucas"
b + " " + "Lucas" + ", " + a
## 'Hi Lucas, greetings'
res = b + " " + "Lucas" + "\n" + a
print(res)
## Hi Lucas
## greetings

python comments

1+10 ## here is a comment
## 11
"""
XXX
XXXX
"""
a = """
XXX
XXXX
"""

a
## '\nXXX\nXXXX\n'
print(a)
## 
## XXX
## XXXX

python string formatting

name = 'Alex'
age = 27

print('%s is %d years old' % (name, age))
## Alex is 27 years old
print('{} is {} years old'.format(name, age))
## Alex is 27 years old
print(f'{name} is {age} years old') 
## Alex is 27 years old

We will focus on the f-string in this class

f'{}'

Python f-string expressions

bags = 4
apples_in_bag = 10

print(f'There are a total of {bags * apples_in_bag} apples')
## There are a total of 40 apples
print(f'There are {bags} bags, and {apples_in_bag} apples in each bag.\nSo there are a total of {bags * apples_in_bag} apples')
## There are 4 bags, and 10 apples in each bag.
## So there are a total of 40 apples

f-string formatting float (precision)

val = 12.3

print(f'{val:.2f}')
## 12.30
print(f'{val:.5f}')
## 12.30000

f-string formatting float (width)

val = 12.3

print(f'{val:2}')
## 12.3
print(f'{val:7}')
##    12.3
print(f'{val:07}')
## 00012.3
for x in range(1, 11):
    print(f'{x:02} {x*x:3} {x*x*x:4}')

f-string formatting float

f'{value:{width}.{precision}}'
f'{5.5:10.3f}'
## '     5.500'
from math import pi ## Python modules will be introduced in more detail in the function lecture
pi
## 3.141592653589793
f'{pi:10.6f}'
## '  3.141593'
f'{pi*100000:,.2f}'

Input function

username = input("What is your name?")
print("Hello " + username)
print(f"Hello {username}")
num1 = input("First number:")
num2 = input("Second number:")
res = int(num1) + int(num2)
print(res)
print(f"{num1} plus {num2} is {res}")

Basic string operators

find

title = "I love introduction to Biostatistical computing!"
title.find("I")
## 0
title.find("love")
## 2
title.find("o")
## 3
title.find("o")
## 3
title.find("o", 4) ## start searching from index 4
## 11
title.find("XX")
## -1
title.index("love")
title.index("XX")

pattern detection

title = "I love introduction to Biostatistical computing!"
"love" in title
## True
"computing" in title
## True
"XX" in title
## False
title.endswith("computing!")
## True
title.startswith("I love")
## True
title.count("l")
## 2

join

seq = ["1", "2", "3", "4", "5"]
sep = "+"
sep.join(seq)
## '1+2+3+4+5'
"".join(seq)
## '12345'
dirs =( "", "usr", "bin", "env")
"/".join(dirs)
## '/usr/bin/env'
sep = "+"
print("C:" + "\\".join(dirs)) ## a single \ is the escape character, so a literal backslash must be written as \\
## C:\usr\bin\env

split

longSeq = "1+2+3+4+5"
longSeq.split("+")
## ['1', '2', '3', '4', '5']
longSeq.split("3")
## ['1+2+', '+4+5']
"Using the default value".split()
## ['Using', 'the', 'default', 'value']

lower, upper, title

sentence = "I like introduction to biostatistical computing!"
sentence.lower()
## 'i like introduction to biostatistical computing!'
sentence.upper()
## 'I LIKE INTRODUCTION TO BIOSTATISTICAL COMPUTING!'
sentence.title()
## 'I Like Introduction To Biostatistical Computing!'
sentence.islower()
sentence.isupper()
sentence.istitle()

strip

a = "   internal   whitespace is kept     "
a.strip()
## 'internal   whitespace is kept'
b = "*** SPAM * for * everyone!!! ***"
b.strip(" *!")
## 'SPAM * for * everyone'
c = "\na\nb\n\n\nc\n\n"
c.strip()
## 'a\nb\n\n\nc'

replace

a = "This is a cat!"
a.replace("This", "That")
## 'That is a cat!'
a.replace("is", "eez")
## 'Theez eez a cat!'

Control flows

name = input("What is your name? ")
if name.endswith("Smith"):
    print("Hello, Mr. Smith")
name = input("What is your name? ")
if name.endswith("Smith"):
    print("Hello, Mr. Smith")
    print("Have a good night")

Indentation

if else elif

num = float(input("Enter a number: ")) ## input() returns a string; convert it before comparing with 0
if num > 0:
    print("The number is positive")
else:
    print("The number is non-positive")
num = float(input("Enter a number: ")) ## input() returns a string; convert it before comparing with 0
if num > 0:
    print("The number is positive")
elif num < 0:
    print("The number is negative")
else:
    print("The number is zero")

nested if else conditions

name = input("What is your name? ")
if name.endswith("Smith"):
    if name.startswith("Mr."):
        print("Hello, Mr. Smith")
    elif name.startswith("Mrs."):
        print("Hello, Mrs. Smith")
    else:
        print("Hello, Smith")
else:
    print("Hello, Stranger")

if else same line

number = input("Please enter a number: ")
if int(number) % 2 == 0:
    print("even")
else:
    print("odd")
number = input("Please enter a number: ")
print("even") if int(number) % 2 == 0 else print("odd")

True or False conditions

1>2
## False
4 == 5
## False
"ap" in "apple"
## True
"apple" in ["apple", "orange"]
## True
True and False
## False
True or False
## True
not True
## False

True or False conditions

a = 4
b = 6
c = 9

a < b and b < c
## True
a < b < c
## True
a < c > b
## True

match (available for python >= 3.10)

status = 400    

match status:
    case 400:
        print("Bad request")
    case 404:
        print("Not found")
    case 418:
        print("I'm a teapot")
    case _:
        print("Something's wrong with the internet")

for loops

words = ["cat", "dog", "gator"]
for w in words:
     print(w)
## cat
## dog
## gator
words = ["cat", "dog", "gator"]
for w in words:
     print(f"{w} has {len(w)} letters in it.")
## cat has 3 letters in it.
## dog has 3 letters in it.
## gator has 5 letters in it.

range() function

for i in range(3):
    print(i)
## 0
## 1
## 2
list(range(3))
## [0, 1, 2]
list(range(3,7))
## [3, 4, 5, 6]

range() function

list(range(3,8,2))
## [3, 5, 7]
list(range(7,2,-2))
## [7, 5, 3]
words = ["cat", "dog", "gator"]
for i in range(len(words)):
     print(i, words[i])
## 0 cat
## 1 dog
## 2 gator

break

for num in range(1, 10):
  if num % 5 == 0:
    print(f"{num} can be divided by 5")
    break
  print(f"{num} cannot be divided by 5")
## 1 cannot be divided by 5
## 2 cannot be divided by 5
## 3 cannot be divided by 5
## 4 cannot be divided by 5
## 5 can be divided by 5

continue

for num in range(1, 10):
  if num % 5 == 0:
    continue
  print(f"{num} cannot be divided by 5")
## 1 cannot be divided by 5
## 2 cannot be divided by 5
## 3 cannot be divided by 5
## 4 cannot be divided by 5
## 6 cannot be divided by 5
## 7 cannot be divided by 5
## 8 cannot be divided by 5
## 9 cannot be divided by 5

pass

sequence = {'p', 'a', 's', 's'}
for val in sequence:
    pass
a = 33
b = 200

if b > a:
  pass

while loop

num = 1
while num<10:
  if num % 5 == 0:
    print(f"{num} can be divided by 5")
    break
  print(f"{num} cannot be divided by 5")
  num+=1
## 1 cannot be divided by 5
## 2 cannot be divided by 5
## 3 cannot be divided by 5
## 4 cannot be divided by 5
## 5 can be divided by 5
num = 0
while num<10:
  num+=1
  if num % 5 == 0:
    continue
  print(f"{num} cannot be divided by 5")
## 1 cannot be divided by 5
## 2 cannot be divided by 5
## 3 cannot be divided by 5
## 4 cannot be divided by 5
## 6 cannot be divided by 5
## 7 cannot be divided by 5
## 8 cannot be divided by 5
## 9 cannot be divided by 5

file operation (read)

https://caleb-huo.github.io/teaching/data/misc/my_file.txt

file = open("my_file.txt")
contents = file.read()
print(contents)
## Hello, my name is Caleb. Hello World!
## I like computing
file.close()
with open("my_file.txt") as file:
    contents = file.read()
    print(contents)
## Hello, my name is Caleb. Hello World!
## I like computing

file operation (read)

myfile = "my_file.txt"
with open(myfile) as file:
    lines = file.readlines()

for aline in lines:
    print(aline.strip())
## Hello, my name is Caleb. Hello World!
## I like computing

file operation (write)

with open("new_file.txt", mode="w") as file:
    file.write("I like biostatistical computing!")
## 32
with open("new_file.txt", mode="a") as file:
    file.write("We like biostatistical computing!")
## 33

Exceptions

with open("a_file.txt") as file:
    file.read()
fruit_list = ["Apple", "Banana", "Pear"]
fruit_list[3]
text = "abc"
print(text + 5)
raise TypeError("This is an error that I made up!")

Handle exceptions

try:
    file = open("a_file.txt")
    print(1 + "2")
except FileNotFoundError:
    print("Catch FileNotFoundError")
except TypeError as error_message:
    print(f"Here is the error: {error_message}.")
else:
    content = file.read()
    print(content)
finally: ## always runs, whether or not an exception was raised
    file.close()
    print("File was closed.")
## Catch FileNotFoundError
## File was closed.

datetime

import datetime as dt
now = dt.date.today() ## date only
now.year
## 2022
now.month
## 10
now.day
# now.weekday()
## 17
birthday = dt.date(1995, 7, 31)
age = now - birthday
age.days
## 9940

datetime

now = dt.datetime.now()
now.year
## 2022
now.month
## 10
now.day
## 17
now.hour
## 11
now.minute
## 40
now.second
## 11
now.microsecond
## 431972
now.weekday()
## 0

datetime

now = dt.datetime.now()

print(f'{now:%Y-%m-%d %H:%M}')
## 2022-10-17 11:40

Reference