-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsession.py
92 lines (86 loc) · 2.71 KB
/
session.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# coding: utf-8
# Eager version: every step builds a complete list before the next one runs.
# NOTE: the name `input` shadows the builtin of the same name for the rest of
# this script.
input = [*range(6)]
input
doubled = [2 * value for value in input]
doubled
summed = sum(doubled)
summed
# Lazy version of the same computation: `range` and the generator expression
# produce values only when `sum` asks for them, so no intermediate list is
# built and `doubled` is exhausted once `sum` returns.
input = range(6)
input
doubled = (2 * value for value in input)
doubled
summed = sum(doubled)
summed
def verbose_range(n):
    """Yield the integers produced by ``range(n)``, printing progress.

    A message is printed when iteration starts, before each value is
    yielded, and once the underlying range is exhausted. Because this is a
    generator, nothing is printed until the caller starts iterating.

    Parameters
    ----------
    n : int
        Upper bound passed to ``range``.

    Yields
    ------
    int
        Each element of ``range(n)``, in order.
    """
    print('range started')
    for elem in range(n):
        # Report and yield the loop variable itself (an earlier draft printed
        # `n` and yielded an undefined name, which raised NameError).
        print('yielding %i' % elem)
        yield elem
    print('range done')


# Compare the silent builtin with the verbose generator on the same input.
list(range(3))
list(verbose_range(3))
def verbose_double(iterator):
    """Lazily yield twice each element of *iterator*, printing progress.

    Prints a message when iteration begins, one message per element showing
    the element and its doubled value, and a final message once *iterator*
    is exhausted.

    Parameters
    ----------
    iterator : iterable
        Source of values; each must be formattable with ``%i``.

    Yields
    ------
    The value of ``element * 2`` for every element, in order.
    """
    print('double start')
    for value in iterator:
        twice = value * 2
        print('multiplying %i by 2 to get %i' % (value, twice))
        yield twice
    print('doubled done')
# Drive the doubling generator, first from a plain range and then from the
# verbose range so the messages of both stages interleave.
# (Earlier attempts called `doubled`, which is a generator object and not
# callable, and `verbose_doubled`, which is not defined anywhere.)
list(verbose_double(range(3)))
list(verbose_double(verbose_range(3)))
def verbose_sum(iterator):
    """Add up the elements of *iterator*, printing each accumulation step.

    Prints a start message, one line per element showing the element, the
    running total before the addition and the total after it, and a final
    message when *iterator* is exhausted.

    Parameters
    ----------
    iterator : iterable
        Values to add; each must be formattable with ``%i``.

    Returns
    -------
    The total of all elements, or ``0`` when *iterator* is empty.
    """
    print('sum started')
    total = 0
    for item in iterator:
        updated = total + item
        print('adding %i to %i to get %i' % (item, total, updated))
        total = updated
    print('sum done')
    return total
# Chain the three verbose stages by nesting the calls. verbose_range and
# verbose_double are generators, so each value is pulled through all stages
# by verbose_sum's loop rather than each stage finishing before the next.
verbose_sum(verbose_double(verbose_range(5)))
import toolz as tz
# The same chain written left to right: per the toolz docs, pipe(x, f, g, h)
# evaluates h(g(f(x))).
tz.pipe(5, verbose_range, verbose_double, verbose_sum)
def double(x):
    """Return ``x * 2``."""
    result = x * 2
    return result
from toolz import curried
# The builtin map needs the function and the iterable in the same call.
list(map(double, [0, 1, 2]))
# The curried variant is given only the function here; the result is then
# called with the iterable on the next line (partial application, per the
# toolz.curried docs).
double_all = curried.map(double)
list(double_all([0, 1, 2]))
from sklearn import decomposition
import numpy as np
def streaming_pca(samples, n_components=2, batch_size=50):
    """Fit an incremental PCA to a stream of samples, one batch at a time.

    The samples are grouped into tuples of ``batch_size`` items, each group
    is converted to a NumPy array and handed to ``partial_fit``.

    Parameters
    ----------
    samples : iterable
        Stream of samples. Presumably each sample is a 1-D array of equal
        length, so that a group stacks into a 2-D array (TODO confirm).
    n_components : int, optional
        Forwarded to ``IncrementalPCA``.
    batch_size : int, optional
        Forwarded to ``IncrementalPCA`` and also used as the group size.

    Returns
    -------
    The ``IncrementalPCA`` object after all batches have been fed to it.
    """
    ipca = decomposition.IncrementalPCA(
        n_components=n_components,
        batch_size=batch_size,
    )
    # NOTE(review): toolz `partition` without a pad value is documented to
    # drop a trailing incomplete group, so samples beyond the last full batch
    # would not be used for fitting -- confirm this is intended.
    for batch in tz.partition(batch_size, samples):
        ipca.partial_fit(np.array(batch))
    return ipca
def array_from_txt(line):
    """Parse one comma-separated line of numbers into a 1-D float array.

    Trailing whitespace (including the newline) is stripped before the line
    is split on commas.

    Parameters
    ----------
    line : str
        Text such as ``"5.1,3.5,1.4,0.2\\n"``.

    Returns
    -------
    numpy.ndarray
        One float per comma-separated field.

    Raises
    ------
    ValueError
        If a field cannot be converted to a float.
    """
    # `np.float` was only an alias of the builtin `float` and has been removed
    # from NumPy; the builtin gives the same dtype on every NumPy version.
    return np.array(line.rstrip().split(','), dtype=float)
# First pass over the file: parse each line into an array and fit the
# incremental PCA on the resulting stream.
with open('iris.csv') as fin:
    pca_obj = tz.pipe(fin, curried.map(array_from_txt), streaming_pca)
# Second pass: project every sample with the fitted model. Each parsed line
# is a 1-D array, while scikit-learn's transform expects a 2-D
# (n_samples, n_features) input, so each sample is promoted to a single-row
# matrix first; np.squeeze then removes the resulting length-1 axis.
with open('iris.csv') as fin:
    components = np.squeeze(list(tz.pipe(fin,
                                         curried.map(array_from_txt),
                                         curried.map(np.atleast_2d),
                                         curried.map(pca_obj.transform))))
from matplotlib import pyplot as plt
# Scatter plot with one axis per column of `components`.
plt.scatter(*components.T)
# readlines() loads all lines at once; use a context manager so the file
# handle is closed instead of being left to the garbage collector.
with open('iris.csv') as fin:
    type(fin.readlines())
import glob
# Count 6-character windows over the non-header lines of the matched files.
# A '$' is placed between consecutive lines and any window containing it is
# discarded, so no counted window spans two lines.
# NOTE(review): lines are not stripped, so each line's trailing newline
# character takes part in the windows -- confirm this is intended.
counts = tz.pipe(
    glob.glob('sample.fasta'),
    curried.map(open),
    tz.concat,
    curried.filter(lambda line: not line.startswith('>')),
    curried.interpose('$'),
    tz.concat,
    curried.sliding_window(6),
    curried.map(''.join),
    curried.filter(lambda window: '$' not in window),
    tz.frequencies,
)
# Peek at a single entry of the result, then stop.
for k in counts:
    print(k, counts[k])
    break
# Histogram of how often each window occurred.
counts_list = [*counts.values()]
plt.hist(counts_list, bins=25)