-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
155 lines (124 loc) · 5 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# -*- coding: utf-8 -*-
"""
Simple windows notepad.exe todo notes parser
- makes use of F5 timestamp in notepad.exe: 23:30 06.02.2016
- aims to convert .txt file to a datastructure of tasks with subtasks/timestamps.
- similar in spirit to http://todotxt.com/
"""
# *** Rules section ***
#Rules for parsing doc2:
# text starting with no offset (at start of string) is title
# four spaces offset is subtask line
# parsing subtasks
# single letter in square brackets anywhere in subtask line is status flag + only one [x] is taken
# one timestamp is 'last checked' date and time
# two timestamps is 'started' and 'ended' date and time
# remaining text after popping out status and timestamp(s) is 'desc'
from datetime import datetime
import re
def ts_to_datetime(timestamp):
"""Converts notepad.exe timestamp to datetime instance"""
return datetime.strptime(timestamp.strip(), '%H:%M %d.%m.%Y')
# catches one letter inside brackets, like '[d]'
STATUS_PAT = re.compile(r'\s*\[\s*(\w)\s*\]\s*')
# catches notepad.exe F5 timestamp
DATETIME_PAT = re.compile(r'\s*([012]?\d:\d\d [0123]\d\.[01]\d\.\d\d\d\d)\s*')
def get_status_letter(string, status_re = STATUS_PAT):
"""Return a letter inside first brackets"""
# will skip any '[x]' in '[y] [x] [x]' after first occurence of '[y]'
status_list = status_re.findall(string)
if len(status_list) > 0:
return status_list[0]
def get_datetime_list(string, datetime_re = DATETIME_PAT):
return [ts_to_datetime(ts) for ts in datetime_re.findall(string)]
def get_description(s, status_re = STATUS_PAT, datetime_re = DATETIME_PAT):
s = datetime_re.sub(' ', s)
s = status_re.sub(' ', s)
return s.strip()
def parse_subtask(str_):
result = {
'status': None,
'last checked': None,
'started': None,
'ended': None,
'desc': ''
}
# get status letter like 's' from '[s]'
result['status'] = get_status_letter(str_)
# get timestamps, allocate to dictionary
datetime_list = get_datetime_list(str_)
if len(datetime_list) == 1:
result['last checked'] = datetime_list[0]
elif len(datetime_list) == 2:
result['started'] = datetime_list[0]
result['ended'] = datetime_list[1]
else:
# not date provided
pass
# get test line description for subtask
result['desc'] = get_description(str_)
return result
doc2 = """worktitle
[s] 23:30 06.02.2016 subtask description 3
[s] 23:30 06.02.2016 subtask description 3
worktitle2
[s] 23:30 06.02.2016 subtask description 3
[s] 23:30 06.02.2016 subtask description 3
"""
ds = {
'status': 's',
'last checked': datetime(2016, 2, 6, 23, 30),
'started': None,
'ended': None,
'desc': 'subtask description 3'
}
def is_task(s):
return not s.startswith((' ', '\t'))
def is_subtask(s):
return s.startswith((' ', '\t'))
def filled_lines(doc):
for line in doc.splitlines(False):
if len(line.strip()) > 0:
yield line
# question: что делает filter
# что такое False в .splitlines(False)
# чем это лучше doc.split('\n')
#
# answer: filter в данном случае отбросит все пустые строки.
# А вообще фильтр filter(ff, collection) оставляет только
# те элементы коллекции на которых функция ff принимает значение True.
# Если ff это None, то используется id функция, возвращающая сам объект.
# Такое использование не очень то наглядно, но вполне общепринято,
# насколько я могу судить.
#
# False означает, что в конце разбитых строчек \n не нужен
#
# splitlines разобъет строчки и в случае использования в тексте
# Windows переносов строк ('\r\n')
# return filter(None, doc.splitlines(False))
assert list(not_vacuous_lines(doc2)) == filter(None, doc2.splitlines(False))
def parse_tasks(doc):
tasks = []
for line in not_vacuous_lines(doc):
if is_task(line):
# note: line.strip() may be substituted with parse_subtask(line), but different test is needed
tasks.append({'title': line.strip(),
'subtasks': []})
elif is_subtask(line):
# add to previous element in list
tasks[-1]['subtasks'].append(parse_subtask(line))
return tasks
z = parse_tasks(doc2)
ref = [{'title':'worktitle', 'subtasks':[ds,ds]},
{'title':'worktitle2', 'subtasks':[ds,ds]}]
assert z == ref
# alternative rules:
#
# timestamp
# some work
# empty line - do nothing
#
# workflow:
# reorder and overwrite
# file statistics
#