# test_initial.py
# Forked from lucasrla/remarks.
from enum import Enum
import pytest
from fitz import fitz
from parsita import lit, reg, rep, Parser, opt, until, Failure
from returns.result import Success
from remarks.metadata import ReMarkableAnnotationsFileHeaderVersion
from test_support import with_remarks
from pdf_test_support import is_valid_pdf, assert_scrybble_warning_appears_on_page
# Placeholder for metadata objects: use it for a data field whose value has
# not yet been manually reviewed.
UNKNOWN = None
class ReMarkableNotebookType(Enum):
    """Document kinds stored in the ``notebook_type`` field of the metadata objects."""

    # Each member's value is its human-readable label.
    NOTEBOOK = "Notebook"
    EBOOK = "EBook"
    PDF = "PDF"
# A metadata object MUST be entirely hand-crafted and hand-checked
gosper_notebook = {
    # Name of the ReMarkable document
    "notebook_name": "Gosper",
    # Location of the ReMarkable document
    ".rmn_source": "tests/in/v2_notebook_complex",
    "notebook_type": ReMarkableNotebookType.NOTEBOOK,
    # Number of pages that come from a source PDF
    "pdf_pages": 0,
    # One entry per .rm file; the positions are listed explicitly so they
    # stay hand-checked.
    ".rm_files": [
        {
            ".rm_file_version": ReMarkableAnnotationsFileHeaderVersion.V3,
            "output_document_position": position,
        }
        for position in (0, 1, 2)
    ],
}
on_computable_numbers = {
    # Name of the ReMarkable document
    "notebook_name": "1936 On Computable Numbers, with an Application to the Entscheidungsproblem - A. M. Turing",
    # Location of the ReMarkable document
    ".rmn_source": "tests/in/on-computable-numbers",
    "notebook_type": ReMarkableNotebookType.PDF,
    # Number of pages that come from a source PDF
    "pdf_pages": 36,
    # One entry per .rm file; the positions are listed explicitly so they
    # stay hand-checked.
    ".rm_files": [
        {
            ".rm_file_version": ReMarkableAnnotationsFileHeaderVersion.V5,
            "output_document_position": position,
        }
        for position in (0, 1, 27)
    ],
}
# NOTE(review): unlike the other metadata objects in this file, this one has
# no "notebook_type" entry -- confirm whether that is intentional.
empty_document = {
    # Name of the ReMarkable document
    "notebook_name": "Empty",
    "description": """
This document is empty, and has only one page. It contains the handwritten annotation with the word "empty"
""",
    # Location of the ReMarkable document (an .rmn file rather than a directory)
    ".rmn_source": "tests/in/empty_document.rmn",
    "pdf_pages": 1,
    ".rm_files": [
        {
            ".rm_file_version": ReMarkableAnnotationsFileHeaderVersion.V6,
            "output_document_position": 0,
        },
    ],
}
r"""
_____ _____ ______
| __ \| __ \| ____|
| |__) | | | | |__
| ___/| | | | __|
| | | |__| | |
|_| |_____/|_|
"""
@with_remarks(on_computable_numbers['.rmn_source'])
def test_v5_document():
    """Check the rmc PDF produced for the v5 "On Computable Numbers" document.

    The output must be a valid PDF with as many pages as the source PDF, and
    every page listed in the metadata's ``.rm_files`` must show the warning.
    """
    output_path = f"tests/out/{on_computable_numbers['notebook_name']} _rmc.pdf"
    rendered = fitz.open(output_path)

    assert is_valid_pdf(rendered)
    assert rendered.page_count == on_computable_numbers['pdf_pages']

    # There should be a warning, since v5 is not yet supported by the rmc-renderer
    for rm_file in on_computable_numbers['.rm_files']:
        assert_scrybble_warning_appears_on_page(rendered, rm_file['output_document_position'])
@with_remarks(empty_document[".rmn_source"])
def test_supports_rmn_notebook_as_input():
    """Check that the ``.rmn`` input yields a valid rmc PDF in ``tests/out``."""
    output_path = f"tests/out/{empty_document['notebook_name']} _rmc.pdf"
    assert is_valid_pdf(fitz.open(output_path))
@with_remarks(gosper_notebook['.rmn_source'])
def test_pdf_output():
    """Check the rmc PDF produced for the "Gosper" notebook.

    The output must be a valid PDF with one page per ``.rm_files`` entry, and
    every one of those pages must show the warning.
    """
    rm_files = gosper_notebook[".rm_files"]
    rendered = fitz.open(f"tests/out/{gosper_notebook['notebook_name']} _rmc.pdf")

    assert is_valid_pdf(rendered)
    assert rendered.page_count == len(rm_files)

    # There should be a warning, since v3 is not yet supported by the rmc-renderer
    for rm_file in rm_files:
        assert_scrybble_warning_appears_on_page(rendered, rm_file['output_document_position'])
r"""
__ __ _ _
| \/ | | | | |
| \ / | __ _ _ __| | ____| | _____ ___ __
| |\/| |/ _` | '__| |/ / _` |/ _ \ \ /\ / / '_ \
| | | | (_| | | | < (_| | (_) \ V V /| | | |
|_| |_|\__,_|_| |_|\_\__,_|\___/ \_/\_/ |_| |_|
Lessons about parsita.
1. When invoking a parser, you _must_ consume all the tokens until the EOD or you will get a failure
You can do this with
`{...} << whatever`
2. When you want to extract _one_ value out of a big text, you can write the following:
parser_that_must_exist_around_it >> parser_that_follows >> another_parser >> the_parser_you_care_about << after_the_parser_you_care_about
So:
`no >> yes << no` => `Success<yes>`
3. Lambdas are evil. Do not use lambdas to create abstractions.
While it may seem attractive to write a lambda to express a common pattern, this is not a good idea.
The operators in parsita have specific meaning, and parsita is a language expressed with operators.
When you write a function, the result of the operator is lost.
"""
def assert_parser_succeeds(parser: Parser, input_string: str, expected_output=None):
result = parser.parse(input_string)
match result:
case Success(value):
output = value
if expected_output:
assert expected_output == output
case Failure(error):
raise error
assert type(result) is Success, result.failure()
# Small building blocks shared by the markdown parsers below.
any_char = reg(r'.') | lit("\n")
whatever = rep(any_char)
newline = lit('\n')
to_newline = reg(r'[^\n]+')
obsidian_tag = reg(r"#([a-z/])+")

# Optional frontmatter: a `---` fence, a `tags:` list with one quoted Obsidian
# tag, and the closing `---` fence. Only the tag is kept (`>> tag <<`).
frontmatter = opt(
    lit('---')
    >> newline
    >> lit("tags")
    >> lit(":\n")
    >> lit("- ")
    >> lit("'")
    >> obsidian_tag
    << lit("'")
    << rep(newline)
    << lit("---")
    << rep(newline)
)

# The literal must stay byte-for-byte equal to the text in the generated file.
autogeneration_warning = lit("""> [!WARNING] **Do not modify** this file
> This file is automatically generated by Scrybble and will be overwritten whenever this file in synchronized.
> Treat it as a reference.""")


def h(n, c):
    """Return a parser for the literal ``n`` plus one space, followed by ``c``; keeps ``c``."""
    return lit(n + " ") >> c
@with_remarks("tests/in/highlighter-test")
@pytest.mark.markdown
def test_generated_markdown_has_autogeneration_warning():
    """Check that the generated Obsidian markdown contains the autogeneration warning."""
    # Skip ahead to the warning, match it, then consume the rest of the file.
    warning_somewhere = until(autogeneration_warning) << autogeneration_warning >> whatever
    with open("tests/out/docsfordevelopers _obsidian.md") as markdown_file:
        markdown = markdown_file.read()
    assert_parser_succeeds(warning_somewhere, markdown)
@with_remarks("tests/in/v3_markdown_tags")
@pytest.mark.markdown
def test_yaml_frontmatter_is_valid():
    """Check that the generated markdown starts with frontmatter holding the expected tag."""
    with open('tests/out/tags test _obsidian.md') as markdown_file:
        markdown = markdown_file.read()
    # `<< whatever` consumes the remainder so the parser reaches the end of input.
    frontmatter_then_rest = frontmatter << whatever
    assert_parser_succeeds(frontmatter_then_rest, markdown, ["#remarkable/obsidian"])
# @with_remarks("tests/in/v3_markdown_tags")
# @with_remarks("tests/in/highlighter-test")
# @pytest.mark.markdown
# def test_generated_markdown_heading_is_positioned_correctly():
# rmdoc_title = h("#", to_newline)
#
# with open("tests/out/docsfordevelopers _obsidian.md") as f:
# content = f.read()
# assert_parser_succeeds(frontmatter >> rmdoc_title << whatever, content, "docsfordevelopers")
# with open("tests/out/tags test _obsidian.md") as f:
# content = f.read()
# assert_parser_succeeds(frontmatter >> rmdoc_title << whatever, content, "tags test")
# @with_remarks("tests/in/v3_typed_text")
# def test_something():
# raise Exception("hi")