extract_raindrop.py
#!/usr/bin/env python3
"""Process raindrop.io export into format accepted by `extract_dictation.py`."""
__author__ = "Joseph Reagle"
__copyright__ = "Copyright (C) 2009-2023 Joseph Reagle"
__license__ = "GPLv3"
__version__ = "1.0"

# TODO: 2024-10-18 : add DOI ability since some papers are web pages

import argparse  # http://docs.python.org/dev/library/argparse.html
import logging as log
import re
import sys
import webbrowser
from pathlib import Path  # https://docs.python.org/3/library/pathlib.html

from send2trash import send2trash  # type: ignore

import busy

HOME = Path.home()


def main(argv: list[str]) -> argparse.Namespace:
    """Process arguments."""
    # https://docs.python.org/3/library/argparse.html
    arg_parser = argparse.ArgumentParser(
        description="""Format raindrop.io annotations for use with
        dictation-extract.py in
        https://github.com/reagle/thunderdell
        """
    )
    arg_parser.add_argument(
        "-p",
        "--publish",
        action="store_true",
        default=False,
        help="publish to social networks (can also `-p` in editor)",
    )
    # positional arguments
    arg_parser.add_argument("file_names", nargs="+", type=Path, metavar="FILE_NAMES")
    # optional arguments
    arg_parser.add_argument(
        "-L",
        "--log-to-file",
        action="store_true",
        default=False,
        help="log to file %(prog)s.log",
    )
    arg_parser.add_argument(
        "-V",
        "--verbose",
        action="count",
        default=0,
        help="increase verbosity from critical through error, warning, info, and debug",
    )
    arg_parser.add_argument("--version", action="version", version="0.1")
    args = arg_parser.parse_args(argv)
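    # Each -V lowers the logging threshold by 10 from CRITICAL (50):
    # 0 -> CRITICAL, 1 -> ERROR (40), 2 -> WARNING (30), 3 -> INFO (20), 4 -> DEBUG (10)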
    log_level = (log.CRITICAL) - (args.verbose * 10)
    LOG_FORMAT = "%(levelname).4s %(funcName).10s:%(lineno)-4d| %(message)s"
    if args.log_to_file:
        log.basicConfig(
            filename="extract-raindrop.log",
            filemode="w",
            level=log_level,
            format=LOG_FORMAT,
        )
    else:
        log.basicConfig(level=log_level, format=LOG_FORMAT)
    return args


def process_files(args: argparse.Namespace, file_paths: list[Path]) -> None:
    """Process files for highlights and annotations using console annotation syntax.

    .summary
    excerpt
    , annotation.
    """
    URL_RE = re.compile(r"https?://\S+")
    for file_path in file_paths:
        log.info(f"{file_path=}")
        url_found = False
        url = ""
        comment = [""]  # initial line for summary
        for line in file_path.read_text().splitlines():
            line = line.strip()
            if not url_found:
                if match := URL_RE.search(line):
                    url = match.group()
                    url_found = True
                    print(f"URL found in {file_path}: {url}")
                    continue
            if not line:
                continue
            # lines are presumed to be excerpts, so remove list marker
            if line.startswith("- "):
                comment.append(line[2:].strip())
            # else, mark with comma as it's a user annotation
            else:
                comment.append(f", {line}")
        text = "\n".join(comment)
        webbrowser.open(url)
        params = {"scheme": "c", "url": url, "comment": ""}
        scraper = busy.get_scraper(params["url"].strip(), text)
        biblio = scraper.get_biblio()
        biblio["tags"] = "misc"  # default keyword
        del biblio["excerpt"]  # no need for auto excerpt
        busy.log2mm(args, biblio)


if __name__ == "__main__":
    args = main(sys.argv[1:])
    log.info("==================================")
    log.info(f"{args=}")
    process_files(args, args.file_names)
    user_input = input("\nTrash processed file? 'y' for yes,\n")
    if user_input == "y":
        send2trash(args.file_names)