forked from quickwit-oss/tantivy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfooter.rs
232 lines (198 loc) · 8.09 KB
/
footer.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
use std::io;
use std::io::Write;
use common::{BinarySerializable, CountingWriter, DeserializeFrom, FixedSize, HasLen};
use crc32fast::Hasher;
use serde::{Deserialize, Serialize};
use crate::directory::error::Incompatibility;
use crate::directory::{AntiCallToken, FileSlice, TerminatingWrite};
use crate::{Version, INDEX_FORMAT_OLDEST_SUPPORTED_VERSION, INDEX_FORMAT_VERSION};
/// Upper bound, in bytes, on the serialized footer payload. `Footer::extract_footer`
/// rejects any file whose recorded footer length exceeds this value.
const FOOTER_MAX_LEN: u32 = 50_000;
/// Magic number written as the last `u32` of every footer. A mismatch when
/// reading indicates corruption or an old, no longer supported footer format.
const FOOTER_MAGIC_NUMBER: u32 = 1337;
/// Integer type of the checksum stored in a `Footer`.
type CrcHashU32 = u32;
/// A Footer is appended to every file.
///
/// It records the version of the library that produced the file together with
/// a checksum. `append_footer` serializes it as JSON and `extract_footer`
/// parses it back.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Footer {
/// Version information; `Footer::new` fills it from `crate::VERSION`, and
/// `is_compatible` inspects its `index_format_version`.
pub version: Version,
/// Checksum handed to `Footer::new`.
pub crc: CrcHashU32,
}
impl Footer {
    /// Builds a footer holding `crc` and a copy of `crate::VERSION`.
    pub fn new(crc: CrcHashU32) -> Self {
        let version = crate::VERSION.clone();
        Footer { version, crc }
    }

    /// Returns the checksum stored in this footer.
    pub fn crc(&self) -> CrcHashU32 {
        self.crc
    }

    /// Appends this footer to `write`.
    ///
    /// The bytes are written in this order:
    /// 1. the footer serialized as JSON (the payload),
    /// 2. the payload length in bytes, as a `u32`,
    /// 3. `FOOTER_MAGIC_NUMBER`, as a `u32`.
    ///
    /// # Errors
    /// Returns any error raised by JSON serialization or by the underlying
    /// writer, and `io::ErrorKind::InvalidInput` if the payload length does not
    /// fit into a `u32`.
    pub fn append_footer<W: io::Write>(&self, mut write: &mut W) -> io::Result<()> {
        // The counting wrapper tells us how many payload bytes actually went out.
        let mut counting_write = CountingWriter::wrap(&mut write);
        counting_write.write_all(&serde_json::to_vec(self)?)?;
        // Checked conversion: a silent truncation here would record a wrong
        // length and make the file unreadable.
        let footer_payload_len = u32::try_from(counting_write.written_bytes())
            .map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err))?;
        BinarySerializable::serialize(&footer_payload_len, write)?;
        BinarySerializable::serialize(&FOOTER_MAGIC_NUMBER, write)?;
        Ok(())
    }

    /// Splits `file` into its parsed footer and the body that precedes it.
    ///
    /// The last `<(u32, u32)>::SIZE_IN_BYTES` bytes of the file hold the footer
    /// payload length followed by the magic number; the JSON payload sits right
    /// before them.
    ///
    /// # Errors
    /// * `io::ErrorKind::UnexpectedEof` if the file is too short to hold the
    ///   trailing metadata, or shorter than the footer size it announces.
    /// * `io::ErrorKind::InvalidData` if the magic number does not match or the
    ///   announced footer length exceeds `FOOTER_MAX_LEN`.
    /// * Any error raised while reading the bytes or parsing the JSON payload.
    pub fn extract_footer(file: FileSlice) -> io::Result<(Footer, FileSlice)> {
        let footer_metadata_len = <(u32, u32)>::SIZE_IN_BYTES;
        let file_len = file.len();
        // The trailing metadata must fit in the file before we slice it off the
        // end; checking against a smaller constant would let short files reach
        // the read below.
        if file_len < footer_metadata_len {
            return Err(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                format!(
                    "File corrupted. The file is smaller than {footer_metadata_len} bytes \
                     (len={file_len})."
                ),
            ));
        }
        let (footer_len, footer_magic_byte): (u32, u32) = file
            .slice_from_end(footer_metadata_len)
            .read_bytes()?
            .as_ref()
            .deserialize()?;
        if footer_magic_byte != FOOTER_MAGIC_NUMBER {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "Footer magic byte mismatch. File corrupted or index was created using old an \
                 tantivy version which is not supported anymore. Please use tantivy 0.15 or above \
                 to recreate the index.",
            ));
        }
        // Reject absurd lengths before using them to compute offsets.
        if footer_len > FOOTER_MAX_LEN {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!(
                    "Footer seems invalid as it suggests a footer len of {footer_len}. File is \
                     corrupted, or the index was created with a different & old version of \
                     tantivy."
                ),
            ));
        }
        let total_footer_size = footer_len as usize + footer_metadata_len;
        if file_len < total_footer_size {
            return Err(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                format!(
                    "File corrupted. The file is smaller than it's footer bytes \
                     (len={total_footer_size})."
                ),
            ));
        }
        let body_len = file_len - total_footer_size;
        let payload_end = file_len - footer_metadata_len;
        let footer_bytes = file.read_bytes_slice(body_len..payload_end)?;
        let footer: Footer = serde_json::from_slice(&footer_bytes)?;
        let body = file.slice_to(body_len);
        Ok((footer, body))
    }

    /// Confirms that the index will be read correctly by this version of tantivy.
    ///
    /// The footer's `index_format_version` must lie within the inclusive range
    /// `INDEX_FORMAT_OLDEST_SUPPORTED_VERSION..=INDEX_FORMAT_VERSION`.
    /// Intended to be called on a footer obtained from `extract_footer`.
    ///
    /// # Errors
    /// Returns `Incompatibility::IndexMismatch`, carrying both the library
    /// version and the footer version, when the format version is out of range.
    pub fn is_compatible(&self) -> Result<(), Incompatibility> {
        const SUPPORTED_INDEX_FORMAT_VERSION_RANGE: std::ops::RangeInclusive<u32> =
            INDEX_FORMAT_OLDEST_SUPPORTED_VERSION..=INDEX_FORMAT_VERSION;
        let library_version = crate::version();
        if !SUPPORTED_INDEX_FORMAT_VERSION_RANGE.contains(&self.version.index_format_version) {
            return Err(Incompatibility::IndexMismatch {
                library_version: library_version.clone(),
                index_version: self.version.clone(),
            });
        }
        Ok(())
    }
}
/// Writer wrapper that hashes the bytes accepted by the wrapped writer and, on
/// termination, appends a `Footer` carrying the resulting checksum.
pub(crate) struct FooterProxy<W: TerminatingWrite> {
/// Running hasher over the bytes written so far.
/// always Some except after terminate call
hasher: Option<Hasher>,
/// The wrapped writer.
/// always Some except after terminate call
writer: Option<W>,
}
impl<W: TerminatingWrite> FooterProxy<W> {
    /// Wraps `writer`, pairing it with a freshly created hasher.
    pub fn new(writer: W) -> Self {
        let hasher = Hasher::new();
        Self {
            hasher: Some(hasher),
            writer: Some(writer),
        }
    }
}
impl<W: TerminatingWrite> Write for FooterProxy<W> {
    /// Forwards `buf` to the wrapped writer, then feeds the hasher with exactly
    /// the bytes the writer reported as accepted.
    ///
    /// Panics if called after the proxy has been terminated.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let inner = self.writer.as_mut().unwrap();
        let accepted = inner.write(buf)?;
        let hasher = self.hasher.as_mut().unwrap();
        hasher.update(&buf[..accepted]);
        Ok(accepted)
    }

    /// Flushes the wrapped writer.
    ///
    /// Panics if called after the proxy has been terminated.
    fn flush(&mut self) -> io::Result<()> {
        let inner = self.writer.as_mut().unwrap();
        inner.flush()
    }
}
impl<W: TerminatingWrite> TerminatingWrite for FooterProxy<W> {
    /// Finalizes the checksum, appends the footer to the wrapped writer and
    /// then terminates that writer.
    ///
    /// Takes both the hasher and the writer out of `self`, so the proxy must
    /// not be used afterwards.
    fn terminate_ref(&mut self, _: AntiCallToken) -> io::Result<()> {
        let hasher = self.hasher.take().unwrap();
        let footer = Footer::new(hasher.finalize());
        let mut inner = self.writer.take().unwrap();
        footer.append_footer(&mut inner)?;
        inner.terminate()
    }
}
#[cfg(test)]
mod tests {
    use std::io;
    use std::sync::Arc;

    use common::BinarySerializable;

    use crate::directory::footer::{Footer, FOOTER_MAGIC_NUMBER};
    use crate::directory::{FileSlice, OwnedBytes};

    /// Turns an in-memory buffer into a `FileSlice`.
    fn into_file_slice(bytes: Vec<u8>) -> FileSlice {
        let owned_bytes = OwnedBytes::new(bytes);
        FileSlice::new(Arc::new(owned_bytes))
    }

    /// Builds a buffer made only of the trailing footer metadata:
    /// the announced footer length followed by `magic`.
    fn footer_metadata(footer_len: u32, magic: u32) -> Vec<u8> {
        let mut bytes: Vec<u8> = vec![];
        BinarySerializable::serialize(&footer_len, &mut bytes).unwrap();
        BinarySerializable::serialize(&magic, &mut bytes).unwrap();
        bytes
    }

    /// A footer written by `append_footer` is read back by `extract_footer`.
    #[test]
    fn test_deserialize_footer() {
        let original = Footer::new(123);
        let mut bytes: Vec<u8> = vec![];
        original.append_footer(&mut bytes).unwrap();
        let (roundtripped, _body) = Footer::extract_footer(into_file_slice(bytes)).unwrap();
        assert_eq!(roundtripped.crc(), original.crc());
    }

    /// A wrong magic number is reported with the dedicated message.
    #[test]
    fn test_deserialize_footer_missing_magic_byte() {
        let wrong_magic_byte: u32 = 5555;
        let file = into_file_slice(footer_metadata(0_u32, wrong_magic_byte));
        let err = Footer::extract_footer(file).unwrap_err();
        assert_eq!(
            err.to_string(),
            "Footer magic byte mismatch. File corrupted or index was created using old an tantivy \
             version which is not supported anymore. Please use tantivy 0.15 or above to recreate \
             the index."
        );
    }

    /// A file shorter than the footer it announces yields `UnexpectedEof`.
    #[test]
    fn test_deserialize_footer_wrong_filesize() {
        let file = into_file_slice(footer_metadata(100_u32, FOOTER_MAGIC_NUMBER));
        let err = Footer::extract_footer(file).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof);
        assert_eq!(
            err.to_string(),
            "File corrupted. The file is smaller than it\'s footer bytes (len=108)."
        );
    }

    /// An announced footer length above `FOOTER_MAX_LEN` yields `InvalidData`.
    #[test]
    fn test_deserialize_too_large_footer() {
        let footer_length = super::FOOTER_MAX_LEN + 1;
        let file = into_file_slice(footer_metadata(footer_length, FOOTER_MAGIC_NUMBER));
        let err = Footer::extract_footer(file).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
        assert_eq!(
            err.to_string(),
            "Footer seems invalid as it suggests a footer len of 50001. File is corrupted, or the \
             index was created with a different & old version of tantivy."
        );
    }
}