generated from fastai/fast_template
-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathreferences.bib
458 lines (458 loc) · 19.2 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
% CLIP: arXiv preprint 2103.00020 (record exported from dblp).
@article{clip,
  author     = {Alec Radford and Jong Wook Kim and Chris Hallacy and
                Aditya Ramesh and Gabriel Goh and Sandhini Agarwal and
                Girish Sastry and Amanda Askell and Pamela Mishkin and
                Jack Clark and Gretchen Krueger and Ilya Sutskever},
  title      = {Learning Transferable Visual Models From Natural Language Supervision},
  journal    = {CoRR},
  volume     = {abs/2103.00020},
  year       = {2021},
  eprinttype = {arXiv},
  eprint     = {2103.00020},
  url        = {https://arxiv.org/abs/2103.00020},
  timestamp  = {Thu, 04 Mar 2021 17:00:40 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2103-00020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% OpenCLIP software release archived on Zenodo (version 0.1).
% The title is brace-protected so sentence-casing styles keep its capitals.
@software{openclip,
  author    = {Ilharco, Gabriel and Wortsman, Mitchell and Wightman, Ross and
               Gordon, Cade and Carlini, Nicholas and Taori, Rohan and
               Dave, Achal and Shankar, Vaishaal and Namkoong, Hongseok and
               Miller, John and Hajishirzi, Hannaneh and Farhadi, Ali and
               Schmidt, Ludwig},
  title     = {{OpenCLIP}},
  month     = jul,
  year      = 2021,
  publisher = {Zenodo},
  version   = {0.1},
  doi       = {10.5281/zenodo.5143773},
  url       = {https://doi.org/10.5281/zenodo.5143773}
}
% ALIGN: arXiv preprint 2102.05918 (record exported from dblp).
@article{align,
  author     = {Chao Jia and Yinfei Yang and Ye Xia and Yi{-}Ting Chen and
                Zarana Parekh and Hieu Pham and Quoc V. Le and
                Yun{-}Hsuan Sung and Zhen Li and Tom Duerig},
  title      = {Scaling Up Visual and Vision-Language Representation Learning With Noisy Text Supervision},
  journal    = {CoRR},
  volume     = {abs/2102.05918},
  year       = {2021},
  eprinttype = {arXiv},
  eprint     = {2102.05918},
  url        = {https://arxiv.org/abs/2102.05918},
  timestamp  = {Wed, 05 May 2021 14:06:23 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-05918.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ResNet: arXiv preprint 1512.03385 (record exported from dblp).
@article{resnet,
  author     = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
  title      = {Deep Residual Learning for Image Recognition},
  journal    = {CoRR},
  volume     = {abs/1512.03385},
  year       = {2015},
  eprinttype = {arXiv},
  eprint     = {1512.03385},
  url        = {http://arxiv.org/abs/1512.03385},
  timestamp  = {Wed, 25 Jan 2023 11:01:16 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/HeZRS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Bag of Tricks: arXiv preprint 1812.01187 (record exported from dblp).
@article{bag_of_tricks,
  author     = {Tong He and Zhi Zhang and Hang Zhang and
                Zhongyue Zhang and Junyuan Xie and Mu Li},
  title      = {Bag of Tricks for Image Classification with Convolutional Neural Networks},
  journal    = {CoRR},
  volume     = {abs/1812.01187},
  year       = {2018},
  eprinttype = {arXiv},
  eprint     = {1812.01187},
  url        = {http://arxiv.org/abs/1812.01187},
  timestamp  = {Tue, 22 Sep 2020 07:46:20 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1812-01187.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Vision Transformer: arXiv preprint 2010.11929 (record exported from dblp).
@article{vit,
  author     = {Alexey Dosovitskiy and Lucas Beyer and Alexander Kolesnikov and
                Dirk Weissenborn and Xiaohua Zhai and Thomas Unterthiner and
                Mostafa Dehghani and Matthias Minderer and Georg Heigold and
                Sylvain Gelly and Jakob Uszkoreit and Neil Houlsby},
  title      = {An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale},
  journal    = {CoRR},
  volume     = {abs/2010.11929},
  year       = {2020},
  eprinttype = {arXiv},
  eprint     = {2010.11929},
  url        = {https://arxiv.org/abs/2010.11929},
  timestamp  = {Fri, 20 Nov 2020 14:04:05 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-11929.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% BlurPool: arXiv preprint 1904.11486 (record exported from dblp).
@article{blurpool,
  author     = {Richard Zhang},
  title      = {Making Convolutional Networks Shift-Invariant Again},
  journal    = {CoRR},
  volume     = {abs/1904.11486},
  year       = {2019},
  eprinttype = {arXiv},
  eprint     = {1904.11486},
  url        = {http://arxiv.org/abs/1904.11486},
  timestamp  = {Fri, 12 Aug 2022 13:45:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1904-11486.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Rotation-equivariant CNNs for pathology: arXiv preprint 1806.03962.
% Typed as misc because no journal is given; the arXiv identifier lives in the
% eprint fields, and the acronym in the title is brace-protected.
@misc{rotation_equivalent_cnn,
  author        = {Veeling, Bastiaan S. and Linmans, Jasper and Winkens, Jim and Cohen, Taco and Welling, Max},
  title         = {Rotation Equivariant {CNNs} for Digital Pathology},
  year          = {2018},
  publisher     = {arXiv},
  eprint        = {1806.03962},
  archivePrefix = {arXiv},
  doi           = {10.48550/ARXIV.1806.03962},
  url           = {https://arxiv.org/abs/1806.03962},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), Machine Learning (cs.LG), Machine Learning (stat.ML), FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license}
}
% Transformer paper: arXiv preprint 1706.03762 (record exported from dblp).
@article{attention,
  author     = {Ashish Vaswani and Noam Shazeer and Niki Parmar and
                Jakob Uszkoreit and Llion Jones and Aidan N. Gomez and
                Lukasz Kaiser and Illia Polosukhin},
  title      = {Attention Is All You Need},
  journal    = {CoRR},
  volume     = {abs/1706.03762},
  year       = {2017},
  eprinttype = {arXiv},
  eprint     = {1706.03762},
  url        = {http://arxiv.org/abs/1706.03762},
  timestamp  = {Sat, 23 Jan 2021 01:20:40 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/VaswaniSPUJGKP17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% LaMini-LM: arXiv preprint 2304.14402 (cs.CL).
% Typed as misc because no journal is given; the model name is brace-protected
% so sentence-casing styles keep its capitals.
@misc{laminilm,
  author        = {Minghao Wu and Abdul Waheed and Chiyu Zhang and
                   Muhammad Abdul-Mageed and Alham Fikri Aji},
  title         = {{LaMini-LM}: A Diverse Herd of Distilled Models from Large-Scale Instructions},
  year          = {2023},
  eprint        = {2304.14402},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% Self-Instruct: arXiv preprint 2212.10560 (cs.CL).
@misc{selfinstruct,
  author        = {Yizhong Wang and Yeganeh Kordi and Swaroop Mishra and
                   Alisa Liu and Noah A. Smith and Daniel Khashabi and
                   Hannaneh Hajishirzi},
  title         = {Self-Instruct: Aligning Language Model with Self Generated Instructions},
  year          = {2022},
  eprint        = {2212.10560},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% Multitask prompted training (key: p3): arXiv preprint 2110.08207 (cs.LG).
% "Le Scao" is written in "Last, First" form; in "First Last" form BibTeX
% would take only "Scao" as the surname.
@misc{p3,
  author        = {Victor Sanh and Albert Webson and Colin Raffel and
                   Stephen H. Bach and Lintang Sutawika and Zaid Alyafeai and
                   Antoine Chaffin and Arnaud Stiegler and Le Scao, Teven and
                   Arun Raja and Manan Dey and M Saiful Bari and Canwen Xu and
                   Urmish Thakker and Shanya Sharma Sharma and
                   Eliza Szczechla and Taewoon Kim and Gunjan Chhablani and
                   Nihal Nayak and Debajyoti Datta and Jonathan Chang and
                   Mike Tian-Jian Jiang and Han Wang and Matteo Manica and
                   Sheng Shen and Zheng Xin Yong and Harshit Pandey and
                   Rachel Bawden and Thomas Wang and Trishala Neeraj and
                   Jos Rozen and Abheesht Sharma and Andrea Santilli and
                   Thibault Fevry and Jason Alan Fries and Ryan Teehan and
                   Tali Bers and Stella Biderman and Leo Gao and
                   Thomas Wolf and Alexander M. Rush},
  title         = {Multitask Prompted Training Enables Zero-Shot Task Generalization},
  year          = {2022},
  eprint        = {2110.08207},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG}
}
% Flan Collection: arXiv preprint 2301.13688 (cs.AI).
% "Flan" is brace-protected so sentence-casing styles keep its capital.
@misc{flan,
  author        = {Shayne Longpre and Le Hou and Tu Vu and Albert Webson and
                   Hyung Won Chung and Yi Tay and Denny Zhou and Quoc V. Le and
                   Barret Zoph and Jason Wei and Adam Roberts},
  title         = {The {Flan} Collection: Designing Data and Methods for Effective Instruction Tuning},
  year          = {2023},
  eprint        = {2301.13688},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI}
}
% Stanford Alpaca GitHub repository.
% Proper names in the title are brace-protected so sentence-casing styles keep
% their capitals; the link stays in howpublished, as in the original entry.
@misc{alpaca,
  author       = {Rohan Taori and Ishaan Gulrajani and Tianyi Zhang and
                  Yann Dubois and Xuechen Li and Carlos Guestrin and
                  Percy Liang and Tatsunori B. Hashimoto},
  title        = {{Stanford Alpaca}: An Instruction-following {LLaMA} model},
  year         = {2023},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/tatsu-lab/stanford_alpaca}},
}
% Instruction-finetuned language models (key: flant5): arXiv preprint 2210.11416 (cs.LG).
@misc{flant5,
  author        = {Hyung Won Chung and Le Hou and Shayne Longpre and
                   Barret Zoph and Yi Tay and William Fedus and Yunxuan Li and
                   Xuezhi Wang and Mostafa Dehghani and Siddhartha Brahma and
                   Albert Webson and Shixiang Shane Gu and Zhuyun Dai and
                   Mirac Suzgun and Xinyun Chen and Aakanksha Chowdhery and
                   Alex Castro-Ros and Marie Pellat and Kevin Robinson and
                   Dasha Valter and Sharan Narang and Gaurav Mishra and
                   Adams Yu and Vincent Zhao and Yanping Huang and
                   Andrew Dai and Hongkun Yu and Slav Petrov and Ed H. Chi and
                   Jeff Dean and Jacob Devlin and Adam Roberts and
                   Denny Zhou and Quoc V. Le and Jason Wei},
  title         = {Scaling Instruction-Finetuned Language Models},
  year          = {2022},
  eprint        = {2210.11416},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG}
}
% Cerebras-GPT: arXiv preprint 2304.03208 (cs.LG).
% "Zhiming Chen" is a single author; a stray "and" between the two words would
% make BibTeX list two separate people. Product names are brace-protected.
@misc{cerebrasgpt,
  author        = {Nolan Dey and Gurpreet Gosal and Zhiming Chen and
                   Hemant Khachane and William Marshall and Ribhu Pathria and
                   Marvin Tom and Joel Hestness},
  title         = {{Cerebras-GPT}: Open Compute-Optimal Language Models Trained on the {Cerebras} Wafer-Scale Cluster},
  year          = {2023},
  eprint        = {2304.03208},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG}
}
% The Pile dataset: arXiv preprint 2101.00027 (cs.CL).
% "Pile" and "800GB" are brace-protected so sentence-casing styles keep them.
@misc{pile,
  author        = {Leo Gao and Stella Biderman and Sid Black and
                   Laurence Golding and Travis Hoppe and Charles Foster and
                   Jason Phang and Horace He and Anish Thite and
                   Noa Nabeshima and Shawn Presser and Connor Leahy},
  title         = {The {Pile}: An {800GB} Dataset of Diverse Text for Language Modeling},
  year          = {2020},
  eprint        = {2101.00027},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% Few-shot language model evaluation framework, archived on Zenodo (version v0.0.1).
@software{eval-harness,
  author    = {Gao, Leo and Tow, Jonathan and Biderman, Stella and
               Black, Sid and DiPofi, Anthony and Foster, Charles and
               Golding, Laurence and Hsu, Jeffrey and McDonell, Kyle and
               Muennighoff, Niklas and Phang, Jason and Reynolds, Laria and
               Tang, Eric and Thite, Anish and Wang, Ben and Wang, Kevin and
               Zou, Andy},
  title     = {A framework for few-shot language model evaluation},
  month     = sep,
  year      = 2021,
  publisher = {Zenodo},
  version   = {v0.0.1},
  doi       = {10.5281/zenodo.5371628},
  url       = {https://doi.org/10.5281/zenodo.5371628}
}
% OpenBookQA dataset paper: arXiv preprint 1809.02789 (cs.CL).
@misc{openbookqa,
  author        = {Todor Mihaylov and Peter Clark and Tushar Khot and Ashish Sabharwal},
  title         = {Can a Suit of Armor Conduct Electricity? A New Dataset for Open Book Question Answering},
  year          = {2018},
  eprint        = {1809.02789},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% BoolQ dataset paper: arXiv preprint 1905.10044 (cs.CL).
% The dataset name is brace-protected so sentence-casing styles keep its capitals.
@misc{boolq,
  author        = {Christopher Clark and Kenton Lee and Ming-Wei Chang and
                   Tom Kwiatkowski and Michael Collins and Kristina Toutanova},
  title         = {{BoolQ}: Exploring the Surprising Difficulty of Natural Yes/No Questions},
  year          = {2019},
  eprint        = {1905.10044},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% PIQA dataset paper: arXiv preprint 1911.11641 (cs.CL).
% "Le Bras" is written in "Last, First" form; in "First Last" form BibTeX
% would take only "Bras" as the surname. The acronym is brace-protected.
@misc{piqa,
  author        = {Yonatan Bisk and Rowan Zellers and Le Bras, Ronan and
                   Jianfeng Gao and Yejin Choi},
  title         = {{PIQA}: Reasoning about Physical Commonsense in Natural Language},
  year          = {2019},
  eprint        = {1911.11641},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% LLaMA: arXiv preprint 2302.13971 (cs.CL).
% The model name is brace-protected so sentence-casing styles keep its capitals.
@misc{llama,
  author        = {Hugo Touvron and Thibaut Lavril and Gautier Izacard and
                   Xavier Martinet and Marie-Anne Lachaux and
                   Timothée Lacroix and Baptiste Rozière and Naman Goyal and
                   Eric Hambro and Faisal Azhar and Aurelien Rodriguez and
                   Armand Joulin and Edouard Grave and Guillaume Lample},
  title         = {{LLaMA}: Open and Efficient Foundation Language Models},
  year          = {2023},
  eprint        = {2302.13971},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% Pythia model suite: arXiv preprint 2304.01373 (cs.CL).
@misc{pythia,
  author        = {Stella Biderman and Hailey Schoelkopf and Quentin Anthony and
                   Herbie Bradley and Kyle O'Brien and Eric Hallahan and
                   Mohammad Aflah Khan and Shivanshu Purohit and
                   USVSN Sai Prashanth and Edward Raff and Aviya Skowron and
                   Lintang Sutawika and Oskar van der Wal},
  title         = {Pythia: A Suite for Analyzing Large Language Models Across Training and Scaling},
  year          = {2023},
  eprint        = {2304.01373},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% Segment Anything in Medical Images (key: medsam): arXiv preprint 2304.12306 (eess.IV).
@misc{medsam,
  author        = {Jun Ma and Bo Wang},
  title         = {Segment Anything in Medical Images},
  year          = {2023},
  eprint        = {2304.12306},
  archivePrefix = {arXiv},
  primaryClass  = {eess.IV}
}
% Contrastive learning framework (key: simclr): arXiv preprint 2002.05709 (cs.LG).
@misc{simclr,
  author        = {Ting Chen and Simon Kornblith and Mohammad Norouzi and Geoffrey Hinton},
  title         = {A Simple Framework for Contrastive Learning of Visual Representations},
  year          = {2020},
  eprint        = {2002.05709},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG}
}
% LoRA: arXiv preprint 2106.09685 (cs.CL).
% The method name is brace-protected so sentence-casing styles keep its capitals.
@misc{lora,
  author        = {Edward J. Hu and Yelong Shen and Phillip Wallis and
                   Zeyuan Allen-Zhu and Yuanzhi Li and Shean Wang and
                   Lu Wang and Weizhu Chen},
  title         = {{LoRA}: Low-Rank Adaptation of Large Language Models},
  year          = {2021},
  eprint        = {2106.09685},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% 4M: arXiv preprint 2312.06647 (cs.CV).
% The model name is brace-protected so sentence-casing styles keep its capital.
@misc{4m,
  author        = {David Mizrahi and Roman Bachmann and Oğuzhan Fatih Kar and
                   Teresa Yeo and Mingfei Gao and Afshin Dehghan and Amir Zamir},
  title         = {{4M}: Massively Multimodal Masked Modeling},
  year          = {2023},
  eprint        = {2312.06647},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/2312.06647},
}
% 4M-21: arXiv preprint 2406.09406 (cs.CV).
% The model name is brace-protected so sentence-casing styles keep its capital.
@misc{4m-21,
  author        = {Roman Bachmann and Oğuzhan Fatih Kar and David Mizrahi and
                   Ali Garjani and Mingfei Gao and David Griffiths and
                   Jiaming Hu and Afshin Dehghan and Amir Zamir},
  title         = {{4M-21}: An Any-to-Any Vision Model for Tens of Tasks and Modalities},
  year          = {2024},
  eprint        = {2406.09406},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/2406.09406},
}
% ImageBind: arXiv preprint 2305.05665 (cs.CV).
% The model name is brace-protected so sentence-casing styles keep its capitals.
@misc{imagebind,
  author        = {Rohit Girdhar and Alaaeldin El-Nouby and Zhuang Liu and
                   Mannat Singh and Kalyan Vasudev Alwala and Armand Joulin and
                   Ishan Misra},
  title         = {{ImageBind}: One Embedding Space To Bind Them All},
  year          = {2023},
  eprint        = {2305.05665},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/2305.05665},
}
% DINOv2: arXiv preprint 2304.07193 (cs.CV).
% The model name is brace-protected so sentence-casing styles keep its capitals.
@misc{dinov2,
  author        = {Maxime Oquab and Timothée Darcet and Théo Moutakanni and
                   Huy Vo and Marc Szafraniec and Vasil Khalidov and
                   Pierre Fernandez and Daniel Haziza and Francisco Massa and
                   Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and
                   Wojciech Galuba and Russell Howes and Po-Yao Huang and
                   Shang-Wen Li and Ishan Misra and Michael Rabbat and
                   Vasu Sharma and Gabriel Synnaeve and Hu Xu and
                   Hervé Jegou and Julien Mairal and Patrick Labatut and
                   Armand Joulin and Piotr Bojanowski},
  title         = {{DINOv2}: Learning Robust Visual Features without Supervision},
  year          = {2024},
  eprint        = {2304.07193},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/2304.07193},
}
% Albumentations: journal article in Information, volume 11, number 2.
@article{albu,
  author         = {Buslaev, Alexander and Iglovikov, Vladimir I. and Khvedchenya, Eugene and Parinov, Alex and Druzhinin, Mikhail and Kalinin, Alexandr A.},
  title          = {Albumentations: Fast and Flexible Image Augmentations},
  journal        = {Information},
  volume         = {11},
  number         = {2},
  year           = {2020},
  article-number = {125},
  issn           = {2078-2489},
  doi            = {10.3390/info11020125},
  url            = {https://www.mdpi.com/2078-2489/11/2/125}
}
% Longformer: arXiv preprint 2004.05150 (cs.CL).
@misc{longformer,
  author        = {Iz Beltagy and Matthew E. Peters and Arman Cohan},
  title         = {Longformer: The Long-Document Transformer},
  year          = {2020},
  eprint        = {2004.05150},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2004.05150},
}
% GQA: arXiv preprint 2305.13245 (cs.CL).
% The acronym is brace-protected so sentence-casing styles keep its capitals.
@misc{gqa,
  author        = {Joshua Ainslie and James Lee-Thorp and Michiel de Jong and
                   Yury Zemlyanskiy and Federico Lebrón and Sumit Sanghai},
  title         = {{GQA}: Training Generalized Multi-Query Transformer Models from Multi-Head Checkpoints},
  year          = {2023},
  eprint        = {2305.13245},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2305.13245},
}
% Fast Transformer Decoding (key: mqa): arXiv preprint 1911.02150 (cs.NE).
@misc{mqa,
  author        = {Noam Shazeer},
  title         = {Fast Transformer Decoding: One Write-Head is All You Need},
  year          = {2019},
  eprint        = {1911.02150},
  archivePrefix = {arXiv},
  primaryClass  = {cs.NE},
  url           = {https://arxiv.org/abs/1911.02150},
}
% Llama 3 model card hosted on GitHub.
% Typed as misc because no journal is given. The organisation name is
% double-braced so BibTeX treats it as one indivisible name, and the model
% name is brace-protected against sentence-casing.
@misc{llama3,
  author = {{AI@Meta}},
  title  = {{Llama 3} Model Card},
  year   = {2024},
  url    = {https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md}
}
% Knowledge distillation (key: kd): arXiv preprint 1503.02531 (stat.ML).
@misc{kd,
  author        = {Geoffrey Hinton and Oriol Vinyals and Jeff Dean},
  title         = {Distilling the Knowledge in a Neural Network},
  year          = {2015},
  eprint        = {1503.02531},
  archivePrefix = {arXiv},
  primaryClass  = {stat.ML},
  url           = {https://arxiv.org/abs/1503.02531},
}
% On-policy distillation of language models: arXiv preprint 2306.13649 (cs.LG).
@misc{onpolicydistillation,
  author        = {Rishabh Agarwal and Nino Vieillard and Yongchao Zhou and
                   Piotr Stanczyk and Sabela Ramos and Matthieu Geist and
                   Olivier Bachem},
  title         = {On-Policy Distillation of Language Models: Learning from Self-Generated Mistakes},
  year          = {2024},
  eprint        = {2306.13649},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2306.13649},
}
% RoFormer: arXiv preprint 2104.09864 (cs.CL).
% The model name is brace-protected so sentence-casing styles keep its capitals.
@misc{roformer,
  author        = {Jianlin Su and Yu Lu and Shengfeng Pan and Ahmed Murtadha and
                   Bo Wen and Yunfeng Liu},
  title         = {{RoFormer}: Enhanced Transformer with Rotary Position Embedding},
  year          = {2023},
  eprint        = {2104.09864},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2104.09864},
}
% Transformers as universal approximators: arXiv preprint 1912.10077 (cs.LG).
@misc{yun2020,
  author        = {Chulhee Yun and Srinadh Bhojanapalli and Ankit Singh Rawat and
                   Sashank J. Reddi and Sanjiv Kumar},
  title         = {Are Transformers universal approximators of sequence-to-sequence functions?},
  year          = {2020},
  eprint        = {1912.10077},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/1912.10077},
}
% WARP: arXiv preprint 2406.16768 (cs.LG).
% The acronym is brace-protected so sentence-casing styles keep its capitals.
@misc{warp,
  author        = {Alexandre Ramé and Johan Ferret and Nino Vieillard and
                   Robert Dadashi and Léonard Hussenot and Pierre-Louis Cedoz and
                   Pier Giuseppe Sessa and Sertan Girgin and Arthur Douillard and
                   Olivier Bachem},
  title         = {{WARP}: On the Benefits of Weight Averaged Rewarded Policies},
  year          = {2024},
  eprint        = {2406.16768},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2406.16768},
}
% MergeKit paper: arXiv preprint 2403.13257.
% The arXiv identifier is stored in the eprint fields instead of a journal
% string, and the toolkit name is brace-protected against sentence-casing.
@misc{mergekit,
  author        = {Goddard, Charles and Siriwardhana, Shamane and Ehghaghi, Malikeh and Meyers, Luke and Karpukhin, Vlad and Benedict, Brian and McQuade, Mark and Solawetz, Jacob},
  title         = {Arcee's {MergeKit}: A Toolkit for Merging Large Language Models},
  year          = {2024},
  eprint        = {2403.13257},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2403.13257}
}