-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrefs.bib
1652 lines (1540 loc) · 69.5 KB
/
refs.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% Hierarchical attention networks for document classification (NAACL-HLT 2016).
% The abstract previously contained a stray pasted Overleaf edit link; it has been removed.
@inproceedings{yang_etal_Attention_naacl2016,
  author    = {Yang, Zichao and Yang, Diyi and Dyer, Chris and He, Xiaodong and Smola, Alex and Hovy, Eduard},
  title     = {Hierarchical attention networks for document classification},
  booktitle = {Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  pages     = {1480--1489},
  year      = {2016},
  abstract  = {We propose a hierarchical attention network for document classification. Our model has two distinctive characteristics: (i) it has a hierarchical structure that mirrors the hierarchical structure of documents; (ii) it has two levels of attention mechanisms applied at the word- and sentence-level, enabling it to attend differentially to more and less important content when constructing the document representation. Experiments conducted on six large scale text classification tasks demonstrate that the proposed architecture outperform previous methods by a substantial margin. Visualization of the attention layers illustrates that the model selects qualitatively informative words and sentences.},
  url       = {https://www.microsoft.com/en-us/research/publication/hierarchical-attention-networks-document-classification/},
}
% Character recognition in natural images (VISAPP 2009); url points at the chars74k demo page.
% `psurl` and `owner` are non-standard fields ignored by standard styles; kept verbatim.
@inproceedings{deCampos-VISAPP-2009,
  author    = {de Campos, T. E. and Babu, B. R. and Varma, M.},
  title     = {Character Recognition in Natural Images},
  booktitle = {Proceedings of the International Conference on Computer Vision Theory and Applications},
  year      = {2009},
  month     = feb,
  address   = {Lisbon, Portugal},
  url       = {http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/},
  psurl     = { <a href="http://www.ee.surrey.ac.uk/CVSSP/Publications/papers/deCampos-VISAPP-2009.pdf">PDF</a> },
  owner     = {td0005},
}
% PhD thesis on text spotting (University of Oxford).
@phdthesis{jaderberg_dphil2015,
  title  = {Deep Learning for Text Spotting},
  author = {Max Jaderberg},
  year   = {2015},
  school = {University of Oxford},
  note   = {Project page: \url{http://www.robots.ox.ac.uk/~vgg/research/text/}},
}
% NOTE(review): `ijcv` is an @string macro that is not defined in the visible part of
% this file; confirm it is declared before this entry (or in a .bib file loaded earlier).
@article{jaderberg_etal_ijcv2016,
  author  = {Jaderberg, M. and Simonyan, K. and Vedaldi, A. and Zisserman, A.},
  title   = {Reading Text in the Wild with Convolutional Neural Networks},
  journal = ijcv,
  volume  = {116},
  number  = {1},
  pages   = {1--20},
  month   = jan,
  year    = {2016},
}
% =========== NLP DATABASE ========= %
% Drug-review sentiment dataset paper (Digital Health 2018).
% The author list had been copy-pasted from the Epshtein et al. entry; corrected here.
@inproceedings{Graber2018DrugDatabase,
  author       = {Gr{\"a}{\ss}er, Felix and Kallumadi, Surya and Malberg, Hagen and Zaunseder, Sebastian},
  title        = {Aspect-Based Sentiment Analysis of Drug Reviews Applying Cross-Domain and Cross-Data Learning},
  booktitle    = {Proceedings of the 2018 International Conference on Digital Health (DH '18)},
  pages        = {121--125},
  year         = {2018},
  organization = {ACM},
  address      = {New York, NY, USA},
  doi          = {10.1145/3194658.3194677},
}
% Roman Urdu NLP paper, published in the IJCSNS journal (vol. 18, no. 1, January 2018).
% NOTE(review): `pages` is carried over unchanged from the original entry; verify the page range.
@article{Sharf2017Urdu,
  author  = {Sharf, Zareen and Rahman, Saif Ur},
  title   = {Performing Natural Language Processing on {Roman} {Urdu} Datasets},
  journal = {{IJCSNS} International Journal of Computer Science and Network Security},
  volume  = {18},
  number  = {1},
  pages   = {213},
  month   = jan,
  year    = {2018},
}
% Multimodal damage identification in social media posts (ISCRAM 2018).
@inproceedings{Mouzannar2018Database,
  author       = {Mouzannar, Hussein and Rizk, Yara and Awad, Mariette},
  title        = {Damage Identification in Social Media Posts using Multimodal Deep Learning},
  booktitle    = {Proceedings of The 15th International Conference on Information Systems for Crisis Response and Management},
  pages        = {529--543},
  year         = {2018},
  organization = {ISCRAM},
  address      = {Rochester, USA},
}
% =========== WORD SPOTTING ========= %
% Text recognition using the stroke width transform (SWT).
@inproceedings{epshtein2010detecting,
  author       = {Epshtein, Boris and Ofek, Eyal and Wexler, Yonatan},
  title        = {Detecting text in natural scenes with stroke width transform},
  booktitle    = {Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on},
  organization = {IEEE},
  year         = {2010},
  pages        = {2963--2970},
}
% Reviews several document analysis techniques.
@incollection{marinai2008introduction,
  author    = {Marinai, Simone},
  title     = {Introduction to document analysis and recognition},
  booktitle = {Machine learning in document analysis and recognition},
  publisher = {Springer},
  year      = {2008},
  pages     = {1--20},
}
% Reviews residual layers.
% The exported `volume = 4` / `pages = 12` were scraper artefacts; replaced by the
% proceedings page range. Trailing period removed from the title (styles add their own).
@inproceedings{szegedy2017inception,
  author    = {Szegedy, Christian and Ioffe, Sergey and Vanhoucke, Vincent and Alemi, Alexander A.},
  title     = {{Inception-v4}, {Inception-ResNet} and the impact of residual connections on learning},
  booktitle = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence},
  pages     = {4278--4284},
  year      = {2017},
}
% Public resources for word spotting in Portuguese.
@article{gonccalo2018survey,
  author    = {Gon{\c{c}}alo Oliveira, Hugo},
  title     = {A Survey on {Portuguese} Lexical Knowledge Bases: Contents, Comparison and Combination},
  journal   = {Information},
  volume    = {9},
  number    = {2},
  pages     = {34},
  year      = {2018},
  publisher = {Multidisciplinary Digital Publishing Institute},
}
% Review of the ResNet model.
% `journal` is kept so classic styles still print the arXiv id; eprint fields added
% to match the other arXiv entries in this file.
@article{wu2016wider,
  author        = {Wu, Zifeng and Shen, Chunhua and van den Hengel, Anton},
  title         = {Wider or deeper: Revisiting the {ResNet} model for visual recognition},
  journal       = {arXiv preprint arXiv:1611.10080},
  year          = {2016},
  archivePrefix = {arXiv},
  eprint        = {1611.10080},
}
% Using R-CNN as a fast method for real-time object detection.
@inproceedings{ren2015faster,
  author    = {Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
  title     = {Faster {R-CNN}: Towards real-time object detection with region proposal networks},
  booktitle = {Advances in neural information processing systems},
  pages     = {91--99},
  year      = {2015},
}
% Explains the procedure for building the R-CNN and reviews the methods that
% led up to it.
@inproceedings{girshick2015fast,
  author    = {Girshick, Ross},
  title     = {Fast {R-CNN}},
  booktitle = {Proceedings of the IEEE international conference on computer vision},
  pages     = {1440--1448},
  year      = {2015},
}
% Reviews the terminology and techniques used in convolution, focusing on 2D convolution.
% The title previously ended with the scraper residue ". arXiv, 2016"; removed.
@article{dumoulin1603guide,
  author        = {Dumoulin, Vincent and Visin, Francesco},
  title         = {A guide to convolution arithmetic for deep learning},
  journal       = {arXiv preprint arXiv:1603.07285},
  year          = {2016},
  archivePrefix = {arXiv},
  eprint        = {1603.07285},
}
% doing
% Trailing period removed from the title (styles add their own punctuation).
@inproceedings{liao2017textboxes,
  author    = {Liao, Minghui and Shi, Baoguang and Bai, Xiang and Wang, Xinggang and Liu, Wenyu},
  title     = {{TextBoxes}: A Fast Text Detector with a Single Deep Neural Network},
  booktitle = {{AAAI}},
  pages     = {4161--4167},
  year      = {2017},
}
% Stemming experiments on Brazilian jurisprudential documents.
% The publisher export had split the names as "N. de Oliveira, Robert A." and
% "C. Junior, Methanias"; the middle initials are moved back into the given names.
@article{oliveira18,
  author         = {de Oliveira, Robert A. N. and Junior, Methanias C.},
  title          = {Experimental Analysis of Stemming on Jurisprudential Documents Retrieval},
  journal        = {Information},
  volume         = {9},
  year           = {2018},
  number         = {2},
  article_number = {28},
  url            = {http://www.mdpi.com/2078-2489/9/2/28},
  issn           = {2078-2489},
  abstract       = {Stemming algorithms are commonly used during textual preprocessing phase in order to reduce data dimensionality. However, this reduction presents different efficacy levels depending on the domain that it is applied to. Thus, for instance, there are reports in the literature that show the effect of stemming when applied to dictionaries or textual bases of news. On the other hand, we have not found any studies analyzing the impact of radicalization on Brazilian judicial jurisprudence, composed of decisions handed down by the judiciary, a fundamental instrument for law professionals to play their role. Thus, this work presents two complete experiments, showing the results obtained through the analysis and evaluation of the stemmers applied on real jurisprudential documents, originating from the Court of Justice of the State of Sergipe. In the first experiment, the results showed that, among the analyzed algorithms, the RSLP (Removedor de Sufixos da Lingua Portuguesa) possessed the greatest capacity of dimensionality reduction of the data. In the second one, through the evaluation of the stemming algorithms on the legal documents retrieval, the RSLP-S (Removedor de Sufixos da Lingua Portuguesa Singular) and UniNE (University of Neuchâtel), less aggressive stemmers, presented the best cost-benefit ratio, since they reduced the dimensionality of the data and increased the effectiveness of the information retrieval evaluation metrics in one of analyzed collections.},
  doi            = {10.3390/info9020028},
}
% Evaluation framework for automatic indexing / classification in a retrieval context.
@article{golub15,
  author   = {Golub, Koraljka and Soergel, Dagobert and Buchanan, George and Tudhope, Douglas and Lykke, Marianne and Hiom, Debra},
  title    = {A framework for evaluating automatic indexing or classification in the context of retrieval},
  journal  = {Journal of the Association for Information Science and Technology},
  volume   = {67},
  number   = {1},
  pages    = {3--16},
  year     = {2015},
  keywords = {automatic classification, automatic indexing, machine aided indexing},
  doi      = {10.1002/asi.23600},
  url      = {https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.23600},
  eprint   = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/asi.23600},
  abstract = {Tools for automatic subject assignment help deal with scale and sustainability in creating and enriching metadata, establishing more connections across and between resources and enhancing consistency. Although some software vendors and experimental researchers claim the tools can replace manual subject indexing, hard scientific evidence of their performance in operating information environments is scarce. A major reason for this is that research is usually conducted in laboratory conditions, excluding the complexities of real-life systems and situations. The article reviews and discusses issues with existing evaluation approaches such as problems of aboutness and relevance assessments, implying the need to use more than a single “gold standard” method when evaluating indexing and retrieval, and proposes a comprehensive evaluation framework. The framework is informed by a systematic review of the literature on evaluation approaches: evaluating indexing quality directly through assessment by an evaluator or through comparison with a gold standard, evaluating the quality of computer-assisted indexing directly in the context of an indexing workflow, and evaluating indexing quality indirectly through analyzing retrieval performance.},
}
% Machine categorization vs. manual review in legal e-discovery.
% The leading word "Abstract" left in the abstract text by the publisher export was removed.
@article{roitblat09,
  author   = {Roitblat, Herbert L. and Kershaw, Anne and Oot, Patrick},
  title    = {Document categorization in legal electronic discovery: computer classification vs. manual review},
  journal  = {Journal of the American Society for Information Science and Technology},
  volume   = {61},
  number   = {1},
  pages    = {70--80},
  year     = {2009},
  doi      = {10.1002/asi.21233},
  url      = {https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.21233},
  eprint   = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/asi.21233},
  abstract = {In litigation in the US, the parties are obligated to produce to one another, when requested, those documents that are potentially relevant to issues and facts of the litigation (called “discovery”). As the volume of electronic documents continues to grow, the expense of dealing with this obligation threatens to surpass the amounts at issue and the time to identify these relevant documents can delay a case for months or years. The same holds true for government investigations and third-parties served with subpoenas. As a result, litigants are looking for ways to reduce the time and expense of discovery. One approach is to supplant or reduce the traditional means of having people, usually attorneys, read each document, with automated procedures that use information retrieval and machine categorization to identify the relevant documents. This study compared an original categorization, obtained as part of a response to a Department of Justice Request and produced by having one or more of 225 attorneys review each document with automated categorization systems provided by two legal service providers. The goal was to determine whether the automated systems could categorize documents at least as well as human reviewers could, thereby saving time and expense. The results support the idea that machine categorization is no less accurate at identifying relevant/responsive documents than employing a team of reviewers. Based on these results, it would appear that using machine categorization can be a reasonable substitute for human review.},
}
% The fastai library, cited via its GitHub repository.
@misc{howard_fastai2018,
  author       = {Howard, Jeremy and others},
  title        = {fastai},
  howpublished = {\url{https://github.com/fastai/fastai}},
  publisher    = {GitHub},
  year         = {2018},
}
% dblp record for the Howard and Ruder arXiv preprint on fine-tuned language models.
@article{Howard18,
  title         = {Fine-tuned Language Models for Text Classification},
  author        = {Jeremy Howard and Sebastian Ruder},
  journal       = {CoRR},
  volume        = {abs/1801.06146},
  year          = {2018},
  archivePrefix = {arXiv},
  eprint        = {1801.06146},
  url           = {http://arxiv.org/abs/1801.06146},
  timestamp     = {Fri, 02 Feb 2018 14:20:25 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1801-06146},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Distributed word/phrase representations paper (NIPS'13), ACM DL export.
@inproceedings{Mikolov13,
  title     = {Distributed Representations of Words and Phrases and Their Compositionality},
  author    = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg and Dean, Jeffrey},
  booktitle = {Proceedings of the 26th International Conference on Neural Information Processing Systems - Volume 2},
  series    = {NIPS'13},
  pages     = {3111--3119},
  numpages  = {9},
  year      = {2013},
  location  = {Lake Tahoe, Nevada},
  publisher = {Curran Associates Inc.},
  address   = {USA},
  url       = {http://dl.acm.org/citation.cfm?id=2999792.2999959},
  acmid     = {2999959},
}
% dblp record for the pointer sentinel mixture models arXiv preprint.
@article{Merity16,
  title         = {Pointer Sentinel Mixture Models},
  author        = {Stephen Merity and Caiming Xiong and James Bradbury and Richard Socher},
  journal       = {CoRR},
  volume        = {abs/1609.07843},
  year          = {2016},
  archivePrefix = {arXiv},
  eprint        = {1609.07843},
  url           = {http://arxiv.org/abs/1609.07843},
  timestamp     = {Wed, 07 Jun 2017 14:42:14 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/MerityXBS16},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% On the Multi-Label matter
@article{6471714,
  author   = {Zhang, Min-Ling and Zhou, Zhi-Hua},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  title    = {A Review on Multi-Label Learning Algorithms},
  year     = {2014},
  volume   = {26},
  number   = {8},
  pages    = {1819--1837},
  keywords = {learning (artificial intelligence);multilabel learning algorithms;instance learning;machine learning paradigm;formal definition;evaluation metrics;learning settings;Training;Correlation;Supervised learning;Semantics;Machine learning algorithms;Algorithm design and analysis;Vectors;Computing Methodologies;Artificial Intelligence;Learning;Information Technology and Systems;Database Management;Database Applications;Data mining;Multi-label learning;label correlations;problem transformation;algorithm adaptation},
  doi      = {10.1109/TKDE.2013.39},
  issn     = {1041-4347},
  month    = aug,
}
% Label dependence and loss minimization in multi-label classification.
% Accents are written as BibTeX special characters ({\'n}, {\"u}) so sorting and
% label generation work under classic BibTeX.
@article{Dembczynski:2012:LDL:2339279.2339299,
  author     = {Dembczy{\'n}ski, Krzysztof and Waegeman, Willem and Cheng, Weiwei and H{\"u}llermeier, Eyke},
  title      = {On Label Dependence and Loss Minimization in Multi-label Classification},
  journal    = {Machine Learning},
  issue_date = {July 2012},
  volume     = {88},
  number     = {1-2},
  month      = jul,
  year       = {2012},
  issn       = {0885-6125},
  pages      = {5--45},
  numpages   = {41},
  url        = {https://doi.org/10.1007/s10994-012-5285-8},
  doi        = {10.1007/s10994-012-5285-8},
  acmid      = {2339299},
  publisher  = {Kluwer Academic Publishers},
  address    = {Hingham, MA, USA},
  keywords   = {Label dependence, Loss functions, Multi-label classification},
}
% FOTS text spotting network (CVPR 2018).
@inproceedings{Liu_etal_FOTS_CVPR_2018,
  author    = {Liu, Xuebo and Liang, Ding and Yan, Shi and Chen, Dagui and Qiao, Yu and Yan, Junjie},
  title     = {{FOTS}: Fast Oriented Text Spotting With a Unified Network},
  booktitle = {The {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2018},
  pages     = {5676--5685},
  note      = {Available from \url{http://openaccess.thecvf.com/content_cvpr_2018/html/1699.html}},
}
% Survey of text detection and recognition in imagery (TPAMI 2015).
@article{Ye_Doermann_TextDetectionSurvey_PAMI_2015,
  author   = {Ye, Qixiang and Doermann, David},
  journal  = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence ({PAMI})},
  title    = {Text Detection and Recognition in Imagery: A Survey},
  year     = {2015},
  volume   = {37},
  number   = {7},
  pages    = {1480--1500},
  abstract = {This paper analyzes, compares, and contrasts technical challenges, methods, and the performance of text detection and recognition research in color imagery. It summarizes the fundamental problems and enumerates factors that should be considered when addressing these problems. Existing techniques are categorized as either stepwise or integrated and sub-problems are highlighted including text localization, verification, segmentation and recognition. Special issues associated with the enhancement of degraded text and the processing of video text, multi-oriented, perspectively distorted and multilingual text are also addressed. The categories and sub-categories of text are illustrated, benchmark datasets are enumerated, and the performance of the most representative approaches is compared. This review provides a fundamental comparison and analysis of the remaining problems in the field.},
  keywords = {image colour analysis;image recognition;image segmentation;text detection;video signal processing;text detection;text recognition;color imagery;text localization;text verification;text segmentation;degraded text enhancement;video text processing;multioriented-perspectively distorted multilingual text;text subcategories;benchmark datasets;Text recognition;Image recognition;Character recognition;Image color analysis;Feature extraction;Color;Text detection;text localization;text recognition;survey},
  doi      = {10.1109/TPAMI.2014.2366765},
  issn     = {0162-8828},
  month    = jul,
}
% EAST scene text detector (CVPR 2017).
@inproceedings{Zhou_etal_EAST_CVPR_2017,
  author    = {Zhou, Xinyu and Yao, Cong and Wen, He and Wang, Yuzhi and Zhou, Shuchang and He, Weiran and Liang, Jiajun},
  title     = {{EAST}: An Efficient and Accurate Scene Text Detector},
  booktitle = {The {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jul,
  year      = {2017},
  pages     = {5551--5560},
  note      = {Available from \url{http://openaccess.thecvf.com/content_cvpr_2017/html/Zhou_EAST_An_Efficient_CVPR_2017_paper.html}},
}
% Cross-modal scene networks (TPAMI, October 2018).
% Volume corrected from 41 to 40: the 2018 issues of TPAMI are volume 40.
% NOTE(review): `pages` is still missing; add the page range once verified.
% `_note` is an ignored field holding a private reading note.
@article{Aytar_etal_Torralba_CrossModalScene_PAMI_2018,
  author        = {Yusuf Aytar and Lluis Castrejon and Carl Vondrick and Hamed Pirsiavash and Antonio Torralba},
  title         = {Cross-Modal Scene Networks},
  journal       = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume        = {40},
  number        = {10},
  year          = {2018},
  month         = oct,
  archivePrefix = {arXiv},
  eprint        = {1610.09003},
  note          = {DOI:\href{https://doi.org/10.1109/TPAMI.2017.2753232}{10.1109/TPAMI.2017.2753232}. Preprint available at \href{http://arxiv.org/abs/1610.09003}{arXiv:1610.09003}},
  _note         = {Excellent paper that shows how to combine textual and visual information extraction. I saw an early version of it presented at ICCV 2017.},
}
% VideoLSTM for action recognition (CVIU 2018).
% `doi` holds the bare DOI; styles add the resolver prefix themselves.
@article{Li_etal_Snoek_VideoLSTM_CVIU_2018,
  author  = {Zhenyang Li and Kirill Gavrilyuk and Efstratios Gavves and Mihir Jain and Cees G.M. Snoek},
  title   = {{VideoLSTM} convolves, attends and flows for action recognition},
  journal = {Computer Vision and Image Understanding},
  volume  = {166},
  pages   = {41--50},
  year    = {2018},
  issn    = {1077-3142},
  doi     = {10.1016/j.cviu.2017.10.011},
  url     = {http://www.sciencedirect.com/science/article/pii/S1077314217301741},
}
% Very deep CNNs for text classification (arXiv:1606.01781), cited as a tech report.
@techreport{conneau_etal_VeryDeep_arXiv2016,
  author      = {Conneau, Alexis and Schwenk, Holger and Barrault, Lo{\"\i}c and LeCun, Yann},
  title       = {Very deep convolutional networks for text classification},
  institution = {Cornell University Library, {CoRR/cs.CL}},
  year        = {2016},
  note        = {\href{https://arxiv.org/abs/1606.01781}{arXiv:1606.01781}},
}
% News item from the Supremo Tribunal Federal (STF) portal, dated 30 May 2018.
@misc{stf_press_30_05_2018,
  title        = {Inteligência artificial vai agilizar a tramitação de processos no {STF}},
  author       = {{Portal do Supremo Tribunal Federal}},
  year         = {2018},
  month        = {May 30},
  howpublished = {Online: \url{http://www.stf.jus.br/portal/cms/verNoticiaDetalhe.asp?idConteudo=380038}},
}
% News item from the Supremo Tribunal Federal (STF) portal, dated 30 August 2018.
@misc{stf_press_30_08_2018,
  title        = {Ministra {C}ármen {L}úcia anuncia início de funcionamento do Projeto {V}ictor, de inteligência artificial},
  author       = {{Portal do Supremo Tribunal Federal}},
  year         = {2018},
  month        = {August 30},
  howpublished = {Online: \url{http://www.stf.jus.br/portal/cms/verNoticiaDetalhe.asp?idConteudo=388443}},
}
% STF statistics web page; the `month` field carries the access-date text.
@misc{stf_stats,
  title        = {Estatísticas do {STF}},
  author       = {{Portal do Supremo Tribunal Federal}},
  year         = {2018},
  month        = {Website accessed in September},
  howpublished = {Online: \url{http://www.stf.jus.br/portal/cms/verTexto.asp?servico=estatistica&pagina=comrecvisaogeral}},
}
% LeNER-Br named entity recognition dataset (PROPOR 2018).
% `month` is kept as free text because it carries the conference date range.
@inproceedings{luz_etal_propor2018,
  author    = {Pedro H. {Luz de Araujo} and Te{\'o}filo E. {de Campos} and
               Renato R. R. {de Oliveira} and Matheus Stauffer and
               Samuel Couto and Paulo Bermejo},
  title     = {{LeNER-Br}: a Dataset for Named Entity Recognition in {Brazilian} Legal Text},
  booktitle = {International Conference on the Computational Processing of Portuguese ({PROPOR})},
  year      = {2018},
  month     = {September 24-26},
  address   = {Canela, RS, Brazil},
}
% SVM vs. ULMFiT on official texts (PROPOR 2020).
% Title restored to "source of official texts"; series name corrected to
% "Lecture Notes in Computer Science".
@inproceedings{luz_etal_propor2020,
  author    = {Pedro H. {Luz de Araujo} and Te{\'o}filo E. {de Campos} and Marcelo {Magalhaes Silva de Sousa}},
  title     = {Inferring the source of official texts: can {SVM} beat {ULMFiT}?},
  booktitle = {International Conference on the Computational Processing of Portuguese ({PROPOR})},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science ({LNCS})},
  year      = {2020},
  month     = {March 2-4},
  address   = {{\'E}vora, Portugal},
  url       = {https://propor.di.uevora.pt/},
  note      = {Code and data available from \url{https://cic.unb.br/~teodecampos/KnEDLe/}},
}
% VICTOR dataset of Brazilian legal documents (LREC 2020).
@inproceedings{luz_etal_lrec2020,
  title     = {{VICTOR}: a Dataset for {B}razilian Legal Documents Classification},
  author    = {Luz de Araujo, Pedro Henrique and
               de Campos, Te{\'o}filo Em{\'\i}dio and
               Ataides Braz, Fabricio and
               Correia da Silva, Nilton},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.181},
  pages     = {1449--1458},
  abstract  = {This paper describes VICTOR, a novel dataset built from Brazil{'}s Supreme Court digitalized legal documents, composed of more than 45 thousand appeals, which includes roughly 692 thousand documents{---}about 4.6 million pages. The dataset contains labeled text data and supports two types of tasks: document type classification; and theme assignment, a multilabel problem. We present baseline results using bag-of-words models, convolutional neural networks, recurrent neural networks and boosting algorithms. We also experiment using linear-chain Conditional Random Fields to leverage the sequential nature of the lawsuits, which we find to lead to improvements on document type classification. Finally we compare a theme classification approach where we use domain knowledge to filter out the less informative document pages to the default one where we use all pages. Contrary to the Court experts{'} expectations, we find that using all available data is the better method. We make the dataset available in three versions of different sizes and contents to encourage explorations of better models and techniques.},
  language  = {English},
  isbn      = {979-10-95546-34-4},
}
% Technical report from the University of Brasilia (July 2020).
@techreport{luz_msc_qualify_unb2020,
  author      = {Pedro Henrique {Luz de Araujo}},
  title       = {From Documents to Entities: A journey through Natural Language Processing tasks and domains},
  institution = {University of Brasilia},
  year        = {2020},
  address     = {Department of Computer Science},
  month       = jul,
  note        = {Available from \url{https://cic.unb.br/~teodecampos/peluz/}},
}
% Technical report from the University of Brasilia (July 2020).
@techreport{guth_msc_qualify_unb2020,
  author      = {Frederico Guth},
  title       = {The information bottleneck theory of deep learning},
  institution = {University of Brasilia},
  year        = {2020},
  address     = {Department of Computer Science},
  month       = jul,
  note        = {Available from \url{https://cic.unb.br/~teodecampos/fred_guth}},
}
% Portuguese stemming algorithm (SPIRE 2001); the note points at the NLTK implementation.
% Fixed the "algorithmm" typo, the placeholder booktitle "spire" and the
% zero-padded single page "0186".
@inproceedings{orengo_huyck_StemmingPt_spire2001,
  author       = {Orengo, Viviane and Huyck, Christian},
  title        = {A stemming algorithm for the {Portuguese} language},
  booktitle    = {Proceedings of the Eighth International Symposium on String Processing and Information Retrieval ({SPIRE})},
  pages        = {186--193},
  year         = {2001},
  organization = {{IEEE}},
  note         = {{NLTK} implementation by Tiago Tresoldi available from \url{https://www.nltk.org/_modules/nltk/stem/rslp.html}},
}
% Book on deep learning for NLP.
% Names must be separated by " and "; with a comma, BibTeX parsed the whole field as one person.
% NOTE(review): if Deng and Liu are the volume editors, move this to an `editor` field.
@book{DP_NLP,
  title     = {Deep Learning in Natural Language Processing},
  author    = {Deng, Li and Liu, Yang},
  publisher = {Springer},
  isbn      = {9811052085,9789811052088},
  year      = {2018},
}
% Book on dataset shift in machine learning (MIT Press).
% The tilde accent is written as a BibTeX special character ({\~n}) for correct sorting.
@book{dataset,
  title     = {Dataset Shift in Machine Learning},
  author    = {Joaquin Qui{\~n}onero-Candela and Masashi Sugiyama and Anton Schwaighofer and Neil D. Lawrence},
  publisher = {The {MIT} Press},
  isbn      = {0262170051,9780262170055,9780262255103},
  year      = {2009},
  series    = {Neural Information Processing},
}
% Overview of the Tesseract OCR engine (ICDAR 2007).
@inproceedings{smith_Tesseract_icdar2007,
  author       = {Smith, Ray},
  title        = {An overview of the {Tesseract} {OCR} engine},
  booktitle    = {Ninth International Conference on Document Analysis and Recognition ({ICDAR})},
  organization = {{IEEE}},
  year         = {2007},
  volume       = {2},
  pages        = {629--633},
}
% Deep convolutional nets for document image classification and retrieval (ICDAR 2015).
@inproceedings{harley2015icdar,
  author    = {Adam W Harley and Alex Ufkes and Konstantinos G Derpanis},
  title     = {Evaluation of Deep Convolutional Nets for Document Image Classification and Retrieval},
  year      = {2015},
  booktitle = {International Conference on Document Analysis and Recognition ({ICDAR})},
}
% Real-time document image classification (arXiv preprint).
% `journal` is kept so classic styles still print the arXiv id; eprint fields added
% to match the other arXiv entries in this file.
@article{kolsch2017real,
  author        = {K{\"o}lsch, Andreas and Afzal, Muhammad Zeshan and Ebbecke, Markus and Liwicki, Marcus},
  title         = {Real-time document image classification using deep {CNN} and extreme learning machines},
  journal       = {arXiv preprint arXiv:1711.05862},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1711.05862},
}
% dblp record for the arXiv preprint on CNNs for document image classification.
@article{tensmeyer2017,
  title         = {Analysis of Convolutional Neural Networks for Document Image Classification},
  author        = {Chris Tensmeyer and Tony Martinez},
  journal       = {CoRR},
  volume        = {abs/1708.03273},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1708.03273},
  url           = {http://arxiv.org/abs/1708.03273},
  timestamp     = {Mon, 13 Aug 2018 16:47:09 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1708-03273},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Embedded textual content for document image classification (ACM DocEng 2016).
@inproceedings{noce2016,
  author       = {Noce, Lucia and Gallo, Ignazio and Zamberletti, Alessandro and Calefati, Alessandro},
  title        = {Embedded Textual Content for Document Image Classification with Convolutional Neural Networks},
  booktitle    = {Proceedings of the 2016 ACM Symposium on Document Engineering},
  organization = {ACM},
  year         = {2016},
  pages        = {165--173},
}
% Book on document image analysis.
% The exported entry dropped the first author (the key prefix "o" comes from "O'Gorman").
% NOTE(review): `volume` and `publisher` are carried over from the export; verify them.
@book{o1995document,
  author    = {O'Gorman, Lawrence and Kasturi, Rangachar},
  title     = {Document image analysis},
  volume    = {39},
  publisher = {CRC Press},
  year      = {1995},
}
% Twenty-year retrospective of document image analysis in PAMI.
% Volume corrected from 1 to 22: the 2000 issues of TPAMI are volume 22.
@article{nagy2000twenty,
  author    = {Nagy, George},
  title     = {Twenty years of document image analysis in {PAMI}},
  journal   = {{IEEE} Transactions on Pattern Analysis \& Machine Intelligence},
  volume    = {22},
  number    = {1},
  pages     = {38--62},
  year      = {2000},
  publisher = {IEEE},
}
% The IMPACT dataset of historical document images (HIP workshop, 2013).
@inproceedings{papadopoulos2013impact,
  author       = {Papadopoulos, Christos and Pletschacher, Stefan and Clausner, Christian and Antonacopoulos, Apostolos},
  title        = {The {IMPACT} dataset of historical document images},
  booktitle    = {Proceedings of the 2nd International Workshop on Historical Document Imaging and Processing},
  organization = {ACM},
  year         = {2013},
  pages        = {123--130},
}
% Dataset for evaluating document layout analysis (ICDAR'09).
@inproceedings{antonacopoulos2009realistic,
  author       = {Antonacopoulos, Apostolos and Bridson, David and Papadopoulos, Christos and Pletschacher, Stefan},
  title        = {A realistic dataset for performance evaluation of document layout analysis},
  booktitle    = {Document Analysis and Recognition, 2009. ICDAR'09. 10th International Conference on},
  organization = {IEEE},
  year         = {2009},
  pages        = {296--300},
}
% Computerising natural history card archives.
% The export had a chapter number ("10") fused into the title and had mis-split the
% author list ("Malcolm, J and Scoble, Gaden S Robinson"); both are repaired here.
% NOTE(review): verify the year and the venue details against the original publication.
@article{beccaloni10,
  author  = {Beccaloni, George W. and Scoble, Malcolm J. and Robinson, Gaden S. and Downton, Andrew C. and Lucas, Simon M.},
  title   = {Computerising unit-level data in natural history card archives},
  journal = {{ENHSIN}},
  year    = 2010,
}
@inproceedings{clausner2015enp,
  author       = {Clausner, Christian and Papadopoulos, Christos and Pletschacher, Stefan and Antonacopoulos, Apostolos},
  title        = {The {ENP} image and ground truth dataset of historical newspapers},
  booktitle    = {Document Analysis and Recognition (ICDAR), 2015 13th International Conference on},
  pages        = {931--935},
  organization = {IEEE},
  year         = {2015}
}
@article{dimmick1991nist,
  author  = {Dimmick, D. L. and Garris, M. D. and Wilson, C. L.},
  title   = {{NIST} structured forms reference set of binary images ({SFRS})},
  journal = {NIST Special Database},
  volume  = {2},
  year    = {1991}
}
@inproceedings{brunessaux2014maurdor,
  author       = {Brunessaux, Sylvie and Giroux, Patrick and Grilheres, Bruno and Manta, Mathieu and Bodin, Maylis and Choukri, Khalid and Galibert, Olivier and Kahn, Juliette},
  title        = {The {Maurdor} project: improving automatic processing of digital documents},
  booktitle    = {2014 11th IAPR International Workshop on Document Analysis Systems (DAS)},
  pages        = {349--354},
  organization = {IEEE},
  year         = {2014}
}
@inproceedings{karpathy2014large,
  author    = {Karpathy, Andrej and Toderici, George and Shetty, Sanketh and Leung, Thomas and Sukthankar, Rahul and Fei-Fei, Li},
  title     = {Large-scale video classification with convolutional neural networks},
  booktitle = {Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
  pages     = {1725--1732},
  year      = {2014}
}
@inproceedings{li2015visual,
  author    = {Li, Guanbin and Yu, Yizhou},
  title     = {Visual saliency based on multiscale deep features},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5455--5463},
  year      = {2015}
}
@inproceedings{ribeiro2016should,
  author       = {Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos},
  title        = {Why Should {I} Trust You?: Explaining the Predictions of Any Classifier},
  booktitle    = {Proceedings of the 22nd ACM {SIGKDD} international conference on knowledge discovery and data mining},
  pages        = {1135--1144},
  organization = {ACM},
  year         = {2016}
}
@inproceedings{hong2015online,
  author    = {Hong, Seunghoon and You, Tackgeun and Kwak, Suha and Han, Bohyung},
  title     = {Online tracking by learning discriminative saliency map with convolutional neural network},
  booktitle = {International Conference on Machine Learning},
  pages     = {597--606},
  year      = {2015}
}
@inproceedings{Zhao_2015_CVPR,
  author    = {Zhao, Rui and Ouyang, Wanli and Li, Hongsheng and Wang, Xiaogang},
  title     = {Saliency Detection by Multi-Context Deep Learning},
  booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2015}
}
@inproceedings{Liu_2016_CVPR,
  author    = {Liu, Nian and Han, Junwei},
  title     = {{DHSNet}: Deep Hierarchical Saliency Network for Salient Object Detection},
  booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2016}
}
@article{Wiedemann_PageStream,
  author        = {Wiedemann, Gregor and Heyer, Gerhard},
  title         = {Page Stream Segmentation with Convolutional Neural Nets Combining Textual and Visual Features},
  journal       = {CoRR},
  volume        = {abs/1710.03006},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1710.03006},
  url           = {http://arxiv.org/abs/1710.03006},
  timestamp     = {Mon, 13 Aug 2018 16:46:28 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1710-03006},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% =========== Attention ========= %
@article{DBLP:journals/corr/abs-1803-04831,
  author        = {Li, Shuai and Li, Wanqing and Cook, Chris and Zhu, Ce and Gao, Yanbo},
  title         = {Independently Recurrent Neural Network ({IndRNN}): Building {A} Longer and Deeper {RNN}},
  journal       = {CoRR},
  volume        = {abs/1803.04831},
  year          = {2018},
  archivePrefix = {arXiv},
  eprint        = {1803.04831},
  url           = {http://arxiv.org/abs/1803.04831},
  timestamp     = {Mon, 13 Aug 2018 16:48:29 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1803-04831},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{2016arXiv160507725M,
  author        = {Miyato, Takeru and Dai, Andrew M. and Goodfellow, Ian},
  title         = {Adversarial Training Methods for Semi-Supervised Text Classification},
  journal       = {ArXiv e-prints},
  archivePrefix = {arXiv},
  eprint        = {1605.07725},
  primaryClass  = {stat.ML},
  keywords      = {Statistics - Machine Learning, Computer Science - Machine Learning},
  year          = 2016,
  month         = may,
  adsurl        = {http://adsabs.harvard.edu/abs/2016arXiv160507725M},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System}
}
@inproceedings{Zhou2016AttentionBasedBL,
  author      = {Zhou, Peng and Shi, Wei and Tian, Jun and Qi, Zhenyu and Li, Bingchen and Hao, Hongwei and Xu, Bo},
  title       = {Attention-Based Bidirectional Long Short-Term Memory Networks for Relation Classification},
  booktitle   = {ACL},
  year        = 2016,
  description = {Attention-Based Bidirectional Long Short-Term Memory Networks for Relation Classification - Semantic Scholar},
  keywords    = {attention deep_learning relation_classification},
  biburl      = {https://www.bibsonomy.org/bibtex/2bc05e78a23d5df83189113ae2b3c73bf/dallmann},
  interhash   = {1beb673ab45db4cd5045e7fdab4c15b3},
  intrahash   = {bc05e78a23d5df83189113ae2b3c73bf},
  added-at    = {2018-07-16T13:12:08.000+0200},
  timestamp   = {2018-07-16T13:12:08.000+0200}
}
@inproceedings{Yang2016HierarchicalAN,
  author    = {Yang, Zichao and Yang, Diyi and Dyer, Chris and He, Xiaodong and Smola, Alexander J. and Hovy, Eduard H.},
  title     = {Hierarchical Attention Networks for Document Classification},
  booktitle = {HLT-NAACL},
  year      = {2016}
}
@article{M.-1998,
  author    = {Gardner, M. W. and Dorling, S. R.},
  title     = {Artificial neural networks (the multilayer perceptron)---a review of applications in the atmospheric sciences},
  journal   = {Atmospheric Environment},
  volume    = {32},
  number    = {14--15},
  pages     = {2627--2636},
  year      = {1998},
  publisher = {Elsevier Science},
  issn      = {1352-2310},
  doi       = {10.1016/s1352-2310(97)00447-0}
}
@book{book:26913,
  author    = {Manning, Christopher D. and Sch{\"u}tze, Hinrich},
  title     = {Foundations of Statistical Natural Language Processing},
  publisher = {MIT Press},
  year      = {1999},
  isbn      = {0-262-13360-1}
}
@book{book:2177490,
  author    = {Venkatesan, Ragav and Li, Baoxin},
  title     = {Convolutional Neural Networks in Visual Computing: A Concise Guide},
  series    = {Data-Enabled Engineering},
  edition   = {1},
  publisher = {CRC Press},
  year      = {2018},
  isbn      = {1498770398,978-1-4987-7039-2,9781351650328,1351650327,978-1-138-74795-1}
}
@book{book:1580841,
  author    = {Herrera, Francisco and Charte, Francisco and Rivera, Antonio J. and del Jesus, Mar{\'\i}a J.},
  title     = {Multilabel Classification: Problem Analysis, Metrics and Techniques},
  publisher = {Springer International Publishing},
  year      = {2016},
  isbn      = {978-3-319-41110-1,978-3-319-41111-8}
}
@book{book:2238098,
  author    = {Dangeti, Pratap},
  title     = {Statistics for Machine Learning: Techniques for exploring supervised, unsupervised, and reinforcement learning models with Python and R},
  publisher = {Packt Publishing},
  year      = {2017},
  isbn      = {1788295757,9781788295758}
}
% NOTE(review): the early-access placeholder (pages 1--1, year 2016) was replaced by the
% final journal record for this DOI -- TODO confirm against the publisher page.
@article{Shelhamer-2016,
  author    = {Shelhamer, Evan and Long, Jonathan and Darrell, Trevor},
  title     = {Fully Convolutional Networks for Semantic Segmentation},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {39},
  number    = {4},
  pages     = {640--651},
  year      = {2017},
  publisher = {IEEE},
  issn      = {0162-8828},
  issne     = {2160-9292},
  doi       = {10.1109/tpami.2016.2572683}
}
@inproceedings{Taylor-2012,
  author    = {Taylor, P. E. and Almeida, G. J. M. and Hodgins, J. K. and Kanade, T.},
  title     = {Multi-label classification for the analysis of human motion quality},
  booktitle = {Annals of the IEEE 2012 34th Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC)},
  pages     = {2214--2218},
  year      = {2012},
  isbn      = {978-1-4577-1787-1,978-1-4244-4119-8},
  doi       = {10.1109/embc.2012.6346402}
}
@book{book:1205754,
  author    = {Sun, Liang and Ji, Shuiwang and Ye, Jieping},
  title     = {Multi-Label Dimensionality Reduction},
  series    = {Chapman \& Hall/CRC Machine Learning \& Pattern Recognition},
  publisher = {Chapman and Hall/CRC},
  year      = {2013},
  isbn      = {978-1-4398-0616-6,978-1-4398-0615-9}
}
@book{book:2065172,
  editor    = {Zhou, S. Kevin and Greenspan, Hayit and Shen, Dinggang},
  title     = {Deep Learning for Medical Image Analysis},
  edition   = {1},
  publisher = {Academic Press},
  year      = {2017},
  isbn      = {9780128104095,9780128104088}
}
@book{book:1688039,
  author    = {Buduma, Nikhil and Locascio, Nicholas},
  title     = {Fundamentals of Deep Learning: Designing Next-Generation Machine Intelligence Algorithms},
  edition   = {1},
  publisher = {O'Reilly Media},
  year      = {2017},
  isbn      = {1491925612,9781491925614}
}
@book{book:1697911,
  author    = {Ketkar, Nikhil},
  title     = {Deep Learning with Python: A Hands-on Introduction},
  edition   = {1},
  publisher = {Apress},
  year      = {2017},
  isbn      = {978-1-4842-2765-7,978-1-4842-2766-4}
}
% NOTE(review): this may be an edited volume, in which case these names belong in
% the editor field -- TODO confirm.
@book{book:2222210,
  author    = {Deng, Li and Liu, Yang},
  title     = {Deep Learning in Natural Language Processing},
  publisher = {Springer},
  year      = {2018},
  isbn      = {9811052085,9789811052088}
}
@book{book:2208606,
  author    = {Vatsa, Mayank and Singh, Richa and Majumdar, Angshul},
  title     = {Deep Learning in Biometrics},
  edition   = {1},
  publisher = {CRC Press},
  year      = {2018},
  isbn      = {1138578231,9781138578234}
}
@book{book:2164086,
  author    = {Chollet, Fran{\c{c}}ois},
  title     = {Deep Learning with Python},
  publisher = {Manning Publications},
  year      = {2017},
  isbn      = {1617294438,9781617294433}
}
@book{book:2155015,
  editor    = {Matsushita, Kayo},
  title     = {Deep Active Learning: Toward Greater Depth in University Education},
  edition   = {1},
  publisher = {Springer Singapore},
  year      = {2018},
  isbn      = {978-981-10-5659-8,978-981-10-5660-4}
}
@book{book:2170397,
  author    = {Gulli, Antonio and Pal, Sujit},
  title     = {Deep learning with Keras},
  publisher = {Packt Publishing},
  year      = {2017},
  isbn      = {1787128423,9781787128422,1787129039,9781787129030}
}
@book{book:2178978,
  author    = {Nandy, Abhishek and Biswas, Manisha},
  title     = {Reinforcement Learning: With Open AI, TensorFlow and Keras Using Python},
  edition   = {1},
  publisher = {Apress},
  year      = {2018},
  isbn      = {978-1-4842-3284-2,978-1-4842-3285-9}
}
@book{book:2177698,
  author    = {Kane, Frank},
  title     = {Hands-on data science and Python machine learning: perform data mining and machine learning efficiently using Python and Spark},
  publisher = {Packt Publishing},
  year      = {2017},
  isbn      = {1787280748,978-1-78728-074-8}
}
@book{book:1632837,
  author    = {Igual, Laura and Segu{\'\i}, Santi},
  title     = {Introduction to Data Science. A Python Approach to Concepts, Techniques and Applications},
  series    = {Undergraduate Topics in Computer Science},
  publisher = {Springer},
  year      = {2017},
  isbn      = {978-3-319-50016-4,978-3-319-50017-1}
}
% NOTE(review): the citation key names the volume editors; the paper itself is by
% Joachims. Entry rebuilt from the scraped composite title -- TODO confirm.
@inproceedings{Nedellec-1998,
  author    = {Joachims, Thorsten},
  editor    = {N{\'e}dellec, Claire and Rouveirol, C{\'e}line},
  title     = {Text categorization with {Support Vector Machines}: Learning with many relevant features},
  booktitle = {Machine Learning: {ECML}-98},
  series    = {Lecture Notes in Computer Science},
  volume    = {1398},
  pages     = {137--142},
  publisher = {Springer},
  year      = {1998},
  isbn      = {3-540-64417-2},
  doi       = {10.1007/bfb0026683}
}
% NOTE(review): the early-access placeholder (pages 1--11) was replaced by the final
% volume/issue/page record -- TODO confirm against the publisher page.
@article{Cheng-2018,
  author    = {Cheng, Gong and Yang, Ceyuan and Yao, Xiwen and Guo, Lei and Han, Junwei},
  title     = {When Deep Learning Meets Metric Learning: Remote Sensing Image Scene Classification via Learning Discriminative {CNNs}},
  journal   = {IEEE Transactions on Geoscience and Remote Sensing},
  volume    = {56},
  number    = {5},
  pages     = {2811--2821},
  year      = {2018},
  publisher = {IEEE},
  issn      = {0196-2892},
  issne     = {1558-0644},
  doi       = {10.1109/TGRS.2017.2783902}
}
@article{DBLP:journals/corr/abs-1804-06275,
  author        = {Hegde, Kshiteesh and Magdon{-}Ismail, Malik and Ramanathan, Ram and Thapa, Bishal},
  title         = {Network Signatures from Image Representation of Adjacency Matrices: Deep/Transfer Learning for Subgraph Classification},
  journal       = {CoRR},
  volume        = {abs/1804.06275},
  year          = {2018},
  archivePrefix = {arXiv},
  eprint        = {1804.06275},
  url           = {http://arxiv.org/abs/1804.06275},
  timestamp     = {Mon, 13 Aug 2018 16:46:53 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1804-06275},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{Krizhevsky-2017,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.},
  title     = {{ImageNet} classification with deep convolutional neural networks},
  journal   = {Communications of the ACM},
  volume    = {60},
  number    = {6},
  pages     = {84--90},
  year      = {2017},
  month     = may,
  day       = {24},
  publisher = {Association for Computing Machinery},
  issn      = {0001-0782},
  doi       = {10.1145/3065386}
}
@article{Du-2018,
  author    = {Du, Yongping and Liu, Jingxuan and Ke, Weimao and Gong, Xuemei},
  title     = {Hierarchy construction and text classification based on the relaxation strategy and least information model},
  journal   = {Expert Systems with Applications},
  volume    = {100},
  pages     = {157--164},
  year      = {2018},
  month     = jun,
  publisher = {Elsevier Science},
  issn      = {0957-4174},
  doi       = {10.1016/j.eswa.2018.02.003}
}
@article{DBLP:journals/corr/ConneauSBL16,