diff --git a/docs/paper/overleaf/aaai.bst b/docs/paper/overleaf/aaai.bst new file mode 100644 index 0000000..cea352b --- /dev/null +++ b/docs/paper/overleaf/aaai.bst @@ -0,0 +1,1192 @@ +%Filename: AAAI.bst +% BibTeX `aaai' style file for BibTeX version 0.99c, LaTeX version 2.09 +% Version of 22 February 2008 +% Place it in a file called aaai.bst in the BibTeX search path. (Placing it +% in the same directory as the LaTeX document should also work.) +% Support for named citations is provided by named.sty +% This version was made by modifying the master file made by +% Oren Patashnik (PATASHNIK@SCORE.STANFORD.EDU) +% Copyright (C) 1985, all rights reserved. +% Modifications Copyright (C) 1988, 1989, 1994, Peter F. Patel-Schneider +% Copying of this file is authorized only if either +% (1) you make absolutely no changes to your copy, including name, or +% (2) if you do make changes, you name it something other than +% btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst. +% This restriction helps ensure that all standard styles are identical. +% This style is NOT guaranteed to work. It is provided in the hope +% that it will make the preparation of papers easier. +% +% There are undoubtably bugs in this style. If you make bug fixes, +% improvements, etc. please us know (www.aaai.org) +% The preparation of this modified file was supported by Schlumberger Palo +% Alto Research and AT\&T Bell Laboratories. +% Modifications Copyright (C) 1990. Sunil Issar si@cs.cmu.edu +% Modified the file so that the Bibliography style is closer to the +% one specified in the Instructions to Authors for AAAI papers. +% This style is NOT guaranteed to work. It is provided in the hope +% that it will make the preparation of papers easier. +% Standard disclaimers apply. 
Send comments etc., to si@cs.cmu.edu +% This file can be used in other conferences as long as credit to the +% authors and supporting agencies is retained, this notice is not changed, +% and further modification or reuse is not restricted. +% Name format in citations: +% single authorFirst +% two authorsFirst and Second +% three authorsFirst, Second, and Third +% more authorsFirst et al. +% +% Bibliography format +% author. date. other. +% Author format +% single authorFirst, Initials +% two authorsFirst, Initials, and Second, Initials +% more authorsFirst, Initials; ...; and Last, Initials +% +% Reference list ordering: alphabetical by author or whatever passes +% for author in the absence of one. +% +% This BibTeX style has support for short (year only) citations. This +% is done by having the citations actually look like +% \citeauthoryear{author-info}{year} +% The LaTeX style has to have support for this +ENTRY % three lists follow: .bib fields, integer entry variables (none), string entry variables +{ address +author +booktitle +chapter +edition +editor +howpublished +institution +journal +key +month +note +number +organization +pages +publisher +school +series +title +type +volume +year +} +{} +{ label extra.label sort.label } +INTEGERS { output.state before.all mid.sentence after.sentence after.block } % output.state is compared against the other four, which init.state.consts sets to 0..3 +FUNCTION {init.state.consts} % assign the four constants that output.state can hold +{ #0 'before.all := +#1 'mid.sentence := +#2 'after.sentence := +#3 'after.block := +} +STRINGS { s t } % global scratch strings shared by the functions below +FUNCTION {output.nonnull} % stack: pending new; writes pending plus a separator chosen from output.state and leaves new as the pending string +{ 's := +output.state mid.sentence = +{ ", " * write$ } % mid.sentence: comma separator, state unchanged +{ output.state after.block = +{ add.period$ write$ % after.block: period, line break and \newblock +newline$ +"\newblock " write$ +} +{ output.state before.all = +'write$ % before.all: pending string written as it is +{ add.period$ " " * write$ } % after.sentence: period and a space +if$ +} +if$ +mid.sentence 'output.state := +} +if$ +s +} +FUNCTION {sioutput.nonnull} % same as output.nonnull except that the mid.sentence separator is a single space instead of a comma +{ 's := +output.state mid.sentence = +{ " " * write$ } +{ output.state after.block = +{ add.period$ write$ +newline$ +"\newblock " write$ +} +{ output.state before.all = +'write$ +{ add.period$ " " * write$ } +if$ +} +if$ +mid.sentence 'output.state := +} +if$ +s +} +FUNCTION {output} % output.nonnull, unless the top string is empty, in which case it is only popped +{ 
duplicate$ empty$ +'pop$ +'output.nonnull +if$ +} +FUNCTION {sioutput} % sioutput.nonnull, unless the top string is empty, in which case it is only popped +{ duplicate$ empty$ +'pop$ +'sioutput.nonnull +if$ +} +FUNCTION {output.check} % stack: string field-name; like output, but an empty string also raises an "empty FIELD in KEY" warning +{ 't := +duplicate$ empty$ +{ pop$ "empty " t * " in " * cite$ * warning$ } +'output.nonnull +if$ +} +FUNCTION {output.bibitem} % start an entry: write \bibitem[label]{cite key}, push "" as the pending string, reset output.state to before.all +{ newline$ +"\bibitem[" write$ +label write$ +"]{" write$ +cite$ write$ +"}" write$ +newline$ +"" +before.all 'output.state := +} +FUNCTION {fin.entry} % end an entry: write the pending string with a final period, then a newline +{ add.period$ +write$ +newline$ +} +FUNCTION {new.block} % request a block break before the next output, unless nothing has been output yet +{ output.state before.all = +'skip$ +{ after.block 'output.state := } +if$ +} +FUNCTION {new.sentence} % request a sentence break, unless a block break is pending or nothing has been output yet +{ output.state after.block = +'skip$ +{ output.state before.all = +'skip$ +{ after.sentence 'output.state := } +if$ +} +if$ +} +FUNCTION {not} % integer negation: nonzero gives 0, zero gives 1 +{ { #0 } +{ #1 } +if$ +} +FUNCTION {and} % stack: a b; leaves a when b is nonzero, otherwise 0 +{ 'skip$ +{ pop$ #0 } +if$ +} +FUNCTION {or} % stack: a b; leaves 1 when b is nonzero, otherwise a +{ { pop$ #1 } +'skip$ +if$ +} +FUNCTION {new.block.checka} % pops one string; new.block unless it is empty +{ empty$ +'skip$ +'new.block +if$ +} +FUNCTION {new.block.checkb} % pops two strings; new.block unless both are empty +{ empty$ +swap$ empty$ +and +'skip$ +'new.block +if$ +} +FUNCTION {new.sentence.checka} % pops one string; new.sentence unless it is empty +{ empty$ +'skip$ +'new.sentence +if$ +} +FUNCTION {new.sentence.checkb} % pops two strings; new.sentence unless both are empty +{ empty$ +swap$ empty$ +and +'skip$ +'new.sentence +if$ +} +FUNCTION {field.or.null} % replace a top value for which empty$ holds by "" +{ duplicate$ empty$ +{ pop$ "" } +'skip$ +if$ +} +FUNCTION {emphasize} % wrap a non-empty string in {\em ...}; an empty one becomes "" +{ duplicate$ empty$ +{ pop$ "" } +{ "{\em " swap$ * "}" * } +if$ +} +INTEGERS { nameptr namesleft numnames } % loop state for the name-list formatters +FUNCTION {format.publisher} % "address: publisher", only publisher when address is empty, "" when publisher is empty +{ publisher empty$ +{ "" } +{ address empty$ +{ publisher } +{ address ": " * publisher * } +if$ +} +if$ +} +FUNCTION {format.organization} % same as format.publisher with organization in place of publisher +{ organization empty$ +{ "" } +{ address empty$ +{ organization } +{ address ": " * organization * } +if$ +} +if$ +} +FUNCTION {format.names} % stack: name list; each name as von Last, Jr, F. joined by "; "; the last name is preceded by "," or ";" and " and ", or replaced by " et~al." when it is "others"; a final period is added +{ 's := +#1 'nameptr := +s num.names$ 'numnames := +numnames 'namesleft := +{ namesleft #0 > } +{ s nameptr "{vv~}{ll}{, jj}{, f.}" format.name$ 't := +nameptr #1 > +{ namesleft #1 > +{ "; " * t * } +{ numnames #2 > % last name: ";" after three or more names, "," after two +{ ";" * } +{ "," * } +if$ +t "others" = +{ " et~al." 
* } +{ " and " * t * } +if$ +} +if$ +} +'t +if$ +nameptr #1 + 'nameptr := +namesleft #1 - 'namesleft := +} +while$ +add.period$ +} +FUNCTION {format.authors} % author through format.names, or "" when author is empty +{ author empty$ +{ "" } +{ author format.names } +if$ +} +FUNCTION {format.editors} % editor through format.names followed by ", eds." (more than one name) or ", ed."; "" when editor is empty +{ editor empty$ +{ "" } +{ editor format.names +editor num.names$ #1 > +{ ", eds." * } +{ ", ed." * } +if$ +} +if$ +} +FUNCTION {format.title} % title converted with change.case$ "t"; "" when title is empty +{ title empty$ +{ "" } +{ title "t" change.case$ } +if$ +} +FUNCTION {n.dashify} % rewrite the top string so that a lone "-" becomes "--"; runs of two or more hyphens are copied unchanged +{ 't := +"" +{ t empty$ not } +{ t #1 #1 substring$ "-" = +{ t #1 #2 substring$ "--" = not +{ "--" * % single hyphen: emit an en-dash +t #2 global.max$ substring$ 't := +} +{ { t #1 #1 substring$ "-" = } % run of hyphens: copy all of them +{ "-" * +t #2 global.max$ substring$ 't := +} +while$ +} +if$ +} +{ t #1 #1 substring$ * % any other character is copied +t #2 global.max$ substring$ 't := +} +if$ +} +while$ +} +FUNCTION {format.year} % year followed by extra.label; "" when year is empty +{ year empty$ +{ "" } +{ year extra.label * } +if$ +} +FUNCTION {format.date} % "month year", with a warning when month is set but year is not; every call in the entry functions of this file is commented out +{ year empty$ +{ month empty$ +{ "" } +{ "there's a month but no year in " cite$ * warning$ +month +} +if$ +} +{ month empty$ +'year +{ month " " * year * } +if$ +} +if$ +} +FUNCTION {format.btitle} % title passed through emphasize +{ title emphasize +} +FUNCTION {tie.or.space.connect} % stack: a b; leaves a, then "~" when b is shorter than 3 text characters or " " otherwise, then b +{ duplicate$ text.length$ #3 < +{ "~" } +{ " " } +if$ +swap$ * * +} +FUNCTION {either.or.check} % stack: message field; warns "can't use both MESSAGE fields in KEY" when field is not empty +{ empty$ +'pop$ +{ "can't use both " swap$ * " fields in " * cite$ * warning$ } +if$ +} +FUNCTION {format.bvolume} % "volume N", plus " of {\em series}" when series is set; warns when number is also set; "" when volume is empty +{ volume empty$ +{ "" } +{ "volume" volume tie.or.space.connect +series empty$ +'skip$ +{ " of " * series emphasize * } +if$ +"volume and number" number either.or.check +} +if$ +} +FUNCTION {format.number.series} % only when volume is empty: "number N in series" (capitalised unless mid-sentence), or series alone when number is empty +{ volume empty$ +{ number empty$ +{ series field.or.null } +{ output.state mid.sentence = +{ "number" } +{ "Number" } +if$ +number tie.or.space.connect +series empty$ +{ "there's a number but no series in " cite$ * warning$ } +{ " in " * series * } +if$ +} +if$ +} +{ "" } +if$ +} +FUNCTION {format.edition} % edition followed by " edition", lower-cased mid-sentence and "t"-cased otherwise; "" when edition is empty +{ edition empty$ +{ "" } +{ output.state mid.sentence = +{ edition "l" change.case$ " edition" * } +{ edition "t" 
change.case$ " edition" * } +if$ +} +if$ +} +INTEGERS { multiresult } % result flag of multi.page.check +FUNCTION {multi.page.check} % stack: string; leaves 1 when it contains a hyphen, a comma or a plus sign, otherwise 0 +{ 't := +#0 'multiresult := +{ multiresult not +t empty$ not +and +} +{ t #1 #1 substring$ +duplicate$ "-" = +swap$ duplicate$ "," = +swap$ "+" = +or or +{ #1 'multiresult := } +{ t #2 global.max$ substring$ 't := } +if$ +} +while$ +multiresult +} +FUNCTION {format.pages} % pages prefixed by "~" or " " through tie.or.space.connect, n.dashified when multi.page.check holds; "" when pages is empty +{ pages empty$ +{ "" } +{ pages multi.page.check +{ "" pages n.dashify tie.or.space.connect } +{ "" pages tie.or.space.connect } +if$ +} +if$ +} +FUNCTION {format.vol.num.pages} % volume(number):pages; warns when number is set without volume; format.pages alone when volume and number are both empty +{ volume field.or.null +number empty$ +'skip$ +{ "(" number * ")" * * +volume empty$ +{ "there's a number but no volume in " cite$ * warning$ } +'skip$ +if$ +} +if$ +pages empty$ +'skip$ +{ duplicate$ empty$ +{ pop$ format.pages } +{ ":" * pages n.dashify * } +if$ +} +if$ +} +FUNCTION {format.chapter.pages} % "chapter N" (or the lower-cased type in place of "chapter"), then ", " and format.pages when pages is set; format.pages alone when chapter is empty +{ chapter empty$ +'format.pages +{ type empty$ +{ "chapter" } +{ type "l" change.case$ } +if$ +chapter tie.or.space.connect +pages empty$ +'skip$ +{ ", " * format.pages * } +if$ +} +if$ +} +FUNCTION {format.in.ed.booktitle} % "In ", the editors when present, and the emphasized booktitle; "" when booktitle is empty +{ booktitle empty$ +{ "" } +{ editor empty$ +{ "In " booktitle emphasize * } +{ "In " format.editors * ", " * booktitle emphasize * } +if$ +} +if$ +} +FUNCTION {empty.misc.check} % warn when author, title, howpublished, month, year and note are all empty and key is not empty +{ author empty$ title empty$ howpublished empty$ +month empty$ year empty$ note empty$ +and and and and and +key empty$ not and +{ "all relevant fields are empty in " cite$ * warning$ } +'skip$ +if$ +} +FUNCTION {format.thesis.type} % stack: default type string; replaced by the "t"-cased type field when that is set +{ type empty$ +'skip$ +{ pop$ +type "t" change.case$ +} +if$ +} +FUNCTION {format.tr.number} % type (default "Technical Report") tied to number; without number the type is "t"-cased on its own +{ type empty$ +{ "Technical Report" } +'type +if$ +number empty$ +{ "t" change.case$ } +{ number tie.or.space.connect } +if$ +} +FUNCTION {format.article.crossref} % "In KEY", or "In {\em journal}" when key is empty, followed by \shortcite{crossref}; warns when both are empty +{ key empty$ +{ journal empty$ +{ "need key or journal for " cite$ * " to crossref " * crossref * +warning$ +"" +} +{ "In {\em " journal * "\/}" * } +if$ +} +{ "In " key * } +if$ +" \shortcite{" * crossref * "}" * +} +FUNCTION 
{format.crossref.editor} % first editor as von Last, then " et~al." (more than two editors, or the second is "others") or " and " plus the second editor +{ editor #1 "{vv~}{ll}" format.name$ +editor num.names$ duplicate$ +#2 > +{ pop$ " et~al." * } +{ #2 < +'skip$ +{ editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = +{ " et~al." * } +{ " and " * editor #2 "{vv~}{ll}" format.name$ * } +if$ +} +if$ +} +if$ +} +FUNCTION {format.book.crossref} % "Volume N of " (or "In " with a warning when volume is empty), then editors, key or emphasized series, then \shortcite{crossref} +{ volume empty$ +{ "empty volume in " cite$ * "'s crossref of " * crossref * warning$ +"In " +} +{ "Volume" volume tie.or.space.connect +" of " * +} +if$ +editor empty$ +editor field.or.null author field.or.null = +or +{ key empty$ +{ series empty$ +{ "need editor, key, or series for " cite$ * " to crossref " * +crossref * warning$ +"" * +} +{ "{\em " * series * "\/}" * } +if$ +} +{ key * } +if$ +} +{ format.crossref.editor * } +if$ +" \shortcite{" * crossref * "}" * +} +FUNCTION {format.incoll.inproc.crossref} % "In " plus editors, key or emphasized booktitle, then \shortcite{crossref} +{ editor empty$ +editor field.or.null author field.or.null = +or +{ key empty$ +{ booktitle empty$ +{ "need editor, key, or booktitle for " cite$ * " to crossref " * +crossref * warning$ +"" +} +{ "In {\em " booktitle * "\/}" * } +if$ +} +{ "In " key * } +if$ +} +{ "In " format.crossref.editor * } +if$ +" \shortcite{" * crossref * "}" * +} +FUNCTION {article} % entry type: authors, year, title, then journal with volume/number/pages or the crossref form, note +{ output.bibitem +format.authors "author" output.check +new.block format.year "year" output.check +new.block +format.title "title" output.check +new.block +crossref missing$ +{ journal emphasize "journal" output.check +format.vol.num.pages sioutput +} +{ format.article.crossref output.nonnull +format.pages sioutput +} +if$ +new.block +note output +fin.entry +} +FUNCTION {book} % entry type: authors (or editors), year, emphasized title, then volume/series and publisher or the crossref form, edition, note +{ output.bibitem +author empty$ +{ format.editors "author and editor" output.check } +{ format.authors output.nonnull +crossref missing$ +{ "author and editor" editor either.or.check } +'skip$ +if$ +} +if$ +new.block format.year "year" output.check +new.block +format.btitle "title" output.check +crossref missing$ +{ format.bvolume output +new.block +format.number.series output +new.sentence +format.publisher 
"publisher" output.check +} +{ new.block +format.book.crossref output.nonnull +} +if$ +format.edition output +% format.date "year" output.check +new.block +note output +fin.entry +} +FUNCTION {booklet} % entry type: authors, year, title, howpublished, address, note +{ output.bibitem +format.authors output +new.block format.year "year" output.check +new.block +format.title "title" output.check +howpublished address new.block.checkb +howpublished output +address output +% format.date output +new.block +note output +fin.entry +} +FUNCTION {inbook} % entry type: as book, with chapter and pages output before the note +{ output.bibitem +author empty$ +{ format.editors "author and editor" output.check } +{ format.authors output.nonnull +crossref missing$ +{ "author and editor" editor either.or.check } +'skip$ +if$ +} +if$ +new.block format.year "year" output.check +new.block +format.btitle "title" output.check +crossref missing$ +{ format.bvolume output +new.block +format.number.series output +new.sentence +format.publisher "publisher" output.check +} +{ format.book.crossref output.nonnull +} +if$ +format.edition output +% format.date "year" output.check +new.block +format.chapter.pages "chapter and pages" output.check +new.block +note output +fin.entry +} +FUNCTION {incollection} % entry type: authors, year, title, then "In" editors and booktitle, volume/series, publisher, edition, chapter and pages, or the crossref form; note +{ output.bibitem +format.authors "author" output.check +new.block format.year "year" output.check +new.block +format.title "title" output.check +new.block +crossref missing$ +{ format.in.ed.booktitle "booktitle" output.check +format.bvolume output +format.number.series output +new.sentence +format.publisher "publisher" output.check +format.edition output +% format.date "year" output.check +new.block +format.chapter.pages output +} +{ format.incoll.inproc.crossref output.nonnull +new.block +format.chapter.pages output +} +if$ +new.block +note output +fin.entry +} +FUNCTION {inproceedings} % entry type: authors, year, title, then "In" editors and booktitle, volume/series, pages, organization or publisher, or the crossref form; note +{ output.bibitem +format.authors "author" output.check +new.block format.year "year" output.check +new.block +format.title "title" output.check +new.block +crossref missing$ +{ format.in.ed.booktitle "booktitle" output.check +format.bvolume 
output +format.number.series output +format.pages output +new.block +organization empty$ +{ format.publisher output } +{ format.organization output } +if$ +} +{ format.incoll.inproc.crossref output.nonnull +format.pages output +} +if$ +new.block +note output +fin.entry +} +FUNCTION {conference} { inproceedings } % conference entries are formatted by inproceedings +FUNCTION {manual} % entry type: authors (or organization and address), year, emphasized title, organization/address, edition, note +{ output.bibitem +author empty$ +{ organization empty$ +'skip$ +{ organization output.nonnull +address output +} +if$ +} +{ format.authors output.nonnull } +if$ +new.block format.year "year" output.check +new.block +format.btitle "title" output.check +author empty$ +{ organization empty$ +{ address new.block.checka +address output +} +'skip$ +if$ +} +{ organization address new.block.checkb +organization output +address output +} +if$ +format.edition output +% format.date output +new.block +note output +fin.entry +} +FUNCTION {mastersthesis} % entry type: authors, year, title, thesis type (default "Master's thesis"), school, address, note +{ output.bibitem +format.authors "author" output.check +new.block format.year "year" output.check +new.block +format.title "title" output.check +new.block +"Master's thesis" format.thesis.type output.nonnull +school "school" output.check +address output +% format.date "year" output.check +new.block +note output +fin.entry +} +FUNCTION {misc} % entry type: authors, year, title, howpublished, note; empty.misc.check runs after the entry is written +{ output.bibitem +format.authors output +new.block format.year "year" output.check +title howpublished new.block.checkb +format.title output +howpublished new.block.checka +howpublished output +% format.date output +new.block +note output +fin.entry +empty.misc.check +} +FUNCTION {phdthesis} % entry type: authors, year, emphasized title, thesis type (default "Ph.D. Dissertation"), school, address, note +{ output.bibitem +format.authors "author" output.check +new.block format.year "year" output.check +new.block +format.btitle "title" output.check +new.block +"Ph.D. 
Dissertation" format.thesis.type output.nonnull +school "school" output.check +address output +% format.date "year" output.check +new.block +note output +fin.entry +} +FUNCTION {proceedings} % entry type: editors (or organization), year, emphasized title, volume/series, then address, publisher or organization, note +{ output.bibitem +editor empty$ +{ organization output } +{ format.editors output.nonnull } +if$ +new.block format.year "year" output.check +new.block +format.btitle "title" output.check +format.bvolume output +format.number.series output +editor empty$ +{ publisher empty$ +{ address output } +{ format.publisher output } +if$ +} +{ organization empty$ +{ publisher new.sentence.checka +format.publisher output +} +{ organization new.sentence.checka % one-argument check; the two-argument checkb would also pop the pending output string and leave the stack empty for the next output +format.organization output +} +if$ +} +if$ +new.block +note output +fin.entry +} +FUNCTION {techreport} % entry type: authors, year, title, report type and number, institution, address, note +{ output.bibitem +format.authors "author" output.check +new.block format.year "year" output.check +new.block +format.title "title" output.check +new.block +format.tr.number output.nonnull +institution "institution" output.check +address output +% format.date "year" output.check +new.block +note output +fin.entry +} +FUNCTION {unpublished} % entry type: authors, year, title, and a required note +{ output.bibitem +format.authors "author" output.check +new.block format.year "year" output.check +new.block +format.title "title" output.check +new.block +note "note" output.check +% format.date output +fin.entry +} +FUNCTION {default.type} { misc } % entry types without a function of their own are formatted by misc +MACRO {jan} {"January"} % month-name macros +MACRO {feb} {"February"} +MACRO {mar} {"March"} +MACRO {apr} {"April"} +MACRO {may} {"May"} +MACRO {jun} {"June"} +MACRO {jul} {"July"} +MACRO {aug} {"August"} +MACRO {sep} {"September"} +MACRO {oct} {"October"} +MACRO {nov} {"November"} +MACRO {dec} {"December"} +MACRO {acmcs} {"ACM Computing Surveys"} % journal-name macros +MACRO {acta} {"Acta Informatica"} +MACRO {cacm} {"Communications of the ACM"} +MACRO {ibmjrd} {"IBM Journal of Research and Development"} +MACRO {ibmsj} {"IBM Systems Journal"} +MACRO {ieeese} {"IEEE Transactions on Software Engineering"} +MACRO {ieeetc} {"IEEE Transactions on Computers"} +MACRO {ieeetcad} +{"IEEE 
Transactions on Computer-Aided Design of Integrated Circuits"} +MACRO {ipl} {"Information Processing Letters"} +MACRO {jacm} {"Journal of the ACM"} +MACRO {jcss} {"Journal of Computer and System Sciences"} +MACRO {scp} {"Science of Computer Programming"} +MACRO {sicomp} {"SIAM Journal on Computing"} +MACRO {tocs} {"ACM Transactions on Computer Systems"} +MACRO {tods} {"ACM Transactions on Database Systems"} +MACRO {tog} {"ACM Transactions on Graphics"} +MACRO {toms} {"ACM Transactions on Mathematical Software"} +MACRO {toois} {"ACM Transactions on Office Information Systems"} +MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"} +MACRO {tcs} {"Theoretical Computer Science"} +READ +FUNCTION {sortify} % purify$ followed by lower-casing +{ purify$ +"l" change.case$ +} +INTEGERS { len } % prefix length used by chop.word +FUNCTION {chop.word} % stack: word len string; leaves string without its first len characters when those equal word, otherwise string unchanged +{ 's := +'len := +s #1 len substring$ = +{ s len #1 + global.max$ substring$ } +'s +if$ +} +FUNCTION {format.lab.names} % stack: name list; leaves the author part of the citation label: one, two or three von-Last names, or the first name followed by et al +{ 's := +s num.names$ 'numnames := +numnames #1 = +{ s #1 "{vv }{ll}" format.name$ } +{ numnames #3 > % abbreviate when there are more than three names or when the list ends in "and others" +s numnames "{ff }{vv }{ll}{ jj}" format.name$ "others" = +or +{ s #1 "{vv }{ll }\bgroup et al\mbox{.}\egroup " format.name$ } +{ numnames #2 = +{ s #1 "{vv }{ll} and " format.name$ s #2 "{vv }{ll}" format.name$ * } +{ s #1 "{vv }{ll}, " format.name$ % exactly three real names +s #2 "{vv }{ll}, and " format.name$ * +s #3 "{vv }{ll}" format.name$ * +} +if$ +} +if$ +} +if$ +} +FUNCTION {author.key.label} % label names from author, else key, else the first three characters of the cite key +{ author empty$ +{ key empty$ +{ cite$ #1 #3 substring$ } +{ key } +if$ +} +{ author format.lab.names } +if$ +} +FUNCTION {author.editor.key.label} % label names from author, else editor, else key, else the first three characters of the cite key +{ author empty$ +{ editor empty$ +{ key empty$ +{ cite$ #1 #3 substring$ } +{ key } +if$ +} +{ editor format.lab.names } +if$ +} +{ author format.lab.names } +if$ +} +FUNCTION {author.key.organization.label} % label names from author, else key, else a three-character prefix of organization without a leading "The ", else of the cite key +{ author empty$ +{ key empty$ +{ organization empty$ +{ cite$ #1 #3 substring$ } +{ "The " #4 organization chop.word #3 text.prefix$ } +if$ +} +{ key } +if$ +} +{ author format.lab.names } +if$ +} +FUNCTION {editor.key.organization.label} % same as author.key.organization.label with editor in place of author +{ editor empty$ +{ key empty$ +{ organization empty$ +{ 
cite$ #1 #3 substring$ } +{ "The " #4 organization chop.word #3 text.prefix$ } +if$ +} +{ key } +if$ +} +{ editor format.lab.names } +if$ +} +FUNCTION {calc.label} % sets label to the opening part of a \citeauthoryear command (names and year; reverse.pass appends the closing brace) and sort.label to the sortified names and year +{ type$ "book" = +type$ "inbook" = +or +'author.editor.key.label +{ type$ "proceedings" = +'editor.key.organization.label +{ type$ "manual" = +'author.key.organization.label +'author.key.label +if$ +} +if$ +} +if$ +duplicate$ +"\protect\citeauthoryear{" swap$ * "}{" * +year field.or.null purify$ * % CHANGED - pfps - 15 Feb 1989 +'label := +year field.or.null purify$ * +sortify 'sort.label := +} +FUNCTION {sort.format.names} % stack: name list; leaves its sort form: each name as von Last First Jr, sortified and space separated; a trailing "others" becomes "et al" +{ 's := +#1 'nameptr := +"" +s num.names$ 'numnames := +numnames 'namesleft := +{ namesleft #0 > } +{ nameptr #1 > +{ " " * } +'skip$ +if$ +s nameptr "{vv{ } }{ll{ }}{ ff{ }}{ jj{ }}" format.name$ 't := +nameptr numnames = t "others" = and +{ "et al" * } +{ t sortify * } +if$ +nameptr #1 + 'nameptr := +namesleft #1 - 'namesleft := +} +while$ +} +FUNCTION {sort.format.title} % strip a leading "The ", then "An ", then "A " (each only if present), then sortify +{ 't := +"A " #2 +"An " #3 +"The " #4 t chop.word +chop.word +chop.word +sortify +#1 global.max$ substring$ +} +FUNCTION {author.sort} % sort names from author, else the sortified key, else "" with a warning +{ author empty$ +{ key empty$ +{ "to sort, need author or key in " cite$ * warning$ +"" +} +{ key sortify } +if$ +} +{ author sort.format.names } +if$ +} +FUNCTION {author.editor.sort} % sort names from author, else editor, else the sortified key, else "" with a warning +{ author empty$ +{ editor empty$ +{ key empty$ +{ "to sort, need author, editor, or key in " cite$ * warning$ +"" +} +{ key sortify } +if$ +} +{ editor sort.format.names } +if$ +} +{ author sort.format.names } +if$ +} +FUNCTION {author.organization.sort} % sort names from author, else organization without a leading "The ", else the sortified key, else "" with a warning +{ author empty$ +{ organization empty$ +{ key empty$ +{ "to sort, need author, organization, or key in " cite$ * warning$ +"" +} +{ key sortify } +if$ +} +{ "The " #4 organization chop.word sortify } +if$ +} +{ author sort.format.names } +if$ +} +FUNCTION {editor.organization.sort} % same as author.organization.sort with editor in place of author +{ editor empty$ +{ organization empty$ +{ key empty$ +{ "to sort, need editor, organization, or key in " cite$ * warning$ +"" +} +{ key sortify } +if$ +} +{ "The " 
#4 organization chop.word sortify } +if$ +} +{ editor sort.format.names } +if$ +} +FUNCTION {presort} % computes the labels, then sets sort.key$ to sort.label, the sort names, the year and the sort title, space separated and cut to entry.max$ characters +{ calc.label +sort.label +" " +* +type$ "book" = +type$ "inbook" = +or +'author.editor.sort +{ type$ "proceedings" = +'editor.organization.sort +{ type$ "manual" = +'author.organization.sort +'author.sort +if$ +} +if$ +} +if$ +* +" " +* +year field.or.null sortify +* +" " +* +title field.or.null +sort.format.title +* +#1 entry.max$ substring$ +'sort.key$ := +} +ITERATE {presort} +SORT +STRINGS { longest.label last.sort.label next.extra } % state shared by the two label passes below +INTEGERS { longest.label.width last.extra.num } +FUNCTION {initialize.longest.label} % reset the state used by forward.pass and reverse.pass +{ "" 'longest.label := +#0 int.to.chr$ 'last.sort.label := +"" 'next.extra := +#0 'longest.label.width := +#0 'last.extra.num := +} +FUNCTION {forward.pass} % in sorted order: an entry whose sort.label repeats the previous one gets the next letter (b, c, ...) as extra.label; any other entry gets "" +{ last.sort.label sort.label = +{ last.extra.num #1 + 'last.extra.num := +last.extra.num int.to.chr$ 'extra.label := +} +{ "a" chr.to.int$ 'last.extra.num := +"" 'extra.label := +sort.label 'last.sort.label := +} +if$ +} +FUNCTION {reverse.pass} % in reverse order: the entry just before a "b" gets "a"; extra.label and the closing brace are appended to label; the widest label is recorded +{ next.extra "b" = +{ "a" 'extra.label := } +'skip$ +if$ +label extra.label * "}" * 'label := % CHANGED - pfps 15 Feb 1989 +label width$ longest.label.width > +{ label 'longest.label := +label width$ 'longest.label.width := +} +'skip$ +if$ +extra.label 'next.extra := +} +EXECUTE {initialize.longest.label} +ITERATE {forward.pass} +REVERSE {reverse.pass} +FUNCTION {begin.bib} % write the preamble when there is one, then the opening of the thebibliography environment +{ preamble$ empty$ +'skip$ +{ preamble$ write$ newline$ } +if$ +"\begin{thebibliography}{}" write$ newline$ +} +EXECUTE {begin.bib} +EXECUTE {init.state.consts} +ITERATE {call.type$} % format every entry with the function named after its entry type +FUNCTION {end.bib} % write the end of the thebibliography environment +{ newline$ +"\end{thebibliography}" write$ newline$ +} +EXECUTE {end.bib} \ No newline at end of file diff --git a/docs/paper/overleaf/aaai.sty b/docs/paper/overleaf/aaai.sty new file mode 100644 index 0000000..326d427 --- /dev/null +++ b/docs/paper/overleaf/aaai.sty @@ -0,0 +1,295 @@ +%Filename: aaai.sty +% +\typeout{Conference Style for AAAI for LaTeX 2e -- version of 1 
December 2013} +% WARNING: IF YOU ARE USING THIS STYLE SHEET FOR AN AAAI PUBLICATION, YOU +% MAY NOT MODIFY IT FOR ANY REASON. MODIFICATIONS (IN YOUR SOURCE +% OR IN THIS STYLE SHEET WILL RESULT IN REJECTION OF YOUR PAPER). + +% NOTICE: DO NOT MODIFY THIS FILE WITHOUT CHANGING ITS NAME. This style +% file is called aaai.sty. Modifications to this file are permitted, +% provided that your modified version does not include the acronym "aaai" +% in its name, that credit to the authors and supporting agencies is +% retained, and that further modification or reuse is not restricted. This +% file was originally prepared by Peter F. Patel-Schneider, liberally +% using the ideas of other style hackers, including Barbara Beeton. It was +% modified in April 1999 by J. Scott Penberthy and George Ferguson. It was +% modified in 2007 by AAAI. It was modified in February 2009 +% and in November 2009 by Hans W. Guesgen and Giuseppe De Giacomo. It +% was further modified in March 2010 by AAAI. +% The original preparation of this file was supported by +% Schlumberger Palo Alto Research, AT\&T Bell Laboratories, AAAI, and +% Morgan Kaufmann Publishers. +% +% WARNING: This style is NOT guaranteed to work. It is provided in the +% hope that it might make the preparation of papers easier, but this style +% file is provided "as is" without warranty of any kind, either express or +% implied, including but not limited to the implied warranties of +% merchantability, fitness for a particular purpose, or noninfringement. +% You use this style file at your own risk. Standard disclaimers apply. +% +% Do not use this file unless you are an experienced LaTeX user. To +% satisfy AAAI's requirements, you must change your paper's configuration +% to use Times fonts. AAAI will not accept your paper if it is formatted +% using obsolete type 3 Computer Modern bitmapped fonts. Please ensure +% that your version of dvips maps to type 1 fonts. 
Place this document in +% a file called aaai.sty in the TeX search path. (Placing it in the same +% directory as the paper should also work.) +% +% You must also format your paper for US letter-sized paper. +% +% There are undoubtably bugs in this style. If you would like to submit +% bug fixes, improvements, etc. please let us know. Please use the contact form +% at www.aaai.org. +% +% USE OF PDFTeX IS NOW REQUIRED +% \documentclass[letterpaper]{article} +% \usepackage{aaai} +% \usepackage{times} +% \usepackage{helvet} +% \usepackage{courier} +% \setlength{\pdfpagewidth}{8.5in} +% \setlength{\pdfpageheight}{11in} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% IMPORTANT -- ADDITION OF A PDF MARK WITH YOUR PAPER TITLE +% AND ALL AUTHOR NAMES IS REQUIRED ON ALL AAAI PAPERS +% COMMENT OUT AUTHOR NAMES FOR SUBMISSION +% ENABLE AUTHOR NAMES FOR FINAL CAMERA READY COPY +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% PDFINFO for PDFTeX +% Uncomment and complete the following for metadata if +% your paper is typeset using PDFTeX +% \pdfinfo{ +% /Title (Input Your Paper Title Here) +% /Subject (Input the Proceedings Title Here) +% /Author (John Doe, Jane Doe) +% } +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Section Numbers +% Uncomment if you want to use section numbers +% and change the 0 to a 1 or 2 +% \setcounter{secnumdepth}{0} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% \title{Title} +% \author{Author 1 \and Author 2 \\ Address line \\ Address line \And +% Author 3 \\ Address line \\ Address line} +% \begin{document} +% \maketitle +% ... +% \bibliography{Bibliography-File} +% \bibliographystyle{aaai} +% \end{document} +% \pubnote{\em To appear, AAAI-10} % optional, remove for submission +% +% \pubnote is for printing the paper yourself, and should not be used in +% submitted versions!!!! +% Author information can be set in various styles: +% For several authors from the same institution: +% \author{Author 1 \and ... \and Author n \\ +% Address line \\ ... 
\\ Address line} +% if the names do not fit well on one line use +% \author{Author 1 \\ {\bf Author 2} \\ ... \\ {\bf Author n} \\ +% Address line \\ ... \\ Address line} +% For authors from different institutions: +% \author{Author 1 \\ Address line \\ ... \\ Address line +% \And ... \And +% Author n \\ Address line \\ ... \\ Address line} +% To start a separate ``row'' of authors use \AND, as in +% \author{Author 1 \\ Address line \\ ... \\ Address line +% \AND +% Author 2 \\ Address line \\ ... \\ Address line \And +% Author 3 \\ Address line \\ ... \\ Address line} +% If the title and author information does not fit in the area allocated, +% place \setlength\titlebox{height} +% after the \documentstyle line +% where {height} is something like 2.5in +% PHYSICAL PAGE LAYOUT +\setlength\topmargin{-0.25in} \setlength\oddsidemargin{-0.25in} +\setlength\textheight{9.0in} \setlength\textwidth{7.0in} +\setlength\columnsep{0.375in} \newlength\titlebox \setlength\titlebox{2.25in} +\setlength\headheight{0pt} \setlength\headsep{0pt} +%\setlength\footheight{0pt} \setlength\footskip{0pt} +\thispagestyle{empty} \pagestyle{empty} +\flushbottom \twocolumn \sloppy +% We're never going to need a table of contents, so just flush it to +% save space --- suggested by drstrip@sandia-2 +\def\addcontentsline#1#2#3{} +% gf: PRINT COPYRIGHT NOTICE +\def\copyright@year{\number\year} +\def\copyright@text{Copyright \copyright\space \copyright@year, +Association for the Advancement of Artificial Intelligence (www.aaai.org). +All rights reserved.} +\def\copyright@on{T} +\def\nocopyright{\gdef\copyright@on{}} +\def\copyrighttext#1{\gdef\copyright@on{T}\gdef\copyright@text{#1}} +\def\copyrightyear#1{\gdef\copyright@on{T}\gdef\copyright@year{#1}} +% gf: End changes for copyright notice (used in \maketitle, below) +% Title stuff, taken from deproc. 
+\def\maketitle{\par +\begingroup % to make the footnote style local to the title +\def\thefootnote{\fnsymbol{footnote}} +% gf: Don't see why we'd want the footnotemark to be 0pt wide +%\def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}} +\twocolumn[\@maketitle] \@thanks +\endgroup +% gf: Insert copyright slug unless turned off +\if T\copyright@on\insert\footins{\noindent\footnotesize\copyright@text}\fi +% gf: And now back to your regular programming +\setcounter{footnote}{0} +\let\maketitle\relax \let\@maketitle\relax +\gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax} +\def\@maketitle{\vbox to \titlebox{\hsize\textwidth +%%% AAAI changed: 03/05/2010 +%%\linewidth\hsize \vskip 0.625in minus 0.125in \centering +\linewidth\hsize \vskip 0.625in minus 0.125in \centering +%%% END changed +{\LARGE\bf \@title \par} \vskip 0.2in plus 1fil minus 0.1in +{\def\and{\unskip\enspace{\rm and}\enspace}% +\def\And{\end{tabular}\hss \egroup \hskip 1in plus 2fil + \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\Large\bf}% +\def\AND{\end{tabular}\hss\egroup \hfil\hfil\egroup +\vskip 0.25in plus 1fil minus 0.125in +% hg: Changed Large to normalsize on next line + \hbox to \linewidth\bgroup\normalsize \hfil\hfil +\hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\Large\bf} +\hbox to \linewidth\bgroup\normalsize \hfil\hfil +\hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\Large\bf\@author +\end{tabular}\hss\egroup +\hfil\hfil\egroup} +\vskip 0.3in plus 2fil minus 0.1in +}} +\renewenvironment{abstract}{\centerline{\bf +Abstract}\vspace{0.5ex}\begin{quote}\small}{\par\end{quote}\vskip 1ex} +% jsp added: +\def\pubnote#1{\thispagestyle{myheadings} +\pagestyle{myheadings} +\markboth{#1}{#1} +\setlength\headheight{10pt} \setlength\headsep{10pt} +} +% SECTIONS with less space +\def\section{\@startsection {section}{1}{\z@}{-2.0ex plus +-0.5ex minus -.2ex}{3pt plus 2pt minus 1pt}{\Large\bf\centering}} +\def\subsection{\@startsection{subsection}{2}{\z@}{-2.0ex plus +-0.5ex minus -.2ex}{3pt 
plus 2pt minus 1pt}{\large\bf\raggedright}} +\def\subsubsection{\@startsection{subparagraph}{3}{\z@}{-6pt plus +%%% DIEGO changed: 29/11/2009 +%% 2pt minus 1pt}{-1em}{\normalsize\bf}} +-2pt minus -1pt}{-1em}{\normalsize\bf}} +%%% END changed +\setcounter{secnumdepth}{0} +% add period to section (but not subsection) numbers, reduce space after +%\renewcommand{\thesection} +% {\arabic{section}.\hskip-0.6em} +%\renewcommand{\thesubsection} +% {\arabic{section}.\arabic{subsection}\hskip-0.6em} +% FOOTNOTES +\footnotesep 6.65pt % +\skip\footins 9pt plus 4pt minus 2pt +\def\footnoterule{\kern-3pt \hrule width 5pc \kern 2.6pt } +\setcounter{footnote}{0} +% LISTS AND PARAGRAPHS +\parindent 10pt +\topsep 4pt plus 1pt minus 2pt +\partopsep 1pt plus 0.5pt minus 0.5pt +\itemsep 2pt plus 1pt minus 0.5pt +\parsep 2pt plus 1pt minus 0.5pt +\leftmargin 10pt \leftmargini\leftmargin \leftmarginii 10pt +\leftmarginiii 5pt \leftmarginiv 5pt \leftmarginv 5pt \leftmarginvi 5pt +\labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt +\def\@listi{\leftmargin\leftmargini} +\def\@listii{\leftmargin\leftmarginii +\labelwidth\leftmarginii\advance\labelwidth-\labelsep +\topsep 2pt plus 1pt minus 0.5pt +\parsep 1pt plus 0.5pt minus 0.5pt +\itemsep \parsep} +\def\@listiii{\leftmargin\leftmarginiii +\labelwidth\leftmarginiii\advance\labelwidth-\labelsep +\topsep 1pt plus 0.5pt minus 0.5pt +\parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt +\itemsep \topsep} +\def\@listiv{\leftmargin\leftmarginiv +\labelwidth\leftmarginiv\advance\labelwidth-\labelsep} +\def\@listv{\leftmargin\leftmarginv +\labelwidth\leftmarginv\advance\labelwidth-\labelsep} +\def\@listvi{\leftmargin\leftmarginvi +\labelwidth\leftmarginvi\advance\labelwidth-\labelsep} +\abovedisplayskip 7pt plus2pt minus5pt% +\belowdisplayskip \abovedisplayskip +\abovedisplayshortskip 0pt plus3pt% +\belowdisplayshortskip 4pt plus3pt minus3pt% +% Less leading in most fonts (due to the narrow columns) +% The choices were between 1-pt and 
1.5-pt leading +\def\normalsize{\@setfontsize\normalsize\@xpt{11}} % 10 point on 11 +\def\small{\@setfontsize\small\@ixpt{10}} % 9 point on 10 +\def\footnotesize{\@setfontsize\footnotesize\@ixpt{10}} % 9 point on 10 +\def\scriptsize{\@setfontsize\scriptsize\@viipt{10}} % 7 point on 10 (comment formerly said 8; TODO confirm intended leading) +\def\tiny{\@setfontsize\tiny\@vipt{7}} % 6 point on 7 +\def\large{\@setfontsize\large\@xipt{12}} % 11 point on 12 +\def\Large{\@setfontsize\Large\@xiipt{14}} % 12 point on 14 +\def\LARGE{\@setfontsize\LARGE\@xivpt{16}} % 14 point on 16 +\def\huge{\@setfontsize\huge\@xviipt{20}} % 17 point on 20 +\def\Huge{\@setfontsize\Huge\@xxpt{23}} % 20 point on 23 +%%%% named style for aaai, included here for ease of use +% This section implements citations for the ``named'' bibliography style, +% modified for AAAI use. +% This file can be modified and used in other conferences as long +% as credit to the authors and supporting agencies is retained, this notice +% is not changed, and further modification or reuse is not restricted. 
+% The ``named'' bibliography style creates citations with labels like +% \citeauthoryear{author-info}{year} +% these labels are processed by the following commands: +% \cite{keylist} +% which produces citations with both author and year, +% enclosed in parentheses (see \leftcite/\rightcite) +% \shortcite{keylist} +% which produces citations with year only, +% enclosed in parentheses (see \leftcite/\rightcite) +% \citeauthor{key} +% which produces the author information only +% \citeyear{key} +% which produces the year information only +\def\leftcite{(}\def\rightcite{)} +\def\cite{\def\citeauthoryear##1##2{\def\@thisauthor{##1}% +\ifx \@lastauthor \@thisauthor \relax \else##1 \fi ##2}\@icite} +\def\shortcite{\def\citeauthoryear##1##2{##2}\@icite} +\def\citeauthor{\def\citeauthoryear##1##2{##1}\@nbcite} +\def\citeyear{\def\citeauthoryear##1##2{##2}\@nbcite} +% internal macro for citations with () and with breaks between citations +% used in \cite and \shortcite +\def\@icite{\leavevmode\def\@citeseppen{-1000}% +\def\@cite##1##2{\leftcite\nobreak\hskip 0in{##1\if@tempswa , ##2\fi}\rightcite}% +\@ifnextchar [{\@tempswatrue\@citex}{\@tempswafalse\@citex[]}} +% internal macro for citations without () and with no breaks +% used in \citeauthor and \citeyear +\def\@nbcite{\leavevmode\def\@citeseppen{1000}% +\def\@cite##1##2{{##1\if@tempswa , ##2\fi}}% +\@ifnextchar [{\@tempswatrue\@citex}{\@tempswafalse\@citex[]}} +% don't box citations, separate with ; and a space +% also, make the penalty between citations a parameter, +% it may be a good place to break +\def\@citex[#1]#2{% +\def\@lastauthor{}\def\@citea{}% +\@cite{\@for\@citeb:=#2\do +{\@citea\def\@citea{;\penalty\@citeseppen\ }% +\if@filesw\immediate\write\@auxout{\string\citation{\@citeb}}\fi +\@ifundefined{b@\@citeb}{\def\@thisauthor{}{\bf ?}\@warning +{Citation `\@citeb' on page \thepage \space undefined}}% +{\csname b@\@citeb\endcsname}\let\@lastauthor\@thisauthor}}{#1}} +%Ignore the key when generating the Reference section. 
+\def\@lbibitem[#1]#2{\item\if@filesw +{ \def\protect##1{\string ##1\space}\immediate +\write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces} +\def\thebibliography#1{\section*{References\@mkboth +{REFERENCES}{REFERENCES}}\list +{}{\labelwidth 0in\leftmargin\labelwidth +%%% DIEGO removed +%%\advance\leftmargin\labelsep +%%% END removed +%%% DIEGO changed +\itemsep .01in % original +%%\itemsep -.0125in % reduced space between bib entries +%%% END changed +} +\def\newblock{\hskip .11em plus .33em minus .07em} +\sloppy\clubpenalty4000\widowpenalty4000 +\sfcode`\.=1000\relax} +\let\endthebibliography=\endlist \ No newline at end of file diff --git a/docs/paper/overleaf/fixbib.sty b/docs/paper/overleaf/fixbib.sty new file mode 100644 index 0000000..9e311fa --- /dev/null +++ b/docs/paper/overleaf/fixbib.sty @@ -0,0 +1,138 @@ +%%%% This sty file contains all necessary bibliographic code to +%%%% produce AAAI / AI Magazine author-year style referecnes. +%%%% Stolen from ijcai97.sty by Dan Weld 1/27/99 + +% Lists +\leftmargini 2em +\leftmarginii 2em +\leftmarginiii 1em +\leftmarginiv 0.5em +\leftmarginv 0.5em +\leftmarginvi 0.5em + +\leftmargin\leftmargini +\labelsep 5pt +\labelwidth\leftmargini\advance\labelwidth-\labelsep + +\def\@listI{\leftmargin\leftmargini +\parsep 2pt plus 1pt minus 0.5pt% +\topsep 4pt plus 1pt minus 2pt% +\itemsep 2pt plus 1pt minus 0.5pt% +\partopsep 1pt plus 0.5pt minus 0.5pt} + +\let\@listi\@listI +\@listi + +\def\@listii{\leftmargin\leftmarginii + \labelwidth\leftmarginii\advance\labelwidth-\labelsep + \parsep 1pt plus 0.5pt minus 0.5pt + \topsep 2pt plus 1pt minus 0.5pt + \itemsep \parsep} +\def\@listiii{\leftmargin\leftmarginiii + \labelwidth\leftmarginiii\advance\labelwidth-\labelsep + \parsep 0pt plus 1pt + \partopsep 0.5pt plus 0pt minus 0.5pt + \topsep 1pt plus 0.5pt minus 0.5pt + \itemsep \topsep} +\def\@listiv{\leftmargin\leftmarginiv + \labelwidth\leftmarginiv\advance\labelwidth-\labelsep} +\def\@listv{\leftmargin\leftmarginv + 
\labelwidth\leftmarginv\advance\labelwidth-\labelsep} +\def\@listvi{\leftmargin\leftmarginvi + \labelwidth\leftmarginvi\advance\labelwidth-\labelsep} + +% We're never going to need a table of contents, so just flush it to +% save space --- suggested by drstrip@sandia-2 +%\def\addcontentsline#1#2#3{} + +%%%% named.sty + +\typeout{Named Citation Style, version of 30 November 1994} + +% This file implements citations for the ``named'' bibliography style. +% Place it in a file called named.sty in the TeX search path. (Placing it +% in the same directory as the LaTeX document should also work.) + +% Prepared by Peter F. Patel-Schneider, with the assistance of several, +% since forgotten, LaTeX hackers. +% This style is NOT guaranteed to work. It is provided in the hope +% that it will make the preparation of papers easier. +% +% There are undoubtably bugs in this style. If you make bug fixes, +% improvements, etc. please let me know. My e-mail address is: +% pfps@research.att.com + +% The preparation of this file was supported by Schlumberger Palo Alto +% Research and AT\&T Bell Laboratories. + +% This file can be modified and used in other conferences as long +% as credit to the authors and supporting agencies is retained, this notice +% is not changed, and further modification or reuse is not restricted. 
+ +% The ``named'' bibliography style creates citations with labels like +% \citeauthoryear{author-info}{year} +% these labels are processed by the following commands: +% \cite{keylist} +% which produces citations with both author and year, +% enclosed in square brackets +% \shortcite{keylist} +% which produces citations with year only, +% enclosed in square brackets +% \citeauthor{key} +% which produces the author information only +% \citeyear{key} +% which produces the year information only + +\def\leftcite{\@up[}\def\rightcite{\@up]} + +\def\cite{\def\citeauthoryear##1##2{\def\@thisauthor{##1}% + \ifx \@lastauthor \@thisauthor \relax \else##1, \fi ##2}\@icite} +\def\shortcite{\def\citeauthoryear##1##2{##2}\@icite} + +\def\citeauthor{\def\citeauthoryear##1##2{##1}\@nbcite} +\def\citeyear{\def\citeauthoryear##1##2{##2}\@nbcite} + +% internal macro for citations with [] and with breaks between citations +% used in \cite and \shortcite +\def\@icite{\leavevmode\def\@citeseppen{-1000}% + \def\@cite##1##2{\leftcite\nobreak\hskip 0in{##1\if@tempswa , ##2\fi}\rightcite}% + \@ifnextchar [{\@tempswatrue\@citex}{\@tempswafalse\@citex[]}} +% internal macro for citations without [] and with no breaks +% used in \citeauthor and \citeyear +\def\@nbcite{\leavevmode\def\@citeseppen{1000}% + \def\@cite##1##2{{##1\if@tempswa , ##2\fi}}% + \@ifnextchar [{\@tempswatrue\@citex}{\@tempswafalse\@citex[]}} + +% don't box citations, separate with ; and a space +% also, make the penalty between citations a parameter, +% it may be a good place to break +\def\@citex[#1]#2{% + \def\@lastauthor{}\def\@citea{}% + \@cite{\@for\@citeb:=#2\do + {\@citea\def\@citea{;\penalty\@citeseppen\ }% + \if@filesw\immediate\write\@auxout{\string\citation{\@citeb}}\fi + \@ifundefined{b@\@citeb}{\def\@thisauthor{}{\bf ?}\@warning + {Citation `\@citeb' on page \thepage \space undefined}}% + {\csname b@\@citeb\endcsname}\let\@lastauthor\@thisauthor}}{#1}} + +% raise the brackets in bibliography labels 
+\def\@biblabel#1{\def\citeauthoryear##1##2{##1, ##2}\@up{[}#1\@up{]}\hfill} + +\def\@up#1{\leavevmode\raise.2ex\hbox{#1}} + +% Optional changes + +%%%% use parentheses in the reference list and citations +%\def\leftcite{(}\def\rightcite{)} +%\def\@biblabel#1{\def\citeauthoryear##1##2{##1, ##2}(#1)\hfill} + +%%%% no key in the reference list +%\def\@lbibitem[#1]#2{\item\if@filesw +% { \def\protect##1{\string ##1\space}\immediate +% \write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces} +%\def\thebibliography#1{\section*{References\@mkboth +% {REFERENCES}{REFERENCES}}\list +% {}{\labelwidth 0pt\leftmargin\labelwidth \itemsep 0.5ex} +% \def\newblock{\hskip .11em plus .33em minus .07em} +% \sloppy\clubpenalty4000\widowpenalty4000 +% \sfcode`\.=1000\relax} \ No newline at end of file diff --git a/docs/paper/overleaf/images/DQN_score.png b/docs/paper/overleaf/images/DQN_score.png new file mode 100644 index 0000000..f6a440e Binary files /dev/null and b/docs/paper/overleaf/images/DQN_score.png differ diff --git a/docs/paper/overleaf/images/PPO_score.png b/docs/paper/overleaf/images/PPO_score.png new file mode 100644 index 0000000..a6a0e26 Binary files /dev/null and b/docs/paper/overleaf/images/PPO_score.png differ diff --git a/docs/paper/overleaf/images/flappy-bird.jpeg b/docs/paper/overleaf/images/flappy-bird.jpeg new file mode 100644 index 0000000..f43c153 Binary files /dev/null and b/docs/paper/overleaf/images/flappy-bird.jpeg differ diff --git a/docs/paper/overleaf/main.tex b/docs/paper/overleaf/main.tex new file mode 100644 index 0000000..164d4ef --- /dev/null +++ b/docs/paper/overleaf/main.tex @@ -0,0 +1,260 @@ +%File: formatting-instruction.tex +\documentclass[letterpaper]{article} +\usepackage{aaai} +\usepackage{times} +\usepackage{helvet} +\usepackage{courier} +\usepackage{graphicx} +\usepackage{url} +\usepackage{algorithm} +\usepackage{algpseudocode} + +\frenchspacing +\setlength{\pdfpagewidth}{8.5in} +\setlength{\pdfpageheight}{11in} +\pdfinfo{ +/Title 
(Implementation of RL Algorithms in OpenAI Gym) +/Author (Douglas Trajano, Sirleno Vidaletti)} +\setcounter{secnumdepth}{0} + + \begin{document} + +\title{Playing Flappy Bird with Reinforcement Learning} +\author{Douglas Trajano\\ +Pontifical Catholic University of Rio Grande do Sul - PUCRS\\ +School of Technology. Porto Alegre, Brazil\\ +douglas.trajano@edu.pucrs.br +} +\maketitle +\begin{abstract} +\begin{quote} +Flappy Bird is an electronic game created in 2013. The objective in the game is to earn as many points as possible by controlling a bird, without letting it crash into the pipes. The Flappy Bird environment uses the OpenAI Gym API. In this work, we will implement reinforcement learning algorithms such as Deep Q-Network (DQN) and Proximal Policy Optimization (PPO) that will be used to train the agent to play the game. +\end{quote} +\end{abstract} + +\section{Introduction} + +Flappy Bird was released in May 2013, but it received a sudden rise in popularity in early 2014, becoming a viral hit. + +The game was developed by Vietnamese programmer Dong Nguyen. It has simple gameplay: the player controls a bird, attempting to fly between green pipes without hitting them. Flappy Bird received poor reviews from some critics, who criticized its high level of difficulty and alleged plagiarism in graphics and game mechanics, while other reviewers found it addictive. + +Flappy Bird was removed from both the App Store and Google Play by its creator on February 10, 2014. He claims that he felt guilt over what he considered to be its addictive nature and overuse. + +\begin{figure}[ht] + \centering + \includegraphics[width=8cm]{images/flappy-bird.jpeg} + \caption{Flappy Bird Game} + \label{fig:flappy-bird-game} +\end{figure} + +Learning to control agents directly from high-dimensional sensory inputs like vision and speech is +one of the long-standing challenges of reinforcement learning (RL). 
The basic idea behind many reinforcement learning algorithms is to estimate the action-value function. The goal of the agent (powered by a reinforcement learning algorithm) is to interact with the emulator by selecting actions in a way that maximizes future rewards.\cite{mnih2013playing} + +We will develop some reinforcement learning algorithms that will be used to train our agents in the OpenAI Gym environment. OpenAI Gym is a toolkit for reinforcement learning research. It includes a growing collection of benchmark problems that expose a common interface.\cite{brockman2016openai} + +\section{Approach} + +Our technical approach consists of two parts. The first part is the definition of the environment. The environment that will be explored in this project is provided by OpenAI Gym. The second part is the reinforcement learning algorithm, which will be implemented by ourselves. + +\subsection{Environment} +The Flappy Bird environment was developed by Gabriel Nogueira and is publicly available on GitHub~\footnote{\url{https://github.com/Talendar/flappy-bird-gym}}, it also can be installed using PyPI~\footnote{\url{https://pypi.org/project/flappy-bird-gym/}}. It was developed in Python and uses the OpenAI Gym API. + +The state observation is composed of the horizontal distance to the next pipe and the difference between the player's y position and the next hole's y position. + +Two actions are available: do nothing and jump. + +\subsection{RL Algorithms} + +The algorithms are responsible for learning the policy to solve the problem, it will be used to take actions in the environment. The reinforcement learning (RL) algorithms were developed in Python, we developed a base class with a random policy, it also provides the API to implement the RL algorithms. The RL algorithms are: + +\subsubsection{Deep Q-Network (DQN)} combines Q-Learning with deep neural networks to let RL work for complex, high-dimensional environments, like video games, or robotics. 
A critical component of DQN-style algorithms is the memory buffer known as experience replay, it holds the most recent transitions collected by the policy.\cite{fedus2020revisiting} + +Two different approaches of the memory buffer will be developed and tested. + +\begin{itemize} + \item \textbf{Experience Replay (ER)}: The most basic sampling strategy, it uses uniform sampling, whereby each transition in the buffer is sampled with equal probability.\cite{fedus2020revisiting} + \item \textbf{Prioritized Experience Replay (PER)}: Extends experience replay function by learning to replay memories where the real reward significantly diverges from the expected reward, letting the agent adjust itself in response to developing incorrect assumptions.\cite{schaul2015prioritized} +\end{itemize} + +\subsubsection{Proximal Policy Optimization (PPO)} is a policy gradient method that trains a stochastic policy in an on-policy way. Also, it utilizes the actor-critic method. The actor maps the observation to action and the critic gives an expectation of the rewards of the agent for the observation given. Firstly, it collects a set of trajectories for each epoch by sampling from the latest version of the stochastic policy. Then, the rewards-to-go and the advantage estimates are computed to update the policy and fit the value function. The policy is updated via a stochastic gradient ascent optimizer, while the value function is fitted via some gradient descent algorithm. This procedure is applied for many epochs until the environment is solved. \cite{schulman2017proximal} + +\section{Implementation} + +% What it does, what language or system it’s written in, etc. +% Use figures or screen dumps if appropriate + +We developed the reinforcement learning algorithms in Python using PyTorch and Tensorflow. The source code of this project is available on GitHub~\footnote{\url{https://github.com/DougTrajano/drl-flappy-bird}}. 
The code is divided into two parts: + +\subsection{Trainer} + +The trainer is responsible for training the agents. In the training process, the environment provides observations and the agent takes actions for each observation. The Flappy Bird environment provides a reward of 1 for each time step, except when the bird crashes into the ground or a pipe. + +\subsection{Agents} + +The agents are responsible for choosing actions for a given observation. We developed a base agent that provides the basic functions that agents should use to interact with the environment. For example, the act function receives the observation state and returns the selected action; in the base agent, the act function provides random actions based on the number of available actions. Both algorithms that we developed extend the base agent. The DQN Agent uses the experience replay (ER) or prioritized experience replay (PER), and a neural network is used to approximate the Q-function. Our implementation of DQN also has an epsilon-greedy policy to handle the trade-off between exploration and exploitation. Our PPO Agent uses Proximal Policy Optimization (PPO), a policy gradient method that uses the actor-critic method to train the policy. The PPO Agent also has a buffer that uses Generalized Advantage Estimation (GAE-Lambda). We developed the DQN algorithm using TensorFlow and the PPO algorithm using PyTorch. + +\section{Related work} + +In \cite{mnih2013playing} the researchers developed the Deep Q-Network (DQN) algorithm. In this study, the agent is trained from the screen images of Atari games and it produced results far above human results. \cite{stanford2016alp} trained DQN to play Flappy Bird; the average score for the algorithm is 3.3, and the human score is 4.25. In \cite{alp2019playing} two algorithms were trained: Deep Q-Network (DQN) and Asynchronous Advantage Actor-Critic (A3C), and the results showed that A3C resulted in much faster training because it uses its own reward function. 
The \cite{vu2020flapai} showed that the SARSA and Q-Learning algorithms can be used to learn the Flappy Bird game. New algorithms of policy gradient methods were introduced in \cite{schulman2017proximal}, the most relevant method is proximal policy optimization (PPO) that we will implement and test in this work. + +\section{Experiments} + +In this experiment, we want to see if one of the two reinforcement learning algorithms that we developed can learn to play the Flappy Bird game properly. + +\subsection{Trainer hyperparameters} + +The following hyperparameters are used for training all the agents. + +\begin{itemize} + \item Number of episodes (n\_episodes): 60,000 + \item Early stopping (early\_stop): 120 + \item Maximum number of time steps per episode (max\_timestep): None +\end{itemize} + +\subsection{DQN Agent} + +The following hyperparameters are used in the DQN agent: + +\begin{itemize} + \item Dimension of each observation (state\_size): 2 + \item Quantity of available actions (action\_size): 2 + \item Random seed (seed): 1993 + \item Hidden units in the network (nb\_hidden): (64, 64) + \item Learning rate for the optimizer (learning\_rate): 0.0005 + \item Size of the memory (memory\_size): 100,000 + \item Prioritized replay memory (prioritized\_memory): False + \item Size of the batch to train the network (batch\_size): 64 + \item Discount factor (gamma): 0.99 + \item Interpolation parameter for target network (tau): 0.001 + \item Small value used in the priority update (small\_eps): 0.03 + \item Number of steps before updating the target network (update\_every): 4 + \item Use epsilon-greedy action selection (epsilon\_enabled): True + \item Starting value of epsilon, for epsilon-greedy action selection (epsilon\_start): 1.0 + \item Minimum value of epsilon (epsilon\_end): 0.01 + \item Decay rate for epsilon (epsilon\_decay): 0.99995 +\end{itemize} + +The training session was not stopped by the early stopping policy, and the agent was trained for 60,000 
episodes. Figure 2 shows the training process of the DQN agent. We can see that in 40,000 episodes, the agent achieved the best score (116.23) of the training session, but it isn't enough to stop the training. + +\begin{figure}[ht] + \centering + \includegraphics[width=8cm]{images/DQN_score.png} + \caption{DQN - Average Scores by episodes} + \label{fig:DQN-avg-scores} +\end{figure} + +The DQN Agent (with 60,000 episodes) was tested in 10 trials as described in Table 1. The \textbf{Trial} column represents the number of the test, the \textbf{Env score} column represents the scores provided by the OpenAI Gym environment, and the \textbf{Skipped pipes} column represents the number of pipes skipped by the bird. + +\begin{table}[h] + \centering + \begin{tabular}{|c||c|c|} + \hline + Trial & Env score & Skipped pipes \\ + \hline + 1 & 150 & 2 \\ + \hline + 2 & 114 & 1 \\ + \hline + 3 & 137 & 1 \\ + \hline + 4 & 137 & 1 \\ + \hline + 5 & 119 & 1 \\ + \hline + 6 & 121 & 1 \\ + \hline + 7 & 120 & 1 \\ + \hline + 8 & 109 & 0 \\ + \hline + 9 & 151 & 2 \\ + \hline + 10 & 114 & 1 \\ + \hline + \end{tabular} + \caption{DQN - Results table} + \label{tab:dqn_table} +\end{table} + +It seems that the DQN Agent learned a little bit as in the best case, the bird was able to skip only 2 pipes. The average score was 127.2, and the average number of pipes skipped was 1.1. + +We developed the Prioritized Experience Replay (PER) to be used with Deep Q-Network (DQN) algorithm, but it takes a lot of time to train and it is not used in the paper. 
+ +\subsection{PPO Agent} + +The following hyperparameters are used in the PPO agent: + +\begin{itemize} + \item Dimension of each observation (state\_size): 2 + \item Quantity of available actions (action\_size): 2 + \item Random seed (seed): 1993 + \item Size of the memory (memory\_size): 100,000 + \item Hidden units in the network (nb\_hidden): (64, 64) + \item Discount factor (gamma): 0.99 + \item Lambda for GAE-Lambda (lam): 0.97 + \item KL divergence between target and current policy (target\_kl): 0.01 + \item Learning rate for the policy optimizer (policy\_lr): 0.0003 + \item Learning rate for the value function optimizer (value\_lr): 0.001 + \item Number of iterations to train the policy (train\_policy\_iters): 10 + \item Number of iterations to train the value function (train\_value\_iters): 10 + \item Clipping ratio for the policy objective (clip\_ratio): 0.2 + \item Use epsilon-greedy action selection (epsilon\_enabled): True + \item Starting value of epsilon, for epsilon-greedy action selection (epsilon\_start): 1.0 + \item Minimum value of epsilon (epsilon\_end): 0.01 + \item Decay rate for epsilon (epsilon\_decay): 0.995 +\end{itemize} + +The training session was stopped when the average score of the last 100 episodes was higher than 120, at episode 36,029. Figure~\ref{fig:PPO-avg-scores} shows the training process of the PPO agent. + +\begin{figure}[ht] + \centering + \includegraphics[width=8cm]{images/PPO_score.png} + \caption{PPO - Average Scores by episodes} + \label{fig:PPO-avg-scores} +\end{figure} + +We can see that at around 35,000 episodes, the agent started to learn more quickly and finished the training by reaching the stop condition (average of the last 100 scores higher than 120). + +The PPO Agent (with 36,029 episodes trained) was tested in 10 trials as described in Table 2. 
The \textbf{Trial} column represents the number of the test, the \textbf{Env score} column represents the scores provided by the OpenAI Gym environment, and the \textbf{Skipped pipes} column represents the number of pipes skipped by the bird. + +\begin{table}[h] + \centering + \begin{tabular}{|c||c|c|} + \hline + Trial & Env score & Skipped pipes \\ + \hline + 1 & 840 & 20 \\ + \hline + 2 & 692 & 16 \\ + \hline + 3 & 174 & 2 \\ + \hline + 4 & 527 & 12 \\ + \hline + 5 & 342 & 7 \\ + \hline + 6 & 506 & 11 \\ + \hline + 7 & 770 & 18 \\ + \hline + 8 & 897 & 21 \\ + \hline + 9 & 367 & 7 \\ + \hline + 10 & 1062 & 26 \\ + \hline + \end{tabular} + \caption{PPO - Results table} + \label{tab:ppo_table} +\end{table} + +The PPO Agent performs better than the DQN Agent: in the best case, the bird was able to skip 26 pipes. The average score was 617.7, and the average number of pipes skipped was 14. + +\section{Conclusion} + +Deep Reinforcement Learning (DRL) is a powerful tool for solving complex, high-dimensional environments. It can be used for video games, robotics, and many other tasks. In this paper, we developed two DRL algorithms: Deep Q-Network (DQN) and Proximal Policy Optimization (PPO). We tested the algorithms on the Flappy Bird game and showed that the PPO algorithm can learn to play the game properly. For future work, we plan to add hyperparameter tuning and other algorithms to the paper. We also want to run DQN with Prioritized Experience Replay (PER) for a longer training time, and compare the results. We could also change the environment to use the screen of the game as the observation space. 
+ +\bibliographystyle{aaai} +\bibliography{references.bib} + +\end{document} \ No newline at end of file diff --git a/docs/paper/overleaf/references.bib b/docs/paper/overleaf/references.bib new file mode 100644 index 0000000..42e0262 --- /dev/null +++ b/docs/paper/overleaf/references.bib @@ -0,0 +1,76 @@ +@article{buffet2007shaping, + title={Shaping multi-agent systems with gradient reinforcement learning}, + author={Buffet, Olivier and Dutech, Alain and Charpillet, Fran{\c{c}}ois}, + journal={Autonomous Agents and Multi-Agent Systems}, + volume={15}, + number={2}, + pages={197--220}, + year={2007}, + publisher={Springer} +} + +@inproceedings{fedus2020revisiting, + title={Revisiting fundamentals of experience replay}, + author={Fedus, William and Ramachandran, Prajit and Agarwal, Rishabh and Bengio, Yoshua and Larochelle, Hugo and Rowland, Mark and Dabney, Will}, + booktitle={International Conference on Machine Learning}, + pages={3061--3071}, + year={2020}, + organization={PMLR} +} + +@article{schaul2015prioritized, + title={Prioritized experience replay}, + author={Schaul, Tom and Quan, John and Antonoglou, Ioannis and Silver, David}, + journal={arXiv preprint arXiv:1511.05952}, + year={2015} +} + +@article{mnih2013playing, + title={Playing atari with deep reinforcement learning}, + author={Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin}, + journal={arXiv preprint arXiv:1312.5602}, + year={2013} +} + +@misc{brockman2016openai, + Author = {Greg Brockman and Vicki Cheung and Ludwig Pettersson and Jonas Schneider and John Schulman and Jie Tang and Wojciech Zaremba}, + Title = {OpenAI Gym}, + Year = {2016}, + Eprint = {arXiv:1606.01540}, +} + +@book{sutton2018reinforcement, + title={Reinforcement learning: An introduction}, + author={Sutton, Richard S and Barto, Andrew G}, + year={2018}, + publisher={MIT press} +} + +@article{schulman2017proximal, + title={Proximal policy 
optimization algorithms}, + author={Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg}, + journal={arXiv preprint arXiv:1707.06347}, + year={2017} +} + +@article{stanford2016alp, + title={Playing Flappy Bird via Asynchronous Advantage Actor Critic Algorithm}, + author={Alp, Elit Cenk and Guzel, Mehmet}, + year={2019}, + month={07}, + doi = {10.13140/RG.2.2.13159.96165} +} + +@article{alp2019playing, + title={Playing Flappy Bird via Asynchronous Advantage Actor Critic Algorithm}, + author={Alp, Elit Cenk and Guzel, Mehmet Serdar}, + journal={arXiv preprint arXiv:1907.03098}, + year={2019} +} + +@article{vu2020flapai, + title={FlapAI Bird: Training an Agent to Play Flappy Bird Using Reinforcement Learning Techniques}, + author={Vu, Tai and Tran, Leon}, + journal={arXiv preprint arXiv:2003.09579}, + year={2020} +} \ No newline at end of file diff --git a/docs/paper/paper.pdf b/docs/paper/paper.pdf new file mode 100644 index 0000000..e49deaa Binary files /dev/null and b/docs/paper/paper.pdf differ