-
Notifications
You must be signed in to change notification settings - Fork 4
/
Datalogy.bib
574 lines (531 loc) · 40.7 KB
/
Datalogy.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
% Book by Peter Naur; the url is the web page this record was captured from.
% NOTE(review): publisher, place and year are taken from the published edition,
% not from the captured page - confirm against a library record.
@book{naur_peter_nodate,
  author = {Naur, Peter},
  title = {Concise Survey of Computer Methods},
  date = {1974},
  publisher = {Studentlitteratur},
  location = {Lund},
  pagetotal = {397},
  url = {http://www.naur.com/Conc.Surv.html},
  urldate = {2024-06-19},
  file = {Peter Naur\: Concise Survey of Computer Methods, 397 p:/Users/lpa2a/Zotero/storage/C2J6H7SY/Conc.Surv.html:text/html},
}
% Journal article: Harvard Data Science Review, volume 2, number 2.
@article{fayyad_toward_2020,
  author = {Fayyad, Usama and Hamutcu, Hamit},
  title = {Toward Foundations for Data Science and Analytics: A Knowledge Framework for Professional Standards},
  shorttitle = {Toward Foundations for Data Science and Analytics},
  journaltitle = {Harvard Data Science Review},
  volume = {2},
  number = {2},
  date = {2020-04-30},
  issn = {2644-2353, 2688-8513},
  doi = {10.1162/99608f92.1a99e67a},
  url = {https://hdsr.mitpress.mit.edu/pub/6wx0qmkl/release/4},
  urldate = {2024-06-19},
  langid = {english},
  abstract = {As the industry is racing to harness the power of data, demand for data science professionals is growing at an increasing rate. However, almost every organization has a unique way of defining roles in data science and associated skills and knowledge. This has resulted in a confusing industry landscape for employers, academic and training institutions, and existing and aspiring data science professionals. This article is the first in a series authored by Initiative for Analytics and Data Science Standards ({IADSS}). We review the history of data science, which we trace back to 1974, and the emergence of data science as a profession in the industry, followed by a classification of knowledge and skills commonly associated with data science professionals, pointing to a lack of detailed and consistent treatment of the topic. We then present a Data Science Knowledge Framework, that we believe can support industry standardization and building measurement and assessment methodologies for data science professionals.},
  file = {Full Text PDF:/Users/lpa2a/Zotero/storage/UFZZQRXA/Fayyad and Hamutcu - 2020 - Toward Foundations for Data Science and Analytics.pdf:application/pdf},
}
% Journal article: Communications of the ACM, volume 9, number 7, single page 485.
@article{naur_science_1966,
  author = {Naur, Peter},
  title = {The science of datalogy},
  journaltitle = {Communications of the {ACM}},
  shortjournal = {Commun. {ACM}},
  volume = {9},
  number = {7},
  pages = {485},
  date = {1966-07-01},
  issn = {0001-0782},
  doi = {10.1145/365719.366510},
  url = {https://dl.acm.org/doi/10.1145/365719.366510},
  urldate = {2024-06-19},
  file = {Full Text PDF:/Users/lpa2a/Zotero/storage/XRFQISJP/Naur - 1966 - The science of datalogy.pdf:application/pdf},
}
% Authorless web document hosted on Google Docs; only a snapshot and access date are recorded.
@online{noauthor_readme_nodate,
  title = {{README}},
  titleaddon = {Google Docs},
  url = {https://docs.google.com/document/u/0/d/1gLqFXey6x_-CDZWwpWNs2MAy1GlJtSguuoxy3vo659A/edit?usp=embed_facebook},
  urldate = {2024-06-18},
  langid = {english},
  file = {Snapshot:/Users/lpa2a/Zotero/storage/ELLZSQEA/edit.html:text/html},
}
% Journal article: Duke Mathematical Journal, volume 5, number 1.
% The publisher is stored in its own field rather than in a printed note.
@article{wiener_ergodic_1939,
  author = {Wiener, Norbert},
  title = {The ergodic theorem},
  journaltitle = {Duke Mathematical Journal},
  volume = {5},
  number = {1},
  pages = {1--18},
  date = {1939-03},
  publisher = {Duke University Press},
  issn = {0012-7094, 1547-7398},
  doi = {10.1215/S0012-7094-39-00501-6},
  url = {https://projecteuclid.org/journals/duke-mathematical-journal/volume-5/issue-1/The-ergodic-theorem/10.1215/S0012-7094-39-00501-6.full},
  urldate = {2024-06-18},
}
% Book by Shannon and Weaver, University of Illinois Press, 1949.
% NOTE(review): location is the imprint city of the 1949 edition - confirm against the title page.
@book{shannon_mathematical_1949,
  author = {Shannon, Claude E. and Weaver, Warren},
  title = {The mathematical theory of communication},
  date = {1949},
  publisher = {University of Illinois Press},
  location = {Urbana, {IL}},
  pagetotal = {vi, 117},
  abstract = {In the second part of this volume Weaver suggests that there are 3 levels of problem in general communication. The first is technical, i.e., "How accurately can the symbols of communication be transmitted?" Second, the semantic problem, i.e., "How precisely do the transmitted symbols convey the desired meaning?" Third, the effectiveness problem, i.e., "How effectively does the received meaning affect conduct in the desired way?" The first level is essentially an engineering one and in the first part of the book Shannon develops a mathematical theory of communication. Weaver discusses the significance of this theory to the other two levels. ({PsycINFO} Database Record (c) 2016 {APA}, all rights reserved)},
  file = {Snapshot:/Users/lpa2a/Zotero/storage/H4LHAHJT/1950-04584-000.html:text/html},
}
% Contribution by Perry Crawford, pages 51-57 of a larger document hosted by ERIC.
% NOTE(review): booktitle is taken from the attachment label of the stored PDF;
% the host volume's editor, publisher and year are not recorded - confirm and add.
@incollection{crawford_connections_nodate,
  author = {Crawford, Perry},
  title = {On the Connections between Data and Things in the Real World},
  booktitle = {Management of Data Elements in Information Processing},
  pages = {51--57},
  url = {https://files.eric.ed.gov/fulltext/ED093370.pdf},
  urldate = {2024-06-19},
  file = {Management of data elements in information processing:/Users/lpa2a/Zotero/storage/RHNPTEE3/ED093370.pdf:application/pdf},
}
% Book by James Gleick, Pantheon Books, New York, 2011.
@book{gleick_information_2011,
  author = {Gleick, James},
  title = {The Information: A History, a Theory, a Flood},
  shorttitle = {The Information},
  date = {2011},
  publisher = {Pantheon Books},
  location = {New York},
  isbn = {978-0-375-42372-7},
  keywords = {Information science -- History, Information society},
}
% Chapter by Gregg Jaeger, pages 25-59 of a Springer Festschrift edited by Plotnitsky and Haven.
@incollection{jaeger_wheelers_2023,
  author = {Jaeger, Gregg},
  editor = {Plotnitsky, Arkady and Haven, Emmanuel},
  title = {On Wheeler’s Quantum Circuit},
  booktitle = {The Quantum-Like Revolution: A Festschrift for Andrei Khrennikov},
  pages = {25--59},
  date = {2023},
  publisher = {Springer International Publishing},
  location = {Cham},
  isbn = {978-3-031-12986-5},
  doi = {10.1007/978-3-031-12986-5_2},
  url = {https://doi.org/10.1007/978-3-031-12986-5_2},
  urldate = {2024-06-19},
  langid = {english},
  abstract = {The Meaning Circuit Hypothesis ({MCH}) is a synthesis of ideas providing John Wheeler’s outline of ultimate physics, which he fine-tuned over several decades from the 1970s onward. It is a ‘working hypothesis’ in which ‘existence is a ‘meaning circuit”’ that portrays the world as a “system self-synthesized by quantum networking.” It was strongly advocated by him for roughly two decades and since then has had an increasingly strong impact on the approach of many investigators of quantum theory [1–3]; in particular, elements such as the quantum participator and ‘it from bit’ are now considered by others as candidate components of a foundation for quantum theory in which information is involved essentially; cf., e.g., [4–6]. Therefore, it is worthy of review and critique.},
  file = {Full Text PDF:/Users/lpa2a/Zotero/storage/4GXCHFNA/Jaeger - 2023 - On Wheeler’s Quantum Circuit.pdf:application/pdf},
}
% Workshop proceedings volume; Zurek is recorded as editor because the title and
% keywords identify the book as congress proceedings rather than a monograph.
% NOTE(review): the title names both 1988 and 1989 for the workshop - kept as recorded; confirm.
@proceedings{zurek_complexity_1990,
  editor = {Zurek, Wojciech Hubert},
  title = {Complexity, Entropy, and the Physics of Information: Proceedings of the 1988 Workshop on Complexity, Entropy, and the Physics of Information Held May-June, 1989 in Santa Fe, New Mexico},
  shorttitle = {Complexity, Entropy, and the Physics of Information},
  series = {Proceedings volume in the Santa Fe Institute studies in the sciences of complexity},
  date = {1990},
  publisher = {Addison-Wesley, The Advanced Book Program},
  location = {Redwood City, Calif},
  isbn = {978-0-201-51509-1 978-0-201-51506-0},
  keywords = {Computational complexity -- Congresses, Entropy -- Congresses, Physical measurements -- Congresses, Quantum theory -- Congresses},
}
% Journal article with eight authors: Communications of the ACM, volume 5, number 4.
@article{bosak_information_1962,
  author = {Bosak, Robert and Clippinger, Richard F. and Dobbs, Carey and Goldfinger, Roy and Jasper, Renee B. and Keating, William and Kendrick, George and Sammet, Jean E.},
  title = {An information algebra: phase 1 report—language structure group of the {CODASYL} development committee},
  shorttitle = {An information algebra},
  journaltitle = {Communications of the {ACM}},
  shortjournal = {Commun. {ACM}},
  volume = {5},
  number = {4},
  pages = {190--204},
  date = {1962-04-01},
  issn = {0001-0782},
  doi = {10.1145/366920.366935},
  url = {https://dl.acm.org/doi/10.1145/366920.366935},
  urldate = {2024-06-19},
  file = {Full Text PDF:/Users/lpa2a/Zotero/storage/PG88PAMP/Bosak et al. - 1962 - An information algebra phase 1 report—language st.pdf:application/pdf},
}
% Book by Börje Langefors, Studentlitteratur, 1995.
% The Google Books identifier is kept in the unprinted annotation field.
@book{langefors_essays_1995,
  author = {Langefors, Börje},
  title = {Essays on infology: summing up and planning for the future},
  shorttitle = {Essays on infology},
  date = {1995},
  publisher = {Studentlitteratur},
  pagetotal = {180},
  isbn = {978-91-44-61301-7},
  langid = {english},
  annotation = {Google-Books-{ID}: {RGQ}4HQAACAAJ},
}
% Book by Börje Langefors with two publishers and two places of publication;
% both are list fields, so the items are separated by the keyword and.
@book{langefors_theoretical_1973,
  author = {Langefors, Börje},
  title = {Theoretical Analysis of Information Systems},
  date = {1973},
  publisher = {Studentlitteratur and Auerbach},
  location = {Lund and Philadelphia},
  isbn = {978-0-87769-151-8},
  keywords = {Electronic data processing, Management information systems},
}
% Dictionary entry for the noun "information" on the Oxford English Dictionary site.
% The site name lives in titleaddon; the author is a brace-protected corporate name.
@online{oed_information_nodate,
  author = {{OED}},
  title = {information, n.},
  titleaddon = {Oxford English Dictionary},
  url = {https://www.oed.com/dictionary/information_n},
  urldate = {2024-06-20},
  langid = {english},
  abstract = {information, n. meanings, etymology, pronunciation and more in the Oxford English Dictionary},
  file = {Snapshot:/Users/lpa2a/Zotero/storage/9HQTUBXJ/information_n.html:text/html},
}
% Journal article: Mathematical Proceedings of the Cambridge Philosophical Society, volume 22, number 5.
@article{fisher_theory_1925,
  author = {Fisher, R. A.},
  title = {Theory of Statistical Estimation},
  journaltitle = {Mathematical Proceedings of the Cambridge Philosophical Society},
  volume = {22},
  number = {5},
  pages = {700--725},
  date = {1925-07},
  issn = {1469-8064, 0305-0041},
  doi = {10.1017/S0305004100009580},
  url = {https://www.cambridge.org/core/journals/mathematical-proceedings-of-the-cambridge-philosophical-society/article/abs/theory-of-statistical-estimation/7A05FB68C83B36C0E91D42C76AB177D4},
  urldate = {2024-06-20},
  langid = {english},
  abstract = {It has been pointed out to me that some of the statistical ideas employed in the following investigation have never received a strictly logical definition and analysis. The idea of a frequency curve, for example, evidently implies an infinite hypothetical population distributed in a definite manner; but equally evidently the idea of an infinite hypothetical population requires a more precise logical specification than is contained in that phrase. The same may be said of the intimately connected idea of random sampling. These ideas have grown up in the minds of practical statisticians and lie at the basis especially of recent work; there can be no question of their pragmatic value. It was no part of my original intention to deal with the logical bases of these ideas, but some comments which Dr Burnside has kindly made have convinced me that it may be desirable to set out for criticism the manner in which I believe the logical foundations of these ideas may be established.},
  file = {Submitted Version:/Users/lpa2a/Zotero/storage/LPRLAUR6/Fisher - 1925 - Theory of Statistical Estimation.pdf:application/pdf},
}
% Journal article: Philosophical Transactions of the Royal Society of London, Series A, volume 222.
% The citation key keeps its 1921 suffix so existing citations still resolve; the date
% follows the publication year carried in the DOI (rsta.1922.0009).
% NOTE(review): the earlier value 1921-11-17 looks like the date the paper was read - confirm.
@article{fisher_mathematical_1921,
  author = {Fisher, R. A.},
  title = {On the mathematical foundations of theoretical statistics},
  journaltitle = {Philosophical Transactions of the Royal Society of London. Series A, Containing Papers of a Mathematical or Physical Character},
  volume = {222},
  number = {594},
  pages = {309--368},
  date = {1922},
  publisher = {Royal Society},
  doi = {10.1098/rsta.1922.0009},
  url = {https://royalsocietypublishing.org/doi/10.1098/rsta.1922.0009},
  urldate = {2024-06-20},
  abstract = {Several reasons have contributed to the prolonged neglect into which the study of statistics, in its theoretical aspects, has fallen. In spite of the immense amount of fruitful labour which has been expended in its practical applications, the basic principles of this organ of science are still in a state of obscurity, and it cannot be denied that, during the recent rapid development of practical methods, fundamental problems have been ignored and fundamental paradoxes left unresolved. This anomalous state of statistical science is strikingly exemplified by a recent paper entitled "The Fundamental Problem of Practical Statistics," in which one of the most eminent of modern statisticians presents what purports to be a general proof of {BAYES}' postulate, a proof which, in the opinion of a second statistician of equal eminence, "seems to rest upon a very peculiar -- not to say hardly supposable -- relation."},
  file = {Full Text PDF:/Users/lpa2a/Zotero/storage/BM2JRCSN/Fisher and Russell - 1997 - On the mathematical foundations of theoretical sta.pdf:application/pdf},
}
% Book by Norbert Wiener, 1948; the url is a HathiTrust handle for a scanned copy.
@book{wiener_cybernetics_1948,
  author = {Wiener, Norbert},
  title = {Cybernetics: Or, Control and Communication in the Animal and the Machine},
  shorttitle = {Cybernetics},
  date = {1948},
  publisher = {Technology Press},
  location = {Cambridge, Mass},
  url = {https://hdl.handle.net/2027/mdp.39015030185493},
  urldate = {2024-06-20},
  keywords = {Mathematical statistics, Servomechanisms},
}
% Journal article: The Bell System Technical Journal, volume 7, number 3.
% The journal name is spelled the same way as in the Shannon entries of this file.
% NOTE(review): the month in date is taken from the issue (number 3 of 1928) - confirm.
@article{hartley_transmission_1928,
  author = {Hartley, R. V. L.},
  title = {Transmission of Information},
  journaltitle = {The Bell System Technical Journal},
  volume = {7},
  number = {3},
  pages = {535--563},
  date = {1928-07},
  issn = {1538-7305},
  doi = {10.1002/j.1538-7305.1928.tb01236.x},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/j.1538-7305.1928.tb01236.x},
  urldate = {2024-06-20},
  rights = {© 1928 The Bell System Technical Journal},
  langid = {english},
  abstract = {Synopsis: A quantitative measure of “information” is developed which is based on physical as contrasted with psychological considerations. How the rate of transmission of this information over a system is limited by the distortion resulting from storage of energy is discussed from the transient viewpoint. The relation between the transient and steady state viewpoints is reviewed. It is shown that when the storage of energy is used to restrict the steady state transmission to a limited range of frequencies the amount of information that can be transmitted is proportional to the product of the width of the frequency-range by the time it is available. Several illustrations of the application of this principle to practical systems are included. In the case of picture transmission and television the spacial variation of intensity is analyzed by a steady state method analogous to that commonly used for variations with time.},
  file = {Snapshot:/Users/lpa2a/Zotero/storage/CSAP2HGG/j.1538-7305.1928.tb01236.html:text/html},
}
% Book by Norman Abramson, McGraw-Hill, New York, 1963; the url is an Internet Archive scan.
% NOTE(review): pagetotal (234) disagrees with the physical description kept in
% abstract (xvi, 201 p.) - confirm which one to keep.
@book{abramson_information_1963,
  author = {Abramson, Norman},
  title = {Information theory and coding},
  date = {1963},
  publisher = {{McGraw-Hill}},
  location = {New York},
  pagetotal = {234},
  url = {http://archive.org/details/informationtheor0000abra},
  urldate = {2024-06-20},
  keywords = {Information theory},
  abstract = {xvi, 201 p. :; Bibliography: p. [191]-195},
}
% Journal article: The Bell System Technical Journal, volume 27, number 3.
@article{shannon_mathematical_1948,
  author = {Shannon, C. E.},
  title = {A mathematical theory of communication},
  journaltitle = {The Bell System Technical Journal},
  volume = {27},
  number = {3},
  pages = {379--423},
  date = {1948-07},
  issn = {0005-8580},
  doi = {10.1002/j.1538-7305.1948.tb01338.x},
  url = {https://ieeexplore.ieee.org/document/6773024},
  urldate = {2024-06-20},
  abstract = {The recent development of various methods of modulation such as {PCM} and {PPM} which exchange bandwidth for signal-to-noise ratio has intensified the interest in a general theory of communication. A basis for such a theory is contained in the important papers of Nyquist and Hartley on this subject. In the present paper we will extend the theory to include a number of new factors, in particular the effect of noise in the channel, and the savings possible due to the statistical structure of the original message and due to the nature of the final destination of the information.},
  file = {IEEE Xplore Abstract Record:/Users/lpa2a/Zotero/storage/W852YK5V/6773024.html:text/html},
}
% Book by Siddhartha Mukherjee, Scribner, New York, 2016; the abstract holds two jacket blurbs.
@book{mukherjee_gene_2016,
  author = {Mukherjee, Siddhartha},
  title = {The Gene: An Intimate History},
  shorttitle = {The Gene},
  date = {2016},
  publisher = {Scribner},
  location = {New York},
  isbn = {978-1-4767-3350-0 978-1-4767-3352-4 978-1-4767-3353-1},
  url = {http://www.simonandschuster.com},
  urldate = {2024-06-20},
  keywords = {Buck, Carrie -- 1906-1983, Families -- Health and hygiene, Genes, Genetics, Genetics -- history, Genetics -- History, Heredity, History, Medical ethics, Medical ethics -- History, Mukherjee, Siddhartha -- Family -- Health},
  abstract = {"The story of the gene begins in earnest in an obscure Augustinian abbey in Moravia in 1856 where Gregor Mendel, a monk working with pea plants, stumbles on the idea of a "unit of heredity." It intersects with Darwin's theory of evolution, and collides with the horrors of Nazi eugenics in the 1940s. The gene transforms postwar biology. It invades discourses concerning race and identity and provides startling answers to some of the most potent questions coursing through our political and cultural realms. It reorganizes our understanding of sexuality, gender identity, sexual orientation, temperament, choice, and free will, thus raising the most urgent questions affecting our personal realms. Above all, the story of the gene is driven by human ingenuity and obsessive minds -- from Gregor Mendel and Charles Darwin to Francis Crick, James Watson, and Rosalind Franklin to the thousands of scientists working today to understand the code of codes. Author of the Pulitzer Prize-winning bestseller The Emperor of All Maladies, Mukherjee draws on his scientific knowledge and research to describe the magisterial history of a scientific idea. Woven through The Gene is the story of Mukherjee's own family and its recurring pattern of schizophrenia, a haunting reminder that the science of genetics is not confined to the laboratory but is vitally relevant to everyday lives. The moral complexity of genetics reverberates even more urgently today as we learn to "read" and "write" the human genome--unleashing the potential to change the fates and identities of our children and our children's children"--Jacket., "Magnificent, beautifully written, and riveting, Siddhartha Mukherjee's The Gene: An Intimate History illuminates the quest to decipher the master-code of instructions that makes and defines humans; that governs our form, function, and fate; and that determines the future of our children"--Jacket.},
}
% Journal article; journal, volume, issue and year are taken from the captured
% page description that is kept in abstract (Bind 50 (1986) 2).
% NOTE(review): the given name is assumed to match the other Langefors entries in
% this file, and the journal title is the first of three names listed - confirm both.
@article{langefors_information_nodate,
  author = {Langefors, Börje},
  title = {Information and Management Systems},
  journaltitle = {Ledelse og Erhvervsøkonomi},
  volume = {50},
  number = {2},
  date = {1986},
  url = {https://tidsskrift.dk/ledelseogerhvervsoekonomi/article/download/35141/35731?inline=1},
  urldate = {2024-06-21},
  abstract = {Ledelse og Erhvervsøkonomi/Handelsvidenskabeligt Tidsskrift/Erhvervsøkonomisk Tidsskrift, Bind 50 (1986) 2},
  file = {Information and Management Systems:/Users/lpa2a/Zotero/storage/D6KWDB3N/35731.html:text/html},
}
% Full-text document hosted on the DiVA portal, recorded as a book.
% NOTE(review): author given name, date and urldate are not recorded - add when known.
@book{zetterlund_infology_nodate,
  author = {Zetterlund},
  title = {Infology: a study to identify possible elements of infology in some system development methodologies},
  url = {https://www.diva-portal.org/smash/get/diva2:2773/FULLTEXT02.pdf},
}
% Web page on the American Philosophical Society site (amphilsoc.org) about John Tukey's 1947 memo.
@online{tukey_first_nodate,
  author = {Tukey, John},
  title = {First Use of 'bit', Bell Labs Memo 1947},
  url = {https://www.amphilsoc.org/item-detail/john-tukey},
  urldate = {2024-06-25},
  langid = {english},
  abstract = {One of the most influential statisticians of the 20th century, John Tukey spent much of his career working on computer technology. He coined the terms “software” and “hardware.” Tukey is also credited with the first use of the word “bit” (a combination of the words “binary” and “digit”) to describe a unit of information in computing.},
  file = {Snapshot:/Users/lpa2a/Zotero/storage/5RMJW8SZ/john-tukey.html:text/html},
}
% Journal article: American Documentation, volume 19, number 3, captured from ProQuest.
% Publisher and place are stored in their own fields rather than in a printed note.
% NOTE(review): the ProQuest record reported 4 pages while pages holds only the
% first page (295) - confirm the end page and turn this into a range.
@article{oettinger_language_1968,
  author = {Oettinger, A. G.},
  title = {Language and Information},
  journaltitle = {American Documentation},
  volume = {19},
  number = {3},
  pages = {295},
  date = {1968-07},
  publisher = {Wiley Periodicals Inc.},
  location = {Washington, United States},
  issn = {0096-946X},
  url = {https://www.proquest.com/docview/195442499/abstract/C8C17DE7CE6E4736PQ/1},
  urldate = {2024-06-25},
  rights = {Copyright Wiley Periodicals Inc. Jul 1968},
  keywords = {Communication, Information, Language, Linguistics},
  abstract = {The more time I spend on the study of language the more puzzled I get because the last two decades, which have witnessed linguistic research of unprecedented intensity, have mainly revealed just how miraculously the process of human communication performs its everyday wonders. So simple that every child masters it almost unconsciously, the process is so complex that all our efforts to understand it seem to yield only a fragmentary...},
  file = {Full Text PDF:/Users/lpa2a/Zotero/storage/UQIHSC8Y/Oettinger - 1968 - Language and Information.pdf:application/pdf},
}
% Journal article by Kettinger and Li, captured from a ProQuest full-text page.
% NOTE(review): journal, volume, issue, pages and year are taken from the published
% article rather than from the captured page - confirm, and add the DOI once verified.
@article{kettinger_infological_nodate,
  author = {Kettinger, William and Li, Yuan},
  title = {The infological equation extended: towards conceptual clarity in the relationship between data, information and knowledge},
  shorttitle = {The infological equation extended},
  journaltitle = {European Journal of Information Systems},
  volume = {19},
  number = {4},
  pages = {409--421},
  date = {2010},
  url = {https://www.proquest.com/docview/742649696/fulltextPDF/4A8A0ADF17484AAEPQ/3?accountid=14678&sourcetype=Scholarly%20Journals},
  urldate = {2024-06-26},
  langid = {english},
  file = {Snapshot:/Users/lpa2a/Zotero/storage/47BKDWGA/3.html:text/html},
}
% Journal article: Information Systems, volume 5, number 1.
% The access remark is kept in the unprinted annotation field, not in the title.
@article{langefors_infological_1980,
  author = {Langefors, Börje},
  title = {Infological models and information user views},
  journaltitle = {Information Systems},
  shortjournal = {Information Systems},
  volume = {5},
  number = {1},
  pages = {17--32},
  date = {1980-01-01},
  issn = {0306-4379},
  doi = {10.1016/0306-4379(80)90065-4},
  url = {https://www.sciencedirect.com/science/article/pii/0306437980900654},
  urldate = {2024-06-26},
  annotation = {Behind paywall},
  abstract = {The study of data systems as information systems put into focus the role of data as representations of information in the sense of knowledge about some slice of the world. This information system-view—or infological view—made it clear that the data alone cannot “carry” information. They can only, at best, give rise to information in the minds of people and only in those people who hold a suitable frame-of-reference or world view, or “receiving structure”, in their mind. Thus the infological perspective had to be widened successively from a concern merely with information representation, structuring, and exploitation to the study of social, sociopsychological and socio-linguistical aspects and of “object system”, job design and other socio-technical issues. The term “user view” is employed widely in recent data base work. The use of the term “user view” suggests such an “infological” perspective. However, a closer look at how the term is actually used indicates a much more delimited interpretation which focuses on representational aspects and processing. This is also made explicit, to some degree, by the usual formulation “user view of the data” rather than, for instance, “user view of the world”. In this paper a brief study is made of the two aspects of user views, (i) the infological/conceptual aspect, which is concerned with how conception relates to data and information, and to reality, and (ii) the “datalogical” aspect, which is concerned with the selection of data from a data base and the rearrangement of them to suit a “user view” of the data (as seen from the application programmer). The infological aspect is illustrated through a discussion of some of my own earlier results which are here brought together. The datalogical aspect is exemplified by some quotations from the most recent data base literature, as well as by some earlier results of my own.
The term “user view” is frequently used in the datalogical sense whereas the infological or information-system-theoretical studies often have addressed questions that have to do with the infological/conceptual aspects of “user views”, without employing that term. In this paper some aspects of the problem of infological/conceptual user views are treated with a view to gain understanding of how both aspects of user views affect the design and use of information systems and data bases. Illustrations are taken from the author's own earlier work, for three reasons: (i) they were most easily available, (ii) they are directly associated with the problem at hand, and (iii) they have earlier been scattered over several works and it was useful, for the purpose of the present discussion, to bring them together.},
  file = {ScienceDirect Snapshot:/Users/lpa2a/Zotero/storage/Q926JFQT/0306437980900654.html:text/html},
}
% Journal article: Information Systems, volume 2, number 4.
% The access remark is kept in the unprinted annotation field, not in the title.
% The date carries the year only; the record holds no reliable month or day for issue 4.
@article{langefors_information_1977,
  author = {Langefors, Börje},
  title = {Information systems theory},
  journaltitle = {Information Systems},
  shortjournal = {Information Systems},
  volume = {2},
  number = {4},
  pages = {207--219},
  date = {1977},
  issn = {0306-4379},
  doi = {10.1016/0306-4379(77)90009-6},
  url = {https://www.sciencedirect.com/science/article/pii/0306437977900096},
  urldate = {2024-06-26},
  annotation = {Behind paywall},
  abstract = {The overview presented covers a wide spectrum of aspects on information systems. Consequently, we had to be very brief and for detailed definitions and discussions we must refer the interested reader to the underlying literature. We have described how information systems present complex problems to their designers and we argued that it is hardly possible for any one individual to acquire (and continuously update) sufficient skill over the whole spectrum of problems. It is shown how the partitioning of the design task into two major areas, the infological or behavioral area on the one hand and the datalogical and computer technology oriented area on the other hand, makes it possible to combine the skills of two (or more) groups of people. In addition, the users are to be directly involved in the (infological part of) design. Development in the “infological area”, as surveyed in the paper, has brought us to the situation where it is possible to apply a documentation technique that is computer independent and intelligible to the lay users in its infological parts and yet is precise enough to the data and program design stage. Actual research problems in the infological area are associated with how one could develop the understanding and the motivation of the users so that they can better exploit the possibility to control the design process that is now offered to them. Such research is not covered by the paper. Development in the “datalogical area”, as presented has increased the possibilities for using computers as aid to the designers and to base the design on more system-wide information. A research field which is presently of high interest, but not presented in the paper, is the development of more formalized methods for handling the interface between the infological and the datalogical design stages. Such research is presently making promising progress in combining recent results from “structured programming” and “structured information analysis”.},
  file = {ScienceDirect Snapshot:/Users/lpa2a/Zotero/storage/WY9TIYAF/0306437977900096.html:text/html},
}
% Book by Thomas S. Kuhn, University of Chicago Press, 1970.
% NOTE(review): edition is set on the understanding that the 1970 printing is the
% second edition - confirm against the copy cited.
@book{kuhn_structure_1970,
  author = {Kuhn, Thomas S.},
  title = {The Structure of Scientific Revolutions},
  edition = {2},
  series = {International encyclopedia of unified science},
  date = {1970},
  publisher = {University of Chicago Press},
  location = {Chicago},
  isbn = {978-0-226-45803-8 978-0-226-45804-5},
  keywords = {Philosophy, Science, Science -- history, Science -- History, Science -- Philosophy},
}
% Web page from the Britannica Dictionary site.
% The author is a corporate name, so it is wrapped in an extra pair of braces to stop
% it being split into given and family parts; the site name lives in titleaddon.
@online{the_britannica_dictionary_knowledge_nodate,
  author = {{The Britannica Dictionary}},
  title = {Knowledge and Information},
  titleaddon = {Britannica Dictionary},
  url = {https://www.britannica.com/dictionary/eb/qa/Knowledge-and-Information},
  urldate = {2024-06-27},
  langid = {american},
  abstract = {Knowledge is awareness, understanding, or skill that you get from experience or education. Information is the facts or details of a subject. They are},
  file = {Snapshot:/Users/lpa2a/Zotero/storage/9ZYYFRWG/Knowledge-and-Information.html:text/html},
}
% Journal article: The Bell System Technical Journal, volume 28, number 4.
@article{shannon_communication_1949,
  author = {Shannon, C. E.},
  title = {Communication theory of secrecy systems},
  journaltitle = {The Bell System Technical Journal},
  volume = {28},
  number = {4},
  pages = {656--715},
  date = {1949-10},
  issn = {0005-8580},
  doi = {10.1002/j.1538-7305.1949.tb00928.x},
  url = {https://ieeexplore.ieee.org/document/6769090},
  urldate = {2024-06-27},
  abstract = {The problems of cryptography and secrecy systems furnish an interesting application of communication theory. In this paper a theory of secrecy systems is developed. The approach is on a theoretical level and is intended to complement the treatment found in standard works on cryptography. There, a detailed study is made of the many standard types of codes and ciphers, and of the ways of breaking them. We will be more concerned with the general mathematical structure and properties of secrecy systems.},
  file = {IEEE Xplore Abstract Record:/Users/lpa2a/Zotero/storage/WBV8IM9X/6769090.html:text/html;IEEE Xplore Full Text PDF:/Users/lpa2a/Zotero/storage/TLA4ZY8I/Shannon - 1949 - Communication theory of secrecy systems.pdf:application/pdf},
}
% Thompson's 1972 Princeton University Press book on mathematical statistical mechanics.
@book{thompson_mathematical_1972,
  author    = {Thompson, Colin J.},
  title     = {Mathematical Statistical Mechanics},
  publisher = {Princeton University Press},
  location  = {Princeton, N. J.},
  date      = {1972},
  isbn      = {978-0-691-08220-2},
  keywords  = {Biomathematics, Mathematical physics, Statistical mechanics},
}
% Alvarado's arXiv preprint proposing the "4+1" model of data science.
% `eprint` holds only the bare arXiv identifier; the archive class that used to be
% appended to it ("[cs]") now lives in `eprintclass`, so the eprint link resolves.
@misc{alvarado_41_2023,
  author      = {Alvarado, Rafael C.},
  title       = {The 4+1 Model of Data Science},
  date        = {2023-11-13},
  publisher   = {{arXiv}},
  number      = {{arXiv}:2311.07631},
  eprinttype  = {arxiv},
  eprint      = {2311.07631},
  eprintclass = {cs},
  doi         = {10.48550/arXiv.2311.07631},
  url         = {https://arxiv.org/abs/2311.07631},
  urldate     = {2024-07-23},
  abstract    = {Data Science is a complex and evolving field, but most agree that it can be defined as a combination of expertise drawn from three broad areas -- computer science and technology, math and statistics, and domain knowledge -- with the purpose of extracting knowledge and value from data. Beyond this, the field is often defined as a series of practical activities ranging from the cleaning and wrangling of data, to its analysis and use to infer models, to the visual and rhetorical representation of results to stakeholders and decision-makers. This essay proposes a model of data science that goes beyond laundry-list definitions to get at the specific nature of data science and help distinguish it from adjacent fields such as computer science and statistics. We define data science as an interdisciplinary field comprising four broad areas of expertise: value, design, systems, and analytics. A fifth area, practice, integrates the other four in specific contexts of domain knowledge. We call this the 4+1 model of data science. Together, these areas belong to every data science project, even if they are often unconnected and siloed in the academy.},
  keywords    = {Computer Science - General Literature, K.2, Computer Science - Databases, E.m},
  file        = {arXiv Fulltext PDF:/Users/lpa2a/Zotero/storage/75P2I7AS/Alvarado - 2023 - The 4+1 Model of Data Science.pdf:application/pdf;arXiv.org Snapshot:/Users/lpa2a/Zotero/storage/HKITGGAX/2311.html:text/html},
}
% Alvarado's arXiv preprint tracing the history of the term "data science" (1963 to 2012).
% `eprint` holds only the bare arXiv identifier; the archive class that used to be
% appended to it ("[cs]") now lives in `eprintclass`, so the eprint link resolves.
@misc{alvarado_data_2023,
  author      = {Alvarado, Rafael C.},
  title       = {Data Science from 1963 to 2012},
  date        = {2023-11-07},
  publisher   = {{arXiv}},
  number      = {{arXiv}:2311.03292},
  eprinttype  = {arxiv},
  eprint      = {2311.03292},
  eprintclass = {cs},
  doi         = {10.48550/arXiv.2311.03292},
  url         = {https://arxiv.org/abs/2311.03292},
  urldate     = {2024-07-23},
  abstract    = {Consensus on the definition of data science remains low despite the widespread establishment of academic programs in the field and continued demand for data scientists in industry. Definitions range from rebranded statistics to data-driven science to the science of data to simply the application of machine learning to so-called big data to solve real-world problems. Current efforts to trace the history of the field in order to clarify its definition, such as Donoho's "50 Years of Data Science" (Donoho 2017), tend to focus on a short period when a small group of statisticians adopted the term in an unsuccessful attempt to rebrand their field in the face of the overshadowing effects of computational statistics and data mining. Using textual evidence from primary sources, this essay traces the history of the term to the 1960s, when it was first used by the {US} Air Force in a surprisingly similar way to its current usage, to 2012, the year that Harvard Business Review published the enormously influential article "Data Scientist: The Sexiest Job of the 21st Century" (Davenport and Patil 2012), while the American Statistical Association acknowledged a profound disconnect between statistics and data science. Among the themes that emerge from this review are (1) the long-standing opposition between data analysts and data miners that continues to animate the field, (2) an established definition of the term as the practice of managing and processing scientific data that has been occluded by recent usage, and (3) the phenomenon of data impedance -- the disproportion between surplus data, indexed by phrases like data deluge and big data, and the limitations of computational machinery and methods to process them. This persistent condition appears to have motivated the use of the term and the field itself since its beginnings.},
  keywords    = {Computer Science - Digital Libraries, Computer Science - General Literature, K.2},
  file        = {arXiv Fulltext PDF:/Users/lpa2a/Zotero/storage/SMLX7VTF/Alvarado - 2023 - Data Science from 1963 to 2012.pdf:application/pdf;arXiv.org Snapshot:/Users/lpa2a/Zotero/storage/IXEC9GRJ/2311.html:text/html},
}
% Szilard's entropy paper as published in Behavioral Science, 1964.
% The short journal name now matches `journaltitle` (the exported "Syst. Res."
% did not), and the two ISSNs are written with hyphens.
@article{szilard_decrease_1964,
  author       = {Szilard, Leo},
  title        = {On the Decrease of Entropy in a Thermodynamic System by the Intervention of Intelligent Beings},
  journaltitle = {Behavioral Science},
  shortjournal = {Behav. Sci.},
  volume       = {9},
  number       = {4},
  pages        = {301--310},
  date         = {1964},
  issn         = {0005-7940, 1099-1743},
  doi          = {10.1002/bs.3830090402},
  url          = {https://onlinelibrary.wiley.com/doi/10.1002/bs.3830090402},
  urldate      = {2024-07-22},
  langid       = {english},
  rights       = {http://doi.wiley.com/10.1002/tdm\_license\_1.1},
  file         = {Szilard - 1964 - On the decrease of entropy in a thermodynamic syst.pdf:/Users/lpa2a/Zotero/storage/6JNRVDPV/Szilard - 1964 - On the decrease of entropy in a thermodynamic syst.pdf:application/pdf},
}
% IFIP terminology guide, 1971. Typed as a book (the exported article type had no
% journal). The corporate co-author is double-braced so it is not parsed as a
% personal name.
% NOTE(review): publisher and location were added from the published edition -- confirm against the PDF.
@book{gould_ifip_1971,
  author    = {Gould, Ian H. and {Fédération internationale pour le traitement de l'information}},
  title     = {{IFIP} Guide to Concepts and Terms in Data Processing},
  publisher = {North-Holland},
  location  = {Amsterdam},
  date      = {1971},
  keywords  = {Electronic data processing--Terminology.},
  file      = {Gould and Fédération internationale pour le traitement de l'information. - 1971 - IFIP guide to concepts and terms in data processin.pdf:/Users/lpa2a/Zotero/storage/CJTUJCFF/Gould and Fédération internationale pour le traitement de l'information. - 1971 - IFIP guide to concepts and terms in data processin.pdf:application/pdf},
}
% Jaynes, second part of "Information Theory and Statistical Mechanics" (Phys. Rev. 108).
% The Zotero "Publisher: ..." note was moved into the `publisher` field so it no
% longer prints as a free-text note.
@article{jaynes_information_1957,
  author       = {Jaynes, Edwin T.},
  title        = {Information Theory and Statistical Mechanics. {II}},
  journaltitle = {Physical Review},
  shortjournal = {Phys. Rev.},
  volume       = {108},
  number       = {2},
  pages        = {171--190},
  date         = {1957-10-15},
  publisher    = {American Physical Society},
  doi          = {10.1103/PhysRev.108.171},
  url          = {https://link.aps.org/doi/10.1103/PhysRev.108.171},
  urldate      = {2024-07-22},
  abstract     = {Treatment of the predictive aspect of statistical mechanics as a form of statistical inference is extended to the density-matrix formalism and applied to a discussion of the relation between irreversibility and information loss. A principle of "statistical complementarity" is pointed out, according to which the empirically verifiable probabilities of statistical mechanics necessarily correspond to incomplete predictions. A preliminary discussion is given of the second law of thermodynamics and of a certain class of irreversible processes, in an approximation equivalent to that of the semiclassical theory of radiation.},
  file         = {APS Snapshot:/Users/lpa2a/Zotero/storage/H8R26JEB/PhysRev.108.html:text/html},
}
% Jaynes, first part of "Information Theory and Statistical Mechanics" (Phys. Rev. 106).
% The Zotero "Publisher: ..." note was moved into the `publisher` field so it no
% longer prints as a free-text note.
@article{jaynes_information_1957-1,
  author       = {Jaynes, Edwin T.},
  title        = {Information Theory and Statistical Mechanics},
  journaltitle = {Physical Review},
  shortjournal = {Phys. Rev.},
  volume       = {106},
  number       = {4},
  pages        = {620--630},
  date         = {1957-05-15},
  publisher    = {American Physical Society},
  doi          = {10.1103/PhysRev.106.620},
  url          = {https://link.aps.org/doi/10.1103/PhysRev.106.620},
  urldate      = {2024-07-22},
  abstract     = {Information theory provides a constructive criterion for setting up probability distributions on the basis of partial knowledge, and leads to a type of statistical inference which is called the maximum-entropy estimate. It is the least biased estimate possible on the given information; i.e., it is maximally noncommittal with regard to missing information. If one considers statistical mechanics as a form of statistical inference rather than as a physical theory, it is found that the usual computational rules, starting with the determination of the partition function, are an immediate consequence of the maximum-entropy principle. In the resulting "subjective statistical mechanics," the usual rules are thus justified independently of any physical argument, and in particular independently of experimental verification; whether or not the results agree with experiment, they still represent the best estimates that could have been made on the basis of the information available.},
  file         = {APS Snapshot:/Users/lpa2a/Zotero/storage/GYV48D96/PhysRev.106.html:text/html},
}
% Kåhre's 2002 Kluwer book on the mathematical theory of information.
@book{kahre_mathematical_2002,
  author    = {Kåhre, Jan},
  title     = {The Mathematical Theory of Information},
  series    = {Kluwer international series in engineering and computer science},
  publisher = {Kluwer Academic Publishers},
  location  = {Boston},
  date      = {2002},
  isbn      = {978-1-4020-7064-8},
  keywords  = {Information theory},
}
% Eddington's "The Nature of the Physical World" (Macmillan, 1928; Gifford lectures series).
% The honorific "Sir" was removed from the given-name part so the name renders and
% sorts as "Eddington, Arthur Stanley".
@book{eddington_nature_1928,
  author    = {Eddington, Arthur Stanley},
  title     = {The Nature of the Physical World},
  series    = {Gifford lectures, 1927},
  publisher = {The Macmillan Company},
  location  = {New York},
  date      = {1928},
  keywords  = {Science -- Philosophy, Physics -- Philosophy},
}
% Szilard's 1929 German-language paper in Zeitschrift für Physik.
% Spelling restored (umlauts in the title and journal name, "thermodynamischen"),
% and `langid` set so hyphenation and casing follow German rules.
% NOTE(review): the DOI was added from the publisher record -- confirm it resolves.
@article{szilard_uber_1929,
  author       = {Szilard, Leo},
  title        = {Über die Entropieverminderung in einem thermodynamischen System bei Eingriffen intelligenter Wesen},
  journaltitle = {Zeitschrift für Physik},
  volume       = {53},
  pages        = {840--856},
  date         = {1929},
  doi          = {10.1007/BF01341281},
  langid       = {german},
}
% Bar-Hillel's 1964 essay collection. Typed as a book: the exported online type
% came from the library-catalogue page the record was captured from, which is
% kept in `url`.
% NOTE(review): publisher and location were added from the published edition -- confirm against the catalogue record.
@book{bar-hillel_language_1964,
  author    = {Bar-Hillel, Yehoshua},
  title     = {Language and Information: Selected Essays on Their Theory and Application},
  publisher = {Addison-Wesley},
  location  = {Reading, MA},
  date      = {1964},
  url       = {https://search.lib.virginia.edu/sources/uva_library/items/u991585},
  urldate   = {2024-07-31},
  file      = {Language and Information\: Selected Essays on Their Theory and Application:/Users/lpa2a/Zotero/storage/UE9BTRU3/u991585.html:text/html},
}
% Duncan and Semura's 2007 Foundations of Physics article on information loss and the second law.
@article{duncan_information_2007,
  author       = {Duncan, T. L. and Semura, J. S.},
  title        = {Information Loss as a Foundational Principle for the Second Law of Thermodynamics},
  journaltitle = {Foundations of Physics},
  shortjournal = {Found Phys},
  volume       = {37},
  number       = {12},
  pages        = {1767--1773},
  date         = {2007-12-01},
  issn         = {1572-9516},
  doi          = {10.1007/s10701-007-9159-z},
  url          = {https://doi.org/10.1007/s10701-007-9159-z},
  urldate      = {2024-07-31},
  langid       = {english},
  abstract     = {In a previous paper (Duncan, T.L., Semura, J.S. in Entropy 6:21, 2004) we considered the question, “What underlying property of nature is responsible for the second law?” A simple answer can be stated in terms of information: The fundamental loss of information gives rise to the second law. This line of thinking highlights the existence of two independent but coupled sets of laws: Information dynamics and energy dynamics. The distinction helps shed light on certain foundational questions in statistical mechanics. For example, the confusion surrounding previous “derivations” of the second law from energy dynamics can be resolved by noting that such derivations incorporate one or more assumptions that correspond to the loss of information. In this paper we further develop and explore the perspective in which the second law is fundamentally a law of information dynamics.},
  keywords     = {Information, Statistical mechanics, Entropy, Second law, Thermodynamics},
  file         = {Submitted Version:/Users/lpa2a/Zotero/storage/D68AGL5R/Duncan and Semura - 2007 - Information Loss as a Foundational Principle for t.pdf:application/pdf},
}