QSS/cocit_r.bib at master · NETESOLUTIONS/QSS · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
@article{de_solla_price_networks_1965,
  author   = {de Solla Price, D. J.},
  title    = {Networks of {Scientific} {Papers}},
  journal  = {Science},
  volume   = {149},
  number   = {3683},
  pages    = {510--515},
  month    = jul,
  year     = {1965},
  issn     = {0036-8075, 1095-9203},
  doi      = {10.1126/science.149.3683.510},
  language = {en}
}

@article{garfield_citation_1955,
  author     = {Garfield, Eugene},
  title      = {Citation {Indexes} for {Science}: {A} {New} {Dimension} in {Documentation} through {Association} of {Ideas}},
  shorttitle = {Citation {Indexes} for {Science}},
  journal    = {Science},
  volume     = {122},
  number     = {3159},
  pages      = {108--111},
  month      = jul,
  year       = {1955},
  issn       = {0036-8075, 1095-9203},
  doi        = {10.1126/science.122.3159.108},
  pmid       = {14385826},
  language   = {en},
  urldate    = {2019-05-16},
  copyright  = {Copyright © 1955 by the American Association for the Advancement of Science}
}

@article{stigler_1994,
	title = {Citation Patterns in the Journals of Statistics and Probability},
	volume = {9},
	shorttitle = {Citation Patterns in Statistics and Probability},
	language = {en},
	number = {1},
	journal = {Statistical Science},
	author = {Stigler, Stephen},
	year = {1994},
	pages = {94--108}
}


@article{newman_structure_2001,
  author    = {Newman, M. E. J.},
  title     = {The structure of scientific collaboration networks},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {98},
  number    = {2},
  pages     = {404--409},
  month     = jan,
  year      = {2001},
  issn      = {0027-8424, 1091-6490},
  doi       = {10.1073/pnas.98.2.404},
  language  = {en},
  urldate   = {2019-03-22},
  copyright = {Copyright 2001, The National Academy of Sciences},
  abstract  = {The structure of scientific collaboration networks is investigated. Two scientists are considered connected if they have authored a paper together and explicit networks of such connections are constructed by using data drawn from a number of databases, including MEDLINE (biomedical research), the Los Alamos e-Print Archive (physics), and NCSTRL (computer science). I show that these collaboration networks form small worlds, in which randomly chosen pairs of scientists are typically separated by only a short path of intermediate acquaintances. I further give results for mean and distribution of numbers of collaborators of authors, demonstrate the presence of clustering in the networks, and highlight a number of apparent differences in the patterns of collaboration between the fields studied.}
}


@article{small_co-citation_1973,
	title = {Co-citation in the scientific literature: {A} new measure of the relationship between two documents},
	volume = {24},
	issn = {0002-8231, 1097-4571},
	shorttitle = {Co-citation in the scientific literature},
	doi = {10.1002/asi.4630240406},
	language = {en},
	number = {4},
	urldate = {2019-02-03},
	journal = {Journal of the American Society for Information Science},
	author = {Small, Henry},
	month = jul,
	year = {1973},
	pages = {265--269}
}

% NOTE(review): the doi, issn and shorttitle formerly on this entry belonged to
% a different paper (they carried a JASIS DOI and the title "Co-citation in the
% scientific literature") and have been removed. The number, month and urldate
% values look like they came from the same template; confirm them against the
% original journal issue.
@article{marshakova-shaikevich_co-citation_1973,
	title = {System of document connections based on references},
	volume = {6},
	language = {ru},
	number = {4},
	urldate = {2019-02-03},
	journal = {Nauchno-Tekhnicheskaya Informatsiya Seriya 2-Informatsionnye Protsessy I Sistemy},
	author = {Marshakova-Shaikevich, Irina},
	month = jul,
	year = {1973},
	pages = {3--8}
}


@article{boyack_co-citation_2010,
	title = {Co-citation analysis, bibliographic coupling, and direct citation: {Which} citation approach represents the research front most accurately?},
	volume = {61},
	copyright = {© 2010 ASIS\&T},
	issn = {1532-2890},
	shorttitle = {Co-citation analysis, bibliographic coupling, and direct citation},
	doi = {10.1002/asi.21419},
	abstract = {In the past several years studies have started to appear comparing the accuracies of various science mapping approaches. These studies primarily compare the cluster solutions resulting from different similarity approaches, and give varying results. In this study we compare the accuracies of cluster solutions of a large corpus of 2,153,769 recent articles from the biomedical literature (2004–2008) using four similarity approaches: co-citation analysis, bibliographic coupling, direct citation, and a bibliographic coupling-based citation-text hybrid approach. Each of the four approaches can be considered a way to represent the research front in biomedicine, and each is able to successfully cluster over 92\% of the corpus. Accuracies are compared using two metrics—within-cluster textual coherence as defined by the Jensen-Shannon divergence, and a concentration measure based on the grant-to-article linkages indexed in MEDLINE. Of the three pure citation-based approaches, bibliographic coupling slightly outperforms co-citation analysis using both accuracy measures; direct citation is the least accurate mapping approach by far. The hybrid approach improves upon the bibliographic coupling results in all respects. We consider the results of this study to be robust given the very large size of the corpus, and the specificity of the accuracy measures used.},
	language = {en},
	number = {12},
	urldate = {2019-01-05},
	journal = {Journal of the American Society for Information Science and Technology},
	author = {Boyack, Kevin W. and Klavans, Richard},
	year = {2010},
	pages = {2389--2404}
}

@article{boyack_improving_2013,
	title = {Improving the accuracy of co-citation clustering using full text},
	volume = {64},
	issn = {1532-2882},
	shorttitle = {Improving the accuracy of co-citation clustering using full text},
	url = {http://doi.wiley.com/10.1002/asi.22896},
	doi = {10.1002/asi.22896},
	language = {en},
	number = {9},
	urldate = {2019-02-03},
	journal = {Journal of the American Society for Information Science and Technology},
	author = {Boyack, Kevin W. and Small, Henry and Klavans, Richard},
	month = sep,
	year = {2013},
	pages = {1759--1767}
}

@article{uzzi_atypical_2013,
	title = {Atypical combinations and scientific impact},
	volume = {342},
	issn = {1095-9203},
	doi = {10.1126/science.1240474},
	abstract = {Novelty is an essential feature of creative ideas, yet the building blocks of new ideas are often embodied in existing knowledge. From this perspective, balancing atypical knowledge with conventional knowledge may be critical to the link between innovativeness and impact. Our analysis of 17.9 million papers spanning all scientific fields suggests that science follows a nearly universal pattern: The highest-impact science is primarily grounded in exceptionally conventional combinations of prior work yet simultaneously features an intrusion of unusual combinations. Papers of this type were twice as likely to be highly cited works. Novel combinations of prior work are rare, yet teams are 37.7\% more likely than solo authors to insert novel combinations into familiar knowledge domains.},
	language = {en},
	number = {6157},
	journal = {Science},
	author = {Uzzi, Brian and Mukherjee, Satyam and Stringer, Michael and Jones, Ben},
	month = oct,
	year = {2013},
	pmid = {24159044},
	keywords = {Creativity, Knowledge, Periodicals as Topic, Research},
	pages = {468--472}
}

@article{klavans_research_2017,
  author   = {Klavans, Richard and Boyack, Kevin W.},
  title    = {Research portfolio analysis and topic prominence},
  journal  = {Journal of Informetrics},
  volume   = {11},
  number   = {4},
  pages    = {1158--1174},
  month    = nov,
  year     = {2017},
  issn     = {1751-1577},
  doi      = {10.1016/j.joi.2017.10.002},
  keywords = {Direct citation, Project-level grant data, Prominence, Research portfolio analysis, Research topics},
  abstract = {Stakeholders in the science system need to decide where to place their bets. Example questions include: Which areas of research should get more funding? Who should we hire? Which projects should we abandon and which new projects should we start? Making informed choices requires knowledge about these research options. Unfortunately, to date research portfolio options have not been defined in a consistent, transparent and relevant manner. Furthermore, we don’t know how to define demand for these options. In this article, we address the issues of consistency, transparency, relevance and demand by using a model of science consisting of 91,726 topics (or research options) that contain over 58 million documents. We present a new indicator of topic prominence – a measure of visibility, momentum and, ultimately, demand. We assign over \$203 billion of project-level funding data from STAR METRICS® to individual topics in science, and show that the indicator of topic prominence, explains over one-third of the variance in current (or future) funding by topic. We also show that highly prominent topics receive far more funding per researcher than topics that are not prominent. Implications of these results for research planning and portfolio analysis by institutions and researchers are emphasized.}
}

@article{moed_measuring_2010,
	title = {Measuring contextual citation impact of scientific journals},
	volume = {4},
	number = {3},
	journal = {Journal of Informetrics},
	author = {Moed, Henk F.},
	year = {2010},
	pages = {265--277},
}

@article{newman_random_2002,
  author   = {Newman, M. E. J. and Watts, D. J. and Strogatz, S. H.},
  title    = {Random graph models of social networks},
  journal  = {Proceedings of the National Academy of Sciences},
  volume   = {99},
  number   = {suppl 1},
  pages    = {2566--2572},
  year     = {2002},
  issn     = {0027-8424},
  doi      = {10.1073/pnas.012582999},
  abstract = {We describe some new exactly solvable models of the structure of social networks, based on random graphs with arbitrary degree distributions. We give models both for simple unipartite networks, such as acquaintance networks, and bipartite networks, such as affiliation networks. We compare the predictions of our models to data for a number of real-world social networks and find that in some cases, the models are in remarkable agreement with the data, whereas in others the agreement is poorer, perhaps indicating the presence of additional social structure in the network that is not captured by the random graph.}
}

@inproceedings{Shi:2010:CHI:1816123.1816131,
  author    = {Shi, Xiaolin and Leskovec, Jure and McFarland, Daniel A.},
  title     = {Citing for High Impact},
  booktitle = {Proceedings of the 10th Annual Joint Conference on Digital Libraries},
  series    = {JCDL '10},
  pages     = {49--58},
  numpages  = {10},
  year      = {2010},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Gold Coast, Queensland, Australia},
  isbn      = {978-1-4503-0085-8},
  doi       = {10.1145/1816123.1816131},
  acmid     = {1816131},
  keywords  = {citation networks, citation projection, publication impact}
}

% NOTE(review): the page range below is identical to that of the JCDL '10
% paper by Shi et al. and may have been copied from it; confirm against the
% STI 2014 proceedings.
@inproceedings{boyack_vs_uzzi_2014,
  author    = {Boyack, Kevin W. and Klavans, Richard},
  title     = {Atypical combinations are confounded by disciplinary effects},
  year      = {2014},
  maintitle = {19th International Conference on Science and Technology Indicators},
  booktitle = {International Conference on Science and Technology Indicators},
  pages     = {49--58},
  publisher = {CWTS-Leiden University},
  address   = {Leiden, Netherlands}
}

@book{garfield_1979,
  author    = {Garfield, Eugene},
  title     = {Citation Indexing---Its Theory and Application in Science, Technology, and Humanities},
  publisher = {John Wiley and Sons, ISI Press},
  year      = 1979,
  address   = {New York, NY, USA},
  edition   = {1},
  isbn      = {089495024X, 9780894950247}
}

@techreport{NBERw0000,
  title       = {NBER WORKING PAPERS ONLINE TEST DOCUMENT},
  author      = {Document, Testing},
  institution = {National Bureau of Economic Research},
  type        = {Working Paper},
  series      = {Working Paper Series},
  year        = {1973},
  doi         = {10.3386/w0000},
  abstract    = {This is a test document for the NBER Online working paper system. Every user should be able to search for and display bibliographic information in this format. The URL for this document is http://www.nber.org/papers/w0000. Full-Text access to working papers is offered on a subscription basis, (and free to press, US government, and low income). If your domain name or IP address is in the authorization file, or if you are properly logged onto this site, you should see both HTML and PDF buttons below this text block.  If the button is missing, you are not in our authorization file. Nevertheless papers older than 18 months and certain other papers are always freely available. Test of Mathjax: You should see a nicely formatted solution to a quadratic equation here. $x = (-b \pm \sqrt{b^2-4ac})/(2a)$}
}

@Article{wallace_lariviere_gingras_2012,
	Author={Wallace, Matthew L. and Larivi{\`e}re, Vincent and Gingras, Yves},
	title = {A {Small} {World} of {Citations}? {The} {Influence} of {Collaboration} {Networks} on {Citation} {Practices}},
	Journal={{PLOS One}},
	Year={2012},
	Volume={7},
	Number={3},
	Pages={e33339},
	doi={10.1371/journal.pone.0033339},
	urldate = {2019-05-16},
	file = {A Small World of Citations? The Influence of Collaboration Networks on Citation Practices:/Users/chackoge/Zotero/storage/C4CBEPES/article.html:text/html}
}


@Article{patience_pmid28560354,
   Author  = {Patience, G. S. and Patience, C. A. and Blais, B. and Bertrand, F.},
   Title   = {Citation analysis of scientific categories},
   Journal = {Heliyon},
   Year    = {2017},
   Volume  = {3},
   Number  = {5},
   Pages   = {e00300},
   Month   = may
}

@techreport{GithubERNIE2019,
  author      = {Korobskiy, D. and Davey, A. and Liu, S. and Devarakonda, S. and Chacko, G.},
  title       = {{Enhanced Research Network Informatics Environment (ERNIE)}},
  type        = {Github Repository},
  institution = {NET ESolutions Corporation},
  year        = {2019},
  url         = {https://github.com/NETESOLUTIONS/ERNIE}
}


@article{Keserci371955,
  author       = {Keserci, Samet and Davey, Avon and Pico, Alexander R and Korobskiy, Dmitriy and Chacko, George},
  title        = {{ERNIE}: A Data Platform for Research Assessment},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = {2018},
  elocation-id = {371955},
  doi          = {10.1101/371955},
  abstract     = {Data mining coupled to network analysis has been successfully used to study relationships between basic discovery and translational applications such as drug development; as well to document research collaborations and knowledge flows. Assembling relevant data for such studies in a form that supports analysis presents challenges. We have developed Enhanced Research Network Information Environment (ERNIE), an open source, scalable cloud-based platform that (i) integrates data drawn from public and commercial sources (ii) provides users with analytical workflows that incorporate expert input at critical stages. A modular design enables the addition, deletion, or substitution of data sources. To demonstrate the capabilities of ERNIE, we have conducted case studies that span drug development and pharmacogenetics. In these studies, we analyze data from regulatory documents, bibliographic and patent databases, research grant records, and clinical trials, to document collaborations and identify influential research accomplishments.}
}


@article{hicks_bibliometrics:_2015,
	title = {Bibliometrics: {The} {Leiden} {Manifesto} for research metrics},
	volume = {520},
	shorttitle = {Bibliometrics},
	doi = {10.1038/520429a},
	abstract = {Use these ten principles to guide research evaluation, urge Diana Hicks, Paul Wouters and colleagues.},
	language = {en},
	number = {7548},
	urldate = {2019-03-26},
	journal = {Nature},
	author = {Hicks, Diana and Wouters, Paul and Waltman, Ludo and de Rijcke, Sarah and Rafols, Ismael},
	month = apr,
	year = {2015},
	pages = {429--431},
	file = {Snapshot:/Users/chackoge/Zotero/storage/M74K35LX/bibliometrics-the-leiden-manifesto-for-research-metrics-1.html:text/html}
}


@article{stringer_statistical_2010,
	title = {Statistical validation of a global model for the distribution of the ultimate number of citations accrued by papers published in a scientific journal},
	volume = {61},
	issn = {1532-2882},
	doi = {10.1002/asi.21335},
	abstract = {A central issue in evaluative bibliometrics is the characterization of the citation distribution of papers in the scientific literature. Here, we perform a large-scale empirical analysis of journals from every field in Thomson Reuters' Web of Science database. We find that only 30 of the 2,184 journals have citation distributions that are inconsistent with a discrete lognormal distribution at the rejection threshold that controls the false discovery rate at 0.05. We find that large, multidisciplinary journals are over-represented in this set of 30 journals, leading us to conclude that, within a discipline, citation distributions are lognormal. Our results strongly suggest that the discrete lognormal distribution is a globally accurate model for the distribution of “eventual impact” of scientific papers published in single-discipline journal in a single year that is removed sufficiently from the present date.},
	number = {7},
	urldate = {2019-05-22},
	journal = {Journal of the American Society for Information Science and Technology},
	author = {Stringer, Michael J and Sales-Pardo, Marta and Amaral, Lu{\'\i}s A Nunes},
	month = jul,
	year = {2010},
	pmid = {21858251},
	pmcid = {PMC3158611},
	pages = {1377--1385},
	file = {PubMed Central Full Text PDF:/Users/chackoge/Zotero/storage/H52CM3H3/Stringer et al. - 2010 - Statistical validation of a global model for the d.pdf:application/pdf}
}

@article{perline_strong_2005,
  author   = {Perline, Richard},
  title    = {Strong, {Weak} and {False} {Inverse} {Power} {Laws}},
  journal  = {Statistical Science},
  volume   = {20},
  number   = {1},
  pages    = {68--88},
  year     = {2005},
  issn     = {0883-4237},
  urldate  = {2019-05-22},
  abstract = {[Pareto, Zipf and numerous subsequent investigators of inverse power distributions have often represented their findings as though their data conformed to a power law form for all ranges of the variable of interest. I refer to this ideal case as a strong inverse power law (SIPL). However, many of the examples used by Pareto and Zipf, as well as others who have followed them, have been truncated data sets, and if one looks more carefully in the lower range of values that was originally excluded, the power law behavior usually breaks down at some point. This breakdown seems to fall into two broad cases, called here (1) weak and (2) false inverse power laws (WIPL and FIPL, resp.). Case 1 refers to the situation where the sample data fit a distribution that has an approximate inverse power form only in some upper range of values. Case 2 refers to the situation where a highly truncated sample from certain exponential-type (and in particular, "lognormal-like") distributions can convincingly mimic a power law. The main objectives of this paper are (a) to show how the discovery of Pareto-Zipf-type laws is closely associated with truncated data sets; (b) to elaborate on the categories of strong, weak and false inverse power laws; and (c) to analyze FIPLs in some detail. I conclude that many, but not all, Pareto-Zipf examples are likely to be FIPL finite mixture distributions and that there are few genuine instances of SIPLs.]}
}

@article{kullback_information_1951,
	title = {On {Information} and {Sufficiency}},
	volume = {22},
	issn = {0003-4851, 2168-8990},
	doi = {10.1214/aoms/1177729694},
	language = {en},
	number = {1},
	urldate = {2019-05-22},
	journal = {The Annals of Mathematical Statistics},
	author = {Kullback, S. and Leibler, R. A.},
	month = mar,
	year = {1951},
	mrnumber = {MR39968},
	zmnumber = {0042.38403},
	pages = {79--86},
	file = {Full Text PDF:/Users/chackoge/Zotero/storage/84EER733/Kullback and Leibler - 1951 - On Information and Sufficiency.pdf:application/pdf;Snapshot:/Users/chackoge/Zotero/storage/G96ER5RE/1177729694.html:text/html}
}

@ARTICLE{10.3389/frma.2018.00020,
AUTHOR={Zuckerman, Harriet},
TITLE={The Sociology of Science and the {Garfield} Effect: Happy Accidents, Unanticipated Developments and Unexploited Potentials},
JOURNAL={Frontiers in Research Metrics and Analytics},
VOLUME={3},
PAGES={20},
YEAR={2018},
DOI={10.3389/frma.2018.00020},
ISSN={2504-0537},

ABSTRACT={The Sociology of Science and the Garfield Effect: Happy Accidents, Unpredictable Developments and Unexploited Potentials}
}

@Article{seewave2008,
    title = {Seewave: a free modular tool for sound analysis and synthesis},
    author = {J. Sueur and T. Aubin and C. Simonis},
    year = {2008},
    journal = {Bioacoustics},
    volume = {18},
    pages = {213--226},
  }

  @article{wang_bias_2017,
  title = {Bias against Novelty in Science: {{A}} Cautionary Tale for Users of Bibliometric Indicators},
  volume = {46},
  issn = {0048-7333},
  shorttitle = {Bias against Novelty in Science},
  abstract = {Research which explores unchartered waters has a high potential for major impact but also carries a higher uncertainty of having impact. Such explorative research is often described as taking a novel approach. This study examines the complex relationship between pursuing a novel approach and impact. Viewing scientific research as a combinatorial process, we measure novelty in science by examining whether a published paper makes first-time-ever combinations of referenced journals, taking into account the difficulty of making such combinations. We apply this newly developed measure of novelty to all Web of Science research articles published in 2001 across all scientific disciplines. We find that highly novel papers, defined to be those that make more (distant) new combinations, deliver high gains to science: they are more likely to be a top 1\% highly cited paper in the long run, to inspire follow-on highly cited research, and to be cited in a broader set of disciplines and in disciplines that are more distant from their ``home'' field. At the same time, novel research is also more risky, reflected by a higher variance in its citation performance. We also find strong evidence of delayed recognition of novel papers as novel papers are less likely to be top cited when using short time-windows. In addition, we find that novel research is significantly more highly cited in ``foreign'' fields but not in their ``home'' field. Finally, novel papers are published in journals with a lower Impact Factor, compared with non-novel papers, ceteris paribus. These findings suggest that science policy, in particular funding decisions which rely on bibliometric indicators based on short-term citation counts and Journal Impact Factors, may be biased against ``high risk/high gain'' novel research. The findings also caution against a mono-disciplinary approach in peer review to assess the true value of novel research.},
  number = {8},
  journal = {Research Policy},
  doi = {10.1016/j.respol.2017.06.006},
  author = {Wang, Jian and Veugelers, Reinhilde and Stephan, Paula},
  month = oct,
  year = {2017},
  keywords = {Bibliometrics,Breakthrough research,Evaluation,Impact,Novelty},
  pages = {1416--1436},
  file = {/Users/george/Zotero/storage/A328QBW4/Wang et al. - 2017 - Bias against novelty in science A cautionary tale.pdf;/Users/george/Zotero/storage/KQYPWYWN/S0048733317301038.html}
}

@article{perianesrodriguez_impact_2018,
  title = {The Impact of Classification Systems in the Evaluation of the Research Performance of the {{Leiden Ranking}} Universities},
  volume = {69},
  copyright = {\textcopyright{} 2018 ASIS\&T},
  issn = {2330-1643},
  abstract = {In this article, we investigate the consequences of choosing different classification systems\textemdash{}namely, the way publications (or journals) are assigned to scientific fields\textemdash{}for the ranking of research units. We study the impact of this choice on the ranking of 500 universities in the 2013 edition of the Leiden Ranking in two cases. First, we compare a Web of Science (WoS) journal-level classification system, consisting of 236 subject categories, and a publication-level algorithmically constructed system, denoted G8, consisting of 5,119 clusters. The result is that the consequences of the move from the WoS to the G8 system using the Top 1\% citation impact indicator are much greater than the consequences of this move using the Top 10\% indicator. Second, we compare the G8 classification system and a publication-level alternative of the same family, the G6 system, consisting of 1,363 clusters. The result is that, although less important than in the previous case, the consequences of the move from the G6 to the G8 system under the Top 1\% indicator are still of a large order of magnitude.},
  language = {en},
  number = {8},
  journal = {Journal of the Association for Information Science and Technology},
  doi = {10.1002/asi.24017},
  author = {Perianes-Rodriguez, Antonio and Ruiz-Castillo, Javier},
  year = {2018},
  pages = {1046--1053},
  file = {/Users/george/Zotero/storage/L64BS8I5/Perianes‐Rodriguez and Ruiz‐Castillo - 2018 - The impact of classification systems in the evalua.pdf;/Users/george/Zotero/storage/937G9L6U/asi.html}
}

@article{traag_louvain_2019,
  title = {From {{Louvain}} to {{Leiden}}: Guaranteeing Well-Connected Communities},
  volume = {9},
  copyright = {2019 The Author(s)},
  issn = {2045-2322},
  shorttitle = {From {{Louvain}} to {{Leiden}}},
  abstract = {Community detection is often used to understand the structure of large and complex networks. One of the most popular algorithms for uncovering community structure is the so-called Louvain algorithm. We show that this algorithm has a major defect that largely went unnoticed until now: the Louvain algorithm may yield arbitrarily badly connected communities. In the worst case, communities may even be disconnected, especially when running the algorithm iteratively. In our experimental analysis, we observe that up to 25\% of the communities are badly connected and up to 16\% are disconnected. To address this problem, we introduce the Leiden algorithm. We prove that the Leiden algorithm yields communities that are guaranteed to be connected. In addition, we prove that, when the Leiden algorithm is applied iteratively, it converges to a partition in which all subsets of all communities are locally optimally assigned. Furthermore, by relying on a fast local move approach, the Leiden algorithm runs faster than the Louvain algorithm. We demonstrate the performance of the Leiden algorithm for several benchmark and real-world networks. We find that the Leiden algorithm is faster than the Louvain algorithm and uncovers better partitions, in addition to providing explicit guarantees.},
  language = {en},
  number = {1},
  journal = {Scientific Reports},
  doi = {10.1038/s41598-019-41695-z},
  author = {Traag, V. A. and Waltman, L. and van Eck, N. J.},
  month = mar,
  year = {2019},
  pages = {1--12},
  file = {/Users/george/Zotero/storage/LVWL6BGZ/Traag et al. - 2019 - From Louvain to Leiden guaranteeing well-connecte.pdf;/Users/george/Zotero/storage/KWY9WBML/s41598-019-41695-z.html}
}

@article{klavans_which_2017,
  title = {Which {{Type}} of {{Citation Analysis Generates}} the {{Most Accurate Taxonomy}} of {{Scientific}} and {{Technical Knowledge}}?},
  volume = {68},
  copyright = {\textcopyright{} 2016 ASIS\&T},
  issn = {2330-1643},
  abstract = {In 1965, Price foresaw the day when a citation-based taxonomy of science and technology would be delineated and correspondingly used for science policy. A taxonomy needs to be comprehensive and accurate if it is to be useful for policy making, especially now that policy makers are utilizing citation-based indicators to evaluate people, institutions and laboratories. Determining the accuracy of a taxonomy, however, remains a challenge. Previous work on the accuracy of partition solutions is sparse, and the results of those studies, although useful, have not been definitive. In this study we compare the accuracies of topic-level taxonomies based on the clustering of documents using direct citation, bibliographic coupling, and co-citation. Using a set of new gold standards\textemdash{}articles with at least 100 references\textemdash{}we find that direct citation is better at concentrating references than either bibliographic coupling or co-citation. Using the assumption that higher concentrations of references denote more accurate clusters, direct citation thus provides a more accurate representation of the taxonomy of scientific and technical knowledge than either bibliographic coupling or co-citation. We also find that discipline-level taxonomies based on journal schema are highly inaccurate compared to topic-level taxonomies, and recommend against their use.},
  language = {en},
  number = {4},
  journal = {Journal of the Association for Information Science and Technology},
  doi = {10.1002/asi.23734},
  author = {Klavans, Richard and Boyack, Kevin W.},
  year = {2017},
  pages = {984--998},
  file = {/Users/george/Zotero/storage/YMYJZQKH/Klavans and Boyack - 2017 - Which Type of Citation Analysis Generates the Most.pdf;/Users/george/Zotero/storage/NFVQRKCG/asi.html}
}


@article{peters_determinants_1994,
  title = {On {{Determinants}} of {{Citation Scores}}: {{A Case Study}} in {{Chemical Engineering}}},
  volume = {45},
  shorttitle = {On {{Determinants}} of {{Citation Scores}}},
  abstract = {We investigated a broad spectrum of factors in order to identify one or a few that are the primary determinant of citation scores of scientific papers. Our focus is on a large field of applied science, chemical engineering. A set of 226 papers written by 18 internationally recognized scientists (`topauthors') and citations to these papers has been used as a data source. Using multiple regression analysis, we found that the factor `top-author,' i.e., the `personal variation,' contributes the largest number of citations. Other important factors are number of references, language, journal category, and journal influence.},
  number = {1},
  journal = {Journal of the American Society for Information Science},
  doi = {10.1002/(SICI)1097-4571(199401)45:1<39::AID-ASI5>3.0.CO;2-Q},
  author = {Peters, H. P. F. and van Raan, Anthony F. J.},
  year = {1994},
  keywords = {Scientific literature},
  pages = {39--49}
}

@article{vieira_citations_2010,
  title = {Citations to Scientific Articles: {{Its}} Distribution and Dependence on the Article Features},
  volume = {4},
  issn = {1751-1577},
  shorttitle = {Citations to Scientific Articles},
  abstract = {The citation counts are increasingly used to assess the impact on the scientific community of publications produced by a researcher, an institution or a country. There are many institutions that use bibliometric indicators to steer research policy and for hiring or promotion decisions. Given the importance that counting citations has today, the aim of the work presented here is to show how citations are distributed within a scientific area and determine the dependence of the citation count on the article features. All articles referenced in the Web of Science in 2004 for Biology \& Biochemistry, Chemistry, Mathematics and Physics were considered. We show that the distribution of citations is well represented by a double exponential-Poisson law. There is a dependence of the mean citation rate on the number of co-authors, the number of addresses and the number of references, although this dependence is a little far from the linear behaviour. For the relation between the mean impact and the number of pages the dependence obtained was very low. For Biology \& Biochemistry and Chemistry we found a linear behaviour between the mean citation per article and impact factor and for Mathematics and Physics the results obtained are near to the linear behaviour.},
  number = {1},
  journal = {Journal of Informetrics},
  doi = {10.1016/j.joi.2009.06.002},
  author = {Vieira, E. S. and Gomes, J. A. N. F.},
  month = jan,
  year = {2010},
  keywords = {Citations analyses,Field,Impact},
  pages = {1--13},
  file = {/Users/george/Zotero/storage/VEQIXUTP/S1751157709000534.html}
}